mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
[web] Add support for syncing unicode line break properties (flutter/engine#18040)
This commit is contained in:
parent
62a91ad16c
commit
cec58e3fb0
@ -490,6 +490,7 @@ FILE: ../../../flutter/lib/web_ui/lib/src/engine/surface/surface_stats.dart
|
||||
FILE: ../../../flutter/lib/web_ui/lib/src/engine/surface/transform.dart
|
||||
FILE: ../../../flutter/lib/web_ui/lib/src/engine/test_embedding.dart
|
||||
FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/font_collection.dart
|
||||
FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/line_break_properties.dart
|
||||
FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/line_breaker.dart
|
||||
FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/measurement.dart
|
||||
FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/paragraph.dart
|
||||
|
||||
@ -108,6 +108,7 @@ part 'engine/surface/surface_stats.dart';
|
||||
part 'engine/surface/transform.dart';
|
||||
part 'engine/test_embedding.dart';
|
||||
part 'engine/text/font_collection.dart';
|
||||
part 'engine/text/line_break_properties.dart';
|
||||
part 'engine/text/line_breaker.dart';
|
||||
part 'engine/text/measurement.dart';
|
||||
part 'engine/text/paragraph.dart';
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -5,6 +5,14 @@
|
||||
// @dart = 2.6
|
||||
part of engine;
|
||||
|
||||
// UTF-16 code units of the characters that delimit the alphabets used by the
// packed unicode property data.
const int _kChar_0 = 0x30; // "0"
const int _kChar_9 = 0x39; // "9"
const int _kChar_A = 0x41; // "A"
const int _kChar_Z = 0x5A; // "Z"
const int _kChar_a = 0x61; // "a"
const int _kChar_z = 0x7A; // "z"
const int _kCharBang = 0x21; // "!"
|
||||
|
||||
enum _ComparisonResult {
|
||||
inside,
|
||||
higher,
|
||||
@ -63,11 +71,29 @@ class UnicodeRange<P> {
|
||||
class UnicodePropertyLookup<P> {
|
||||
const UnicodePropertyLookup(this.ranges);
|
||||
|
||||
/// Creates a [UnicodePropertyLookup] from packed line break data.
|
||||
factory UnicodePropertyLookup.fromPackedData(
|
||||
String packedData,
|
||||
int singleRangesCount,
|
||||
List<P> propertyEnumValues,
|
||||
) {
|
||||
return UnicodePropertyLookup<P>(
|
||||
_unpackProperties<P>(packedData, singleRangesCount, propertyEnumValues),
|
||||
);
|
||||
}
|
||||
|
||||
final List<UnicodeRange<P>> ranges;
|
||||
|
||||
P find(int value) {
|
||||
final int index = _binarySearch(value);
|
||||
return index == -1 ? null : ranges[index].property;
|
||||
/// Takes a [text] and an [index], and returns the property of the character
|
||||
/// located at that [index].
|
||||
///
|
||||
/// If the [index] is out of range, null will be returned.
|
||||
P find(String text, int index) {
|
||||
if (index < 0 || index >= text.length) {
|
||||
return null;
|
||||
}
|
||||
final int rangeIndex = _binarySearch(text.codeUnitAt(index));
|
||||
return rangeIndex == -1 ? null : ranges[rangeIndex].property;
|
||||
}
|
||||
|
||||
int _binarySearch(int value) {
|
||||
@ -90,3 +116,94 @@ class UnicodePropertyLookup<P> {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Decodes [packedData] into the list of [UnicodeRange]s it describes.
///
/// Each decoded property letter is turned into an index that is looked up in
/// [propertyEnumValues]. [singleRangesCount] is the number of compact
/// single-value entries in [packedData]; it is only used by an assert that
/// sanity-checks the length of the data.
List<UnicodeRange<P>> _unpackProperties<P>(
  String packedData,
  int singleRangesCount,
  List<P> propertyEnumValues,
) {
  // [packedData] is a sequence of entries with no separators between them.
  //
  // A regular entry takes 9 characters:
  //
  //   * [0..3]: Range start, as a base36 integer.
  //   * [4..7]: Range end, as a base36 integer.
  //   * [8]: A single letter that identifies the property enum value.
  //
  // An entry whose range start equals its range end is stored in a compact
  // form that takes 6 characters:
  //
  //   * [0..3]: The single value, as a base36 integer.
  //   * [4]: "!" which marks the absence of a range end.
  //   * [5]: A single letter that identifies the property enum value.
  //
  // Every compact entry is 3 characters shorter than a regular one, so adding
  // those characters back must give a whole number of 9-character entries.
  assert((packedData.length + singleRangesCount * 3) % 9 == 0);

  final int packedLength = packedData.length;
  final List<UnicodeRange<P>> unpacked = <UnicodeRange<P>>[];

  int cursor = 0;
  while (cursor < packedLength) {
    final int start = _consumeInt(packedData, cursor);
    cursor += 4;

    // A "!" right after the start means this is a compact single-value entry.
    final bool isSingleValue = packedData.codeUnitAt(cursor) == _kCharBang;
    final int end = isSingleValue ? start : _consumeInt(packedData, cursor);
    cursor += isSingleValue ? 1 : 4;

    final int enumIndex =
        _getEnumIndexFromPackedValue(packedData.codeUnitAt(cursor));
    final P property = propertyEnumValues[enumIndex];
    cursor++;

    unpacked.add(UnicodeRange<P>(start, end, property));
  }
  return unpacked;
}
|
||||
|
||||
/// Converts the letter [charCode] found in packed data to an enum index.
///
/// "A".."Z" map to indices 0..25 and "a".."z" map to indices 26..51.
int _getEnumIndexFromPackedValue(int charCode) {
  // Keep this in sync with [EnumValue.serialized] in
  // `tool/unicode_sync_script.dart`, which produces these letters.
  assert((charCode >= _kChar_A && charCode <= _kChar_Z) ||
      (charCode >= _kChar_a && charCode <= _kChar_z));

  // Uppercase letters sort before lowercase ones, so anything up to "Z" is one
  // of the first 26 enum values; the rest continue from index 26.
  return charCode <= _kChar_Z
      ? charCode - _kChar_A
      : 26 + charCode - _kChar_a;
}
|
||||
|
||||
/// Reads the 4-character base36 integer that starts at [index] in
/// [packedData].
int _consumeInt(String packedData, int index) {
  // For the lowercase base36 digits used by the packed data this gives the
  // same result as:
  //
  // ```dart
  // return int.tryParse(packedData.substring(index, index + 4), radix: 36);
  // ```
  //
  // Calling substring that many times is slow though. Reading the code units
  // directly makes the unpacking 25%-45% faster than using substring.
  final int units = _getIntFromCharCode(packedData.codeUnitAt(index + 3));
  final int power1 = _getIntFromCharCode(packedData.codeUnitAt(index + 2));
  final int power2 = _getIntFromCharCode(packedData.codeUnitAt(index + 1));
  final int power3 = _getIntFromCharCode(packedData.codeUnitAt(index));

  // Horner's scheme: most significant digit first.
  return ((power3 * 36 + power2) * 36 + power1) * 36 + units;
}
|
||||
|
||||
/// Returns the value of a single base36 digit given as a [charCode].
///
/// "0".."9" give 0..9 and lowercase "a".."z" give 10..35, which is what
/// `int.parse(digit, radix: 36)` returns for the same one-character string.
int _getIntFromCharCode(int charCode) {
  assert((charCode >= _kChar_0 && charCode <= _kChar_9) ||
      (charCode >= _kChar_a && charCode <= _kChar_z));

  // Digits sort before letters. "a" is worth 10 and the remaining letters
  // count up from there.
  return charCode <= _kChar_9
      ? charCode - _kChar_0
      : charCode - _kChar_a + 10;
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -65,21 +65,21 @@ abstract class WordBreaker {
|
||||
return false;
|
||||
}
|
||||
|
||||
final CharProperty immediateRight = getCharProperty(text, index);
|
||||
CharProperty immediateLeft = getCharProperty(text, index - 1);
|
||||
final WordCharProperty immediateRight = wordLookup.find(text, index);
|
||||
WordCharProperty immediateLeft = wordLookup.find(text, index - 1);
|
||||
|
||||
// Do not break within CRLF.
|
||||
// WB3: CR × LF
|
||||
if (immediateLeft == CharProperty.CR && immediateRight == CharProperty.LF)
|
||||
if (immediateLeft == WordCharProperty.CR && immediateRight == WordCharProperty.LF)
|
||||
return false;
|
||||
|
||||
// Otherwise break before and after Newlines (including CR and LF)
|
||||
// WB3a: (Newline | CR | LF) ÷
|
||||
if (_oneOf(
|
||||
immediateLeft,
|
||||
CharProperty.Newline,
|
||||
CharProperty.CR,
|
||||
CharProperty.LF,
|
||||
WordCharProperty.Newline,
|
||||
WordCharProperty.CR,
|
||||
WordCharProperty.LF,
|
||||
)) {
|
||||
return true;
|
||||
}
|
||||
@ -87,9 +87,9 @@ abstract class WordBreaker {
|
||||
// WB3b: ÷ (Newline | CR | LF)
|
||||
if (_oneOf(
|
||||
immediateRight,
|
||||
CharProperty.Newline,
|
||||
CharProperty.CR,
|
||||
CharProperty.LF,
|
||||
WordCharProperty.Newline,
|
||||
WordCharProperty.CR,
|
||||
WordCharProperty.LF,
|
||||
)) {
|
||||
return true;
|
||||
}
|
||||
@ -99,8 +99,8 @@ abstract class WordBreaker {
|
||||
|
||||
// Keep horizontal whitespace together.
|
||||
// WB3d: WSegSpace × WSegSpace
|
||||
if (immediateLeft == CharProperty.WSegSpace &&
|
||||
immediateRight == CharProperty.WSegSpace) {
|
||||
if (immediateLeft == WordCharProperty.WSegSpace &&
|
||||
immediateRight == WordCharProperty.WSegSpace) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -109,9 +109,9 @@ abstract class WordBreaker {
|
||||
// WB4: X (Extend | Format | ZWJ)* → X
|
||||
if (_oneOf(
|
||||
immediateRight,
|
||||
CharProperty.Extend,
|
||||
CharProperty.Format,
|
||||
CharProperty.ZWJ,
|
||||
WordCharProperty.Extend,
|
||||
WordCharProperty.Format,
|
||||
WordCharProperty.ZWJ,
|
||||
)) {
|
||||
// The Extend|Format|ZWJ character is to the right, so it is attached
|
||||
// to a character to the left, don't split here
|
||||
@ -122,16 +122,16 @@ abstract class WordBreaker {
|
||||
int l = 0;
|
||||
while (_oneOf(
|
||||
immediateLeft,
|
||||
CharProperty.Extend,
|
||||
CharProperty.Format,
|
||||
CharProperty.ZWJ,
|
||||
WordCharProperty.Extend,
|
||||
WordCharProperty.Format,
|
||||
WordCharProperty.ZWJ,
|
||||
)) {
|
||||
l++;
|
||||
if (index - l - 1 < 0) {
|
||||
// Reached the beginning of text.
|
||||
return true;
|
||||
}
|
||||
immediateLeft = getCharProperty(text, index - l - 1);
|
||||
immediateLeft = wordLookup.find(text, index - l - 1);
|
||||
}
|
||||
|
||||
// Do not break between most letters.
|
||||
@ -145,27 +145,27 @@ abstract class WordBreaker {
|
||||
|
||||
// Skip all Format, Extend and ZWJ to the right.
|
||||
int r = 0;
|
||||
CharProperty nextRight;
|
||||
WordCharProperty nextRight;
|
||||
do {
|
||||
r++;
|
||||
nextRight = getCharProperty(text, index + r);
|
||||
nextRight = wordLookup.find(text, index + r);
|
||||
} while (_oneOf(
|
||||
nextRight,
|
||||
CharProperty.Extend,
|
||||
CharProperty.Format,
|
||||
CharProperty.ZWJ,
|
||||
WordCharProperty.Extend,
|
||||
WordCharProperty.Format,
|
||||
WordCharProperty.ZWJ,
|
||||
));
|
||||
|
||||
// Skip all Format, Extend and ZWJ to the left.
|
||||
CharProperty nextLeft;
|
||||
WordCharProperty nextLeft;
|
||||
do {
|
||||
l++;
|
||||
nextLeft = getCharProperty(text, index - l - 1);
|
||||
nextLeft = wordLookup.find(text, index - l - 1);
|
||||
} while (_oneOf(
|
||||
nextLeft,
|
||||
CharProperty.Extend,
|
||||
CharProperty.Format,
|
||||
CharProperty.ZWJ,
|
||||
WordCharProperty.Extend,
|
||||
WordCharProperty.Format,
|
||||
WordCharProperty.ZWJ,
|
||||
));
|
||||
|
||||
// Do not break letters across certain punctuation.
|
||||
@ -173,9 +173,9 @@ abstract class WordBreaker {
|
||||
if (_isAHLetter(immediateLeft) &&
|
||||
_oneOf(
|
||||
immediateRight,
|
||||
CharProperty.MidLetter,
|
||||
CharProperty.MidNumLet,
|
||||
CharProperty.SingleQuote,
|
||||
WordCharProperty.MidLetter,
|
||||
WordCharProperty.MidNumLet,
|
||||
WordCharProperty.SingleQuote,
|
||||
) &&
|
||||
_isAHLetter(nextRight)) {
|
||||
return false;
|
||||
@ -185,79 +185,79 @@ abstract class WordBreaker {
|
||||
if (_isAHLetter(nextLeft) &&
|
||||
_oneOf(
|
||||
immediateLeft,
|
||||
CharProperty.MidLetter,
|
||||
CharProperty.MidNumLet,
|
||||
CharProperty.SingleQuote,
|
||||
WordCharProperty.MidLetter,
|
||||
WordCharProperty.MidNumLet,
|
||||
WordCharProperty.SingleQuote,
|
||||
) &&
|
||||
_isAHLetter(immediateRight)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// WB7a: Hebrew_Letter × Single_Quote
|
||||
if (immediateLeft == CharProperty.HebrewLetter &&
|
||||
immediateRight == CharProperty.SingleQuote) {
|
||||
if (immediateLeft == WordCharProperty.HebrewLetter &&
|
||||
immediateRight == WordCharProperty.SingleQuote) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// WB7b: Hebrew_Letter × Double_Quote Hebrew_Letter
|
||||
if (immediateLeft == CharProperty.HebrewLetter &&
|
||||
immediateRight == CharProperty.DoubleQuote &&
|
||||
nextRight == CharProperty.HebrewLetter) {
|
||||
if (immediateLeft == WordCharProperty.HebrewLetter &&
|
||||
immediateRight == WordCharProperty.DoubleQuote &&
|
||||
nextRight == WordCharProperty.HebrewLetter) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// WB7c: Hebrew_Letter Double_Quote × Hebrew_Letter
|
||||
if (nextLeft == CharProperty.HebrewLetter &&
|
||||
immediateLeft == CharProperty.DoubleQuote &&
|
||||
immediateRight == CharProperty.HebrewLetter) {
|
||||
if (nextLeft == WordCharProperty.HebrewLetter &&
|
||||
immediateLeft == WordCharProperty.DoubleQuote &&
|
||||
immediateRight == WordCharProperty.HebrewLetter) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Do not break within sequences of digits, or digits adjacent to letters
|
||||
// (“3a”, or “A3”).
|
||||
// WB8: Numeric × Numeric
|
||||
if (immediateLeft == CharProperty.Numeric &&
|
||||
immediateRight == CharProperty.Numeric) {
|
||||
if (immediateLeft == WordCharProperty.Numeric &&
|
||||
immediateRight == WordCharProperty.Numeric) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// WB9: AHLetter × Numeric
|
||||
if (_isAHLetter(immediateLeft) && immediateRight == CharProperty.Numeric)
|
||||
if (_isAHLetter(immediateLeft) && immediateRight == WordCharProperty.Numeric)
|
||||
return false;
|
||||
|
||||
// WB10: Numeric × AHLetter
|
||||
if (immediateLeft == CharProperty.Numeric && _isAHLetter(immediateRight))
|
||||
if (immediateLeft == WordCharProperty.Numeric && _isAHLetter(immediateRight))
|
||||
return false;
|
||||
|
||||
// Do not break within sequences, such as “3.2” or “3,456.789”.
|
||||
// WB11: Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
|
||||
if (nextLeft == CharProperty.Numeric &&
|
||||
if (nextLeft == WordCharProperty.Numeric &&
|
||||
_oneOf(
|
||||
immediateLeft,
|
||||
CharProperty.MidNum,
|
||||
CharProperty.MidNumLet,
|
||||
CharProperty.SingleQuote,
|
||||
WordCharProperty.MidNum,
|
||||
WordCharProperty.MidNumLet,
|
||||
WordCharProperty.SingleQuote,
|
||||
) &&
|
||||
immediateRight == CharProperty.Numeric) {
|
||||
immediateRight == WordCharProperty.Numeric) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// WB12: Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
|
||||
if (immediateLeft == CharProperty.Numeric &&
|
||||
if (immediateLeft == WordCharProperty.Numeric &&
|
||||
_oneOf(
|
||||
immediateRight,
|
||||
CharProperty.MidNum,
|
||||
CharProperty.MidNumLet,
|
||||
CharProperty.SingleQuote,
|
||||
WordCharProperty.MidNum,
|
||||
WordCharProperty.MidNumLet,
|
||||
WordCharProperty.SingleQuote,
|
||||
) &&
|
||||
nextRight == CharProperty.Numeric) {
|
||||
nextRight == WordCharProperty.Numeric) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Do not break between Katakana.
|
||||
// WB13: Katakana × Katakana
|
||||
if (immediateLeft == CharProperty.Katakana &&
|
||||
immediateRight == CharProperty.Katakana) {
|
||||
if (immediateLeft == WordCharProperty.Katakana &&
|
||||
immediateRight == WordCharProperty.Katakana) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -265,24 +265,24 @@ abstract class WordBreaker {
|
||||
// WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||
if (_oneOf(
|
||||
immediateLeft,
|
||||
CharProperty.ALetter,
|
||||
CharProperty.HebrewLetter,
|
||||
CharProperty.Numeric,
|
||||
CharProperty.Katakana,
|
||||
CharProperty.ExtendNumLet,
|
||||
WordCharProperty.ALetter,
|
||||
WordCharProperty.HebrewLetter,
|
||||
WordCharProperty.Numeric,
|
||||
WordCharProperty.Katakana,
|
||||
WordCharProperty.ExtendNumLet,
|
||||
) &&
|
||||
immediateRight == CharProperty.ExtendNumLet) {
|
||||
immediateRight == WordCharProperty.ExtendNumLet) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// WB13b: ExtendNumLet × (AHLetter | Numeric | Katakana)
|
||||
if (immediateLeft == CharProperty.ExtendNumLet &&
|
||||
if (immediateLeft == WordCharProperty.ExtendNumLet &&
|
||||
_oneOf(
|
||||
immediateRight,
|
||||
CharProperty.ALetter,
|
||||
CharProperty.HebrewLetter,
|
||||
CharProperty.Numeric,
|
||||
CharProperty.Katakana,
|
||||
WordCharProperty.ALetter,
|
||||
WordCharProperty.HebrewLetter,
|
||||
WordCharProperty.Numeric,
|
||||
WordCharProperty.Katakana,
|
||||
)) {
|
||||
return false;
|
||||
}
|
||||
@ -306,12 +306,12 @@ abstract class WordBreaker {
|
||||
}
|
||||
|
||||
static bool _oneOf(
|
||||
CharProperty value,
|
||||
CharProperty choice1,
|
||||
CharProperty choice2, [
|
||||
CharProperty choice3,
|
||||
CharProperty choice4,
|
||||
CharProperty choice5,
|
||||
WordCharProperty value,
|
||||
WordCharProperty choice1,
|
||||
WordCharProperty choice2, [
|
||||
WordCharProperty choice3,
|
||||
WordCharProperty choice4,
|
||||
WordCharProperty choice5,
|
||||
]) {
|
||||
if (value == choice1) {
|
||||
return true;
|
||||
@ -331,7 +331,7 @@ abstract class WordBreaker {
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool _isAHLetter(CharProperty property) {
|
||||
return _oneOf(property, CharProperty.ALetter, CharProperty.HebrewLetter);
|
||||
static bool _isAHLetter(WordCharProperty property) {
|
||||
return _oneOf(property, WordCharProperty.ALetter, WordCharProperty.HebrewLetter);
|
||||
}
|
||||
}
|
||||
|
||||
@ -8,6 +8,19 @@ part of ui;
|
||||
/// Initializes the platform.
///
/// Starts the platform initialization with the given [assetManager] and
/// returns the future of that initialization.
Future<void> webOnlyInitializePlatform({
  engine.AssetManager assetManager,
}) {
  final Future<void> platformReady =
      _initializePlatform(assetManager: assetManager);

  // Touching [engine.lineLookup] forces the lazy unpacking of the line break
  // data to happen now, while we are waiting for network requests. This is
  // purely an optimization: nothing breaks if it is removed.
  scheduleMicrotask(() => engine.lineLookup);

  return platformReady;
}
|
||||
|
||||
Future<void> _initializePlatform({
|
||||
engine.AssetManager assetManager,
|
||||
}) async {
|
||||
if (!debugEmulateFlutterTesterEnvironment) {
|
||||
engine.window.locationStrategy = const engine.HashLocationStrategy();
|
||||
|
||||
@ -2,76 +2,139 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// TODO(mdebbar): To reduce the size of generated code, we could pack the data
|
||||
// into a smaller format, e.g:
|
||||
//
|
||||
// ```dart
|
||||
// const _rawData = [
|
||||
// 0x000A, 0x000A, 1,
|
||||
// 0x000B, 0x000C, 2,
|
||||
// 0x000D, 0x000D, 3,
|
||||
// 0x0020, 0x0020, 4,
|
||||
// // ...
|
||||
// ];
|
||||
// ```
|
||||
//
|
||||
// Then we could lazily build the lookup instance on demand.
|
||||
// @dart = 2.6
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:args/args.dart';
|
||||
import 'package:path/path.dart' as path;
|
||||
|
||||
const int _kChar_A = 65;
|
||||
const int _kChar_a = 97;
|
||||
|
||||
/// Parser for the command-line arguments accepted by this script.
final ArgParser argParser = _createArgParser();

/// Builds the [ArgParser] with the `words` and `lines` options and the `dry`
/// flag.
ArgParser _createArgParser() {
  final ArgParser parser = ArgParser();
  parser.addOption(
    'words',
    abbr: 'w',
    help: 'Sync the word break properties.',
  );
  parser.addOption(
    'lines',
    abbr: 'l',
    help: 'Sync the line break properties.',
  );
  parser.addFlag(
    'dry',
    abbr: 'd',
    help: 'Dry mode does not write anything to disk. '
        'The output is printed to the console.',
  );
  return parser;
}
|
||||
|
||||
/// A tuple that holds a [start] and [end] of a unicode range and a [property].
|
||||
class PropertyTuple {
|
||||
const PropertyTuple(this.start, this.end, this.property);
|
||||
class UnicodeRange {
|
||||
const UnicodeRange(this.start, this.end, this.property);
|
||||
|
||||
final int start;
|
||||
final int end;
|
||||
final String property;
|
||||
final EnumValue property;
|
||||
|
||||
/// Checks if there's an overlap between this tuple's range and [other]'s
|
||||
/// range.
|
||||
bool isOverlapping(PropertyTuple other) {
|
||||
/// Checks if there's an overlap between this range and the [other] range.
|
||||
bool isOverlapping(UnicodeRange other) {
|
||||
return start <= other.end && end >= other.start;
|
||||
}
|
||||
|
||||
/// Checks if the [other] tuple is adjacent to this tuple.
|
||||
/// Checks if the [other] range is adjacent to this range.
|
||||
///
|
||||
/// Two tuples are considered adjacent if:
|
||||
/// - The new tuple's range immediately follows this tuple's range, and
|
||||
/// - The new tuple has the same property as this tuple.
|
||||
bool isAdjacent(PropertyTuple other) {
|
||||
/// Two ranges are considered adjacent if:
|
||||
/// - The new range immediately follows this range, and
|
||||
/// - The new range has the same property as this range.
|
||||
bool isAdjacent(UnicodeRange other) {
|
||||
return other.start == end + 1 && property == other.property;
|
||||
}
|
||||
|
||||
/// Merges the ranges of the 2 [PropertyTuples] if they are adjacent.
|
||||
PropertyTuple extendRange(PropertyTuple extension) {
|
||||
/// Merges the ranges of the 2 [UnicodeRange]s if they are adjacent.
|
||||
UnicodeRange extendRange(UnicodeRange extension) {
|
||||
assert(isAdjacent(extension));
|
||||
return PropertyTuple(start, extension.end, property);
|
||||
return UnicodeRange(start, extension.end, property);
|
||||
}
|
||||
}
|
||||
|
||||
/// Usage (from the root of the project):
|
||||
final String codegenPath = path.join(
|
||||
path.dirname(Platform.script.toFilePath()),
|
||||
'../lib/src/engine/text',
|
||||
);
|
||||
final String wordBreakCodegen =
|
||||
path.join(codegenPath, 'word_break_properties.dart');
|
||||
final String lineBreakCodegen =
|
||||
path.join(codegenPath, 'line_break_properties.dart');
|
||||
|
||||
/// Usage (from the root of the web_ui project).
|
||||
///
|
||||
/// To generate code for word break properties:
|
||||
/// ```
|
||||
/// dart tool/unicode_sync_script.dart <path/to/word/break/properties>
|
||||
/// dart tool/unicode_sync_script.dart -w <path/to/word/break/properties>
|
||||
/// ```
|
||||
///
|
||||
/// This script parses the unicode word break properties(1) and generates Dart
|
||||
/// To generate code for line break properties:
|
||||
/// ```
|
||||
/// dart tool/unicode_sync_script.dart -l <path/to/line/break/properties>
|
||||
/// ```
|
||||
///
|
||||
/// To do a dry run, add the `-d` flag:
|
||||
///
|
||||
/// ```
|
||||
/// dart tool/unicode_sync_script.dart -d ...
|
||||
/// ```
|
||||
///
|
||||
/// This script parses the unicode word/line break properties(1) and generates Dart
|
||||
/// code(2) that can perform lookups in the unicode ranges to find what property
|
||||
/// a letter has.
|
||||
///
|
||||
/// (1) The properties file can be downloaded from:
|
||||
/// https://www.unicode.org/Public/11.0.0/ucd/auxiliary/WordBreakProperty.txt
|
||||
/// (1) The word break properties file can be downloaded from:
|
||||
/// https://www.unicode.org/Public/13.0.0/ucd/auxiliary/WordBreakProperty.txt
|
||||
///
|
||||
/// (2) The codegen'd Dart file is located at:
|
||||
/// lib/src/text/word_break_properties.dart
|
||||
/// The line break properties file can be downloaded from:
|
||||
/// https://www.unicode.org/Public/13.0.0/ucd/LineBreak.txt
|
||||
///
|
||||
/// (2) The codegen'd Dart files are located at:
|
||||
/// lib/src/engine/text/word_break_properties.dart
|
||||
/// lib/src/engine/text/line_break_properties.dart
|
||||
void main(List<String> arguments) async {
|
||||
final String propertiesFile = arguments[0];
|
||||
final String codegenFile = path.join(
|
||||
path.dirname(Platform.script.toFilePath()),
|
||||
'../lib/src/engine/text/word_break_properties.dart',
|
||||
final ArgResults result = argParser.parse(arguments);
|
||||
final PropertiesSyncer syncer = getSyncer(
|
||||
result['words'],
|
||||
result['lines'],
|
||||
result['dry'],
|
||||
);
|
||||
WordBreakPropertiesSyncer(propertiesFile, codegenFile).perform();
|
||||
|
||||
syncer.perform();
|
||||
}
|
||||
|
||||
/// Returns the syncer that matches the given command-line options.
///
/// Exactly one of [wordBreakProperties] and [lineBreakProperties] must be
/// non-null; it is the path of the unicode properties file to read. If none or
/// both of them are given, an error message and the usage text are printed
/// and the process exits with code 64.
///
/// When [dry] is true the syncer is created through its `dry` constructor,
/// otherwise it is given the path of the matching codegen file as its
/// destination.
PropertiesSyncer getSyncer(
  String wordBreakProperties,
  String lineBreakProperties,
  bool dry,
) {
  final bool hasWords = wordBreakProperties != null;
  final bool hasLines = lineBreakProperties != null;

  if (!hasWords && !hasLines) {
    _exitWithUsage(
      'Expecting either a word break properties file or a line break '
      'properties file. None was given.\n',
    );
  }
  if (hasWords && hasLines) {
    _exitWithUsage(
      'Expecting either a word break properties file or a line break '
      'properties file. Both were given.\n',
    );
  }

  if (hasWords) {
    return dry
        ? WordBreakPropertiesSyncer.dry(wordBreakProperties)
        : WordBreakPropertiesSyncer(wordBreakProperties, wordBreakCodegen);
  }
  return dry
      ? LineBreakPropertiesSyncer.dry(lineBreakProperties)
      : LineBreakPropertiesSyncer(lineBreakProperties, lineBreakCodegen);
}

/// Prints [message] followed by the usage text, then terminates the process
/// with exit code 64.
void _exitWithUsage(String message) {
  print(message);
  print(argParser.usage);
  exit(64);
}
|
||||
|
||||
/// Base class that provides common logic for syncing all kinds of unicode
|
||||
@ -80,29 +143,34 @@ void main(List<String> arguments) async {
|
||||
/// Subclasses implement the [template] method which receives as argument the
|
||||
/// list of data parsed by [processLines].
|
||||
abstract class PropertiesSyncer {
|
||||
PropertiesSyncer(this._src, this._dest);
|
||||
PropertiesSyncer(this._src, this._dest) : _dryRun = false;
|
||||
PropertiesSyncer.dry(this._src)
|
||||
: _dest = null,
|
||||
_dryRun = true;
|
||||
|
||||
final String _src;
|
||||
final String _dest;
|
||||
final bool _dryRun;
|
||||
|
||||
String get prefix;
|
||||
String get enumDocLink;
|
||||
|
||||
void perform() async {
|
||||
final List<String> lines = await File(_src).readAsLines();
|
||||
final List<String> header = extractHeader(lines);
|
||||
final List<PropertyTuple> data = processLines(lines);
|
||||
final PropertyCollection data = PropertyCollection.fromLines(lines);
|
||||
|
||||
final IOSink sink = File(_dest).openWrite();
|
||||
sink.write(template(header, data));
|
||||
final String output = template(header, data);
|
||||
|
||||
if (_dryRun) {
|
||||
print(output);
|
||||
} else {
|
||||
final IOSink sink = File(_dest).openWrite();
|
||||
sink.write(output);
|
||||
}
|
||||
}
|
||||
|
||||
String template(List<String> header, List<PropertyTuple> data);
|
||||
}
|
||||
|
||||
/// Syncs Unicode's word break properties.
|
||||
class WordBreakPropertiesSyncer extends PropertiesSyncer {
|
||||
WordBreakPropertiesSyncer(String src, String dest) : super(src, dest);
|
||||
|
||||
@override
|
||||
String template(List<String> header, List<PropertyTuple> data) {
|
||||
String template(List<String> header, PropertyCollection data) {
|
||||
return '''
|
||||
// Copyright 2013 The Flutter Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
@ -117,64 +185,198 @@ class WordBreakPropertiesSyncer extends PropertiesSyncer {
|
||||
// @dart = 2.6
|
||||
part of engine;
|
||||
|
||||
CharProperty getCharProperty(String text, int index) {
|
||||
if (index < 0 || index >= text.length) {
|
||||
return null;
|
||||
}
|
||||
return lookup.find(text.codeUnitAt(index));
|
||||
/// For an explanation of these enum values, see:
|
||||
///
|
||||
/// * ${enumDocLink}
|
||||
enum ${prefix}CharProperty {
|
||||
${_getEnumValues(data.enumCollection).join('\n ')}
|
||||
}
|
||||
|
||||
enum CharProperty {
|
||||
${getEnumValues(data).join(',\n ')}
|
||||
}
|
||||
const String _packed${prefix}BreakProperties =
|
||||
'${_packProperties(data)}';
|
||||
|
||||
const UnicodePropertyLookup<CharProperty> lookup =
|
||||
UnicodePropertyLookup<CharProperty>(<UnicodeRange<CharProperty>>[
|
||||
${getLookupEntries(data).join(',\n ')}
|
||||
]);
|
||||
|
||||
UnicodePropertyLookup<${prefix}CharProperty> ${prefix.toLowerCase()}Lookup =
|
||||
UnicodePropertyLookup<${prefix}CharProperty>.fromPackedData(
|
||||
_packed${prefix}BreakProperties,
|
||||
${_getSingleRangesCount(data)},
|
||||
${prefix}CharProperty.values,
|
||||
);
|
||||
''';
|
||||
}
|
||||
|
||||
Iterable<String> getEnumValues(List<PropertyTuple> data) {
|
||||
return Set<String>.from(
|
||||
data.map<String>((PropertyTuple tuple) => tuple.property))
|
||||
.map(normalizePropertyName);
|
||||
}
|
||||
|
||||
Iterable<String> getLookupEntries(List<PropertyTuple> data) {
|
||||
data.sort(
|
||||
// Ranges don't overlap so it's safe to sort based on the start of each
|
||||
// range.
|
||||
(PropertyTuple tuple1, PropertyTuple tuple2) =>
|
||||
tuple1.start.compareTo(tuple2.start),
|
||||
Iterable<String> _getEnumValues(EnumCollection enumCollection) {
|
||||
return enumCollection.values.map(
|
||||
(EnumValue value) =>
|
||||
'${value.enumName}, // serialized as "${value.serialized}"',
|
||||
);
|
||||
verifyNoOverlappingRanges(data);
|
||||
return combineAdjacentRanges(data)
|
||||
.map((PropertyTuple tuple) => generateLookupEntry(tuple));
|
||||
}
|
||||
|
||||
String generateLookupEntry(PropertyTuple tuple) {
|
||||
final String propertyStr =
|
||||
'CharProperty.${normalizePropertyName(tuple.property)}';
|
||||
return 'UnicodeRange<CharProperty>(${toHex(tuple.start)}, ${toHex(tuple.end)}, $propertyStr)';
|
||||
int _getSingleRangesCount(PropertyCollection data) {
|
||||
int count = 0;
|
||||
for (final UnicodeRange range in data.ranges) {
|
||||
if (range.start == range.end) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
String _packProperties(PropertyCollection data) {
|
||||
final StringBuffer buffer = StringBuffer();
|
||||
for (final UnicodeRange range in data.ranges) {
|
||||
buffer.write(range.start.toRadixString(36).padLeft(4, '0'));
|
||||
if (range.start == range.end) {
|
||||
buffer.write('!');
|
||||
} else {
|
||||
buffer.write(range.end.toRadixString(36).padLeft(4, '0'));
|
||||
}
|
||||
buffer.write(range.property.serialized);
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/// Syncs Unicode's word break properties.
class WordBreakPropertiesSyncer extends PropertiesSyncer {
  /// Creates a syncer that reads the properties from [src] and is given
  /// [dest] as its destination.
  WordBreakPropertiesSyncer(String src, String dest) : super(src, dest);

  /// Creates a dry-run syncer that reads the properties from [src].
  WordBreakPropertiesSyncer.dry(String src) : super.dry(src);

  /// Prefix used in the names of the generated declarations, e.g.
  /// `WordCharProperty` and `wordLookup`.
  @override
  final String prefix = 'Word';

  /// Link that is placed in the doc comment of the generated enum.
  @override
  final String enumDocLink =
      'http://unicode.org/reports/tr29/#Table_Word_Break_Property_Values';
}
|
||||
|
||||
/// Syncs Unicode's line break properties.
class LineBreakPropertiesSyncer extends PropertiesSyncer {
  /// Creates a syncer that reads the properties from [src] and is given
  /// [dest] as its destination.
  LineBreakPropertiesSyncer(String src, String dest) : super(src, dest);

  /// Creates a dry-run syncer that reads the properties from [src].
  LineBreakPropertiesSyncer.dry(String src) : super.dry(src);

  /// Prefix used in the names of the generated declarations, e.g.
  /// `LineCharProperty` and `lineLookup`.
  @override
  final String prefix = 'Line';

  /// Link that is placed in the doc comment of the generated enum.
  @override
  final String enumDocLink =
      'https://unicode.org/reports/tr14/#DescriptionOfProperties';
}
|
||||
|
||||
/// Holds the collection of properties parsed from the unicode spec file.
class PropertyCollection {
  /// Parses the [lines] of a unicode properties file.
  ///
  /// Comments are removed from each line and lines that end up empty are
  /// ignored. The remaining lines are parsed into ranges, which are then
  /// handed to [processRanges] to produce [ranges].
  PropertyCollection.fromLines(List<String> lines) {
    final List<UnicodeRange> parsedRanges = <UnicodeRange>[];
    for (final String rawLine in lines) {
      final String content = removeCommentFromLine(rawLine);
      if (content.isNotEmpty) {
        parsedRanges.add(parseLineIntoUnicodeRange(content));
      }
    }
    ranges = processRanges(parsedRanges);
  }

  /// The ranges obtained from the properties file, after processing.
  List<UnicodeRange> ranges;

  /// The distinct property values that were encountered while parsing.
  final EnumCollection enumCollection = EnumCollection();

  /// Parses a single, comment-free [line] of the properties file.
  ///
  /// The text before the `;` describes the range and the text after it is the
  /// property name, which gets registered in [enumCollection].
  ///
  /// Examples:
  ///
  /// 00C0..00D6    ; ALetter
  /// 037F          ; ALetter
  ///
  /// Would be parsed into:
  ///
  /// ```dart
  /// UnicodeRange(192, 214, EnumValue('ALetter'));
  /// UnicodeRange(895, 895, EnumValue('ALetter'));
  /// ```
  UnicodeRange parseLineIntoUnicodeRange(String line) {
    final List<String> fields = line.split(';');
    final String codePoints = fields[0].trim();
    final String propertyName = fields[1].trim();

    final int start = getRangeStart(codePoints);
    final int end = getRangeEnd(codePoints);
    return UnicodeRange(start, end, enumCollection.add(propertyName));
  }
}
|
||||
|
||||
/// Represents the collection of values of an enum.
class EnumCollection {
  /// The enum values, in the order in which they were first added.
  final List<EnumValue> values = <EnumValue>[];

  /// Returns the enum value called [name], creating it if it doesn't exist.
  ///
  /// A newly created value is appended to [values] and gets its position in
  /// that list as its index.
  EnumValue add(String name) {
    for (final EnumValue existing in values) {
      if (existing.name == name) {
        return existing;
      }
    }
    final EnumValue created = EnumValue(values.length, name);
    values.add(created);
    return created;
  }
}
|
||||
|
||||
/// Represents a single value in an [EnumCollection].
class EnumValue {
  EnumValue(this.index, this.name);

  /// The index used to derive the [serialized] form of this value.
  final int index;

  /// The property name this value was created with.
  final String name;

  /// Returns a serialized, compact format of the enum value.
  ///
  /// Enum values are serialized based on their index. Indices 0 to 25 map to
  /// "A", "B", "C", etc until "Z". Indices 26 to 51 continue with "a", "b",
  /// "c", etc until "z".
  ///
  /// Throws a [RangeError] if [index] is outside `0..51`, because no letter
  /// is left to represent it.
  String get serialized {
    // Only 52 single-letter codes exist. Fail loudly instead of silently
    // emitting a non-letter character for an index past "z".
    RangeError.checkValueInInterval(index, 0, 51, 'index');

    // We assign uppercase letters to the first 26 enum values.
    if (index < 26) {
      return String.fromCharCode(_kChar_A + index);
    }
    // Enum values from index 26 onwards are assigned a lowercase letter.
    return String.fromCharCode(_kChar_a + index - 26);
  }

  /// Returns the enum name that'll be used in the Dart code.
  ///
  /// This is [name] with every underscore removed.
  ///
  /// ```dart
  /// enum CharProperty {
  ///   ALetter, // <-- this is the name returned by this method ("ALetter").
  ///   Numeric,
  ///   // etc...
  /// }
  /// ```
  String get enumName {
    return name.replaceAll('_', '');
  }
}
|
||||
|
||||
/// Orders [data] by range start, checks that no two ranges overlap, and then
/// merges neighbouring ranges via [combineAdjacentRanges].
///
/// Note that [data] itself is sorted in place.
Iterable<UnicodeRange> processRanges(List<UnicodeRange> data) {
  // Ranges don't overlap, so comparing their starts is enough to order them.
  int byStart(UnicodeRange a, UnicodeRange b) {
    return a.start.compareTo(b.start);
  }

  data.sort(byStart);
  verifyNoOverlappingRanges(data);
  final List<UnicodeRange> merged = combineAdjacentRanges(data);
  return merged;
}
|
||||
|
||||
/// Example:
|
||||
///
|
||||
/// ```
|
||||
/// UnicodeRange<CharProperty>(0x01C4, 0x0293, CharProperty.ALetter),
|
||||
/// UnicodeRange<CharProperty>(0x0294, 0x0294, CharProperty.ALetter),
|
||||
/// UnicodeRange<CharProperty>(0x0295, 0x02AF, CharProperty.ALetter),
|
||||
/// 0x01C4..0x0293; ALetter
|
||||
/// 0x0294..0x0294; ALetter
|
||||
/// 0x0295..0x02AF; ALetter
|
||||
/// ```
|
||||
///
|
||||
/// will get combined into:
|
||||
///
|
||||
/// ```
|
||||
/// UnicodeRange<CharProperty>(0x01C4, 0x02AF, CharProperty.ALetter)
|
||||
/// 0x01C4..0x02AF; ALetter
|
||||
/// ```
|
||||
List<PropertyTuple> combineAdjacentRanges(List<PropertyTuple> data) {
|
||||
final List<PropertyTuple> result = <PropertyTuple>[data.first];
|
||||
List<UnicodeRange> combineAdjacentRanges(List<UnicodeRange> data) {
|
||||
final List<UnicodeRange> result = <UnicodeRange>[data.first];
|
||||
for (int i = 1; i < data.length; i++) {
|
||||
if (result.last.isAdjacent(data[i])) {
|
||||
result.last = result.last.extendRange(data[i]);
|
||||
@ -196,11 +398,7 @@ int getRangeEnd(String range) {
|
||||
return int.parse(range, radix: 16);
|
||||
}
|
||||
|
||||
/// Formats [value] as an uppercase hexadecimal literal prefixed with `0x`.
///
/// The digits are left-padded with zeros to a minimum width of four, e.g.
/// `255` becomes `0x00FF`.
String toHex(int value) {
  final String rawDigits = value.toRadixString(16);
  final String paddedDigits = rawDigits.padLeft(4, '0');
  final String upperDigits = paddedDigits.toUpperCase();
  return '0x$upperDigits';
}
|
||||
|
||||
void verifyNoOverlappingRanges(List<PropertyTuple> data) {
|
||||
void verifyNoOverlappingRanges(List<UnicodeRange> data) {
|
||||
for (int i = 1; i < data.length; i++) {
|
||||
if (data[i].isOverlapping(data[i - 1])) {
|
||||
throw Exception('Data contains overlapping ranges.');
|
||||
@ -211,7 +409,7 @@ void verifyNoOverlappingRanges(List<PropertyTuple> data) {
|
||||
List<String> extractHeader(List<String> lines) {
|
||||
final List<String> headerLines = <String>[];
|
||||
for (String line in lines) {
|
||||
if (line.contains('=======')) {
|
||||
if (line.trim() == '#' || line.trim().isEmpty) {
|
||||
break;
|
||||
}
|
||||
if (line.isNotEmpty) {
|
||||
@ -221,45 +419,7 @@ List<String> extractHeader(List<String> lines) {
|
||||
return headerLines;
|
||||
}
|
||||
|
||||
/// Converts the raw spec file [lines] into a list of [PropertyTuple]s.
///
/// Each line has its comment stripped with [removeCommentFromLine]; lines
/// that are empty afterwards are skipped, and the rest are parsed with
/// [parseLineIntoPropertyTuple].
List<PropertyTuple> processLines(List<String> lines) {
  final List<PropertyTuple> tuples = <PropertyTuple>[];
  for (final String rawLine in lines) {
    final String content = removeCommentFromLine(rawLine);
    if (content.isEmpty) {
      continue;
    }
    tuples.add(parseLineIntoPropertyTuple(content));
  }
  return tuples;
}
|
||||
|
||||
/// Returns [property] with every underscore removed.
///
/// For example, `Extended_Pictographic` becomes `ExtendedPictographic`.
String normalizePropertyName(String property) {
  final List<String> pieces = property.split('_');
  return pieces.join();
}
|
||||
|
||||
/// Strips the trailing comment from [line].
///
/// Everything from the first `#` onwards is dropped. A line without any `#`
/// is returned unchanged. Whitespace before the `#` is preserved.
String removeCommentFromLine(String line) {
  final int commentStart = line.indexOf('#');
  if (commentStart < 0) {
    return line;
  }
  return line.substring(0, commentStart);
}
|
||||
|
||||
/// Parses one comment-free line of the spec file into a [PropertyTuple].
///
/// The line is split on `;`: the first field is either a single hexadecimal
/// code point or a `start..end` pair, and the second field is the property
/// name. Both fields are trimmed.
///
/// Examples:
///
/// 00C0..00D6    ; ALetter
/// 037F          ; ALetter
///
/// Would be parsed into:
///
/// ```dart
/// PropertyTuple(192, 214, 'ALetter');
/// PropertyTuple(895, 895, 'ALetter');
/// ```
PropertyTuple parseLineIntoPropertyTuple(String line) {
  final List<String> fields = line.split(';');
  final String range = fields[0].trim();
  final String property = fields[1].trim();

  // A lone code point is a range that starts and ends on the same value.
  String startHex = range;
  String endHex = range;
  if (range.contains('..')) {
    final List<String> bounds = range.split('..');
    startHex = bounds[0];
    endHex = bounds[1];
  }

  final int start = int.parse(startHex, radix: 16);
  final int end = int.parse(endHex, radix: 16);
  return PropertyTuple(start, end, property);
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user