[web] Add support for syncing unicode line break properties (flutter/engine#18040)

2026-02-20 02:29:02 +08:00 · 2020-05-04 20:59:41 -07:00 · 2020-05-04 20:59:41 -07:00 · cec58e3fb0
commit cec58e3fb0
parent 62a91ad16c
8 changed files with 616 additions and 1218 deletions
--- a/engine/src/flutter/ci/licenses_golden/licenses_flutter
+++ b/engine/src/flutter/ci/licenses_golden/licenses_flutter
@ -490,6 +490,7 @@ FILE: ../../../flutter/lib/web_ui/lib/src/engine/surface/surface_stats.dart
 FILE: ../../../flutter/lib/web_ui/lib/src/engine/surface/transform.dart
 FILE: ../../../flutter/lib/web_ui/lib/src/engine/test_embedding.dart
 FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/font_collection.dart
+FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/line_break_properties.dart
 FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/line_breaker.dart
 FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/measurement.dart
 FILE: ../../../flutter/lib/web_ui/lib/src/engine/text/paragraph.dart
--- a/engine/src/flutter/lib/web_ui/lib/src/engine.dart
+++ b/engine/src/flutter/lib/web_ui/lib/src/engine.dart
@ -108,6 +108,7 @@ part 'engine/surface/surface_stats.dart';
 part 'engine/surface/transform.dart';
 part 'engine/test_embedding.dart';
 part 'engine/text/font_collection.dart';
+part 'engine/text/line_break_properties.dart';
 part 'engine/text/line_breaker.dart';
 part 'engine/text/measurement.dart';
 part 'engine/text/paragraph.dart';
--- a/engine/src/flutter/lib/web_ui/lib/src/engine/text/line_break_properties.dart
+++ b/engine/src/flutter/lib/web_ui/lib/src/engine/text/line_break_properties.dart
--- a/engine/src/flutter/lib/web_ui/lib/src/engine/text/unicode_range.dart
+++ b/engine/src/flutter/lib/web_ui/lib/src/engine/text/unicode_range.dart
@ -5,6 +5,14 @@
 // @dart = 2.6
 part of engine;

+const int _kChar_0 = 48;
+const int _kChar_9 = 57;
+const int _kChar_A = 65;
+const int _kChar_Z = 90;
+const int _kChar_a = 97;
+const int _kChar_z = 122;
+const int _kCharBang = 33;
+
 enum _ComparisonResult {
  inside,
  higher,
@ -63,11 +71,29 @@ class UnicodeRange<P> {
 class UnicodePropertyLookup<P> {
  const UnicodePropertyLookup(this.ranges);

+  /// Creates a [UnicodePropertyLookup] from packed unicode property data.
+  factory UnicodePropertyLookup.fromPackedData(
+    String packedData,
+    int singleRangesCount,
+    List<P> propertyEnumValues,
+  ) {
+    return UnicodePropertyLookup<P>(
+      _unpackProperties<P>(packedData, singleRangesCount, propertyEnumValues),
+    );
+  }
+
  final List<UnicodeRange<P>> ranges;

-  P find(int value) {
-    final int index = _binarySearch(value);
-    return index == -1 ? null : ranges[index].property;
+  /// Takes a [text] and an [index], and returns the property of the character
+  /// located at that [index].
+  ///
+  /// If the [index] is out of range, null will be returned.
+  P find(String text, int index) {
+    if (index < 0 || index >= text.length) {
+      return null;
+    }
+    final int rangeIndex = _binarySearch(text.codeUnitAt(index));
+    return rangeIndex == -1 ? null : ranges[rangeIndex].property;
  }

  int _binarySearch(int value) {
@ -90,3 +116,94 @@ class UnicodePropertyLookup<P> {
    return -1;
  }
 }
+
+/// Decodes [packedData] into the list of [UnicodeRange]s it describes.
+///
+/// [singleRangesCount] is the number of entries stored in the compact
+/// single-value form; it is only used to sanity-check the data length.
+/// [propertyEnumValues] maps a decoded enum index to its property value.
+List<UnicodeRange<P>> _unpackProperties<P>(
+  String packedData,
+  int singleRangesCount,
+  List<P> propertyEnumValues,
+) {
+  // Layout of a regular entry (9 characters):
+  //
+  // * [0..3]: Range start, as a 4-digit base36 integer.
+  // * [4..7]: Range end, as a 4-digit base36 integer.
+  // * [8]: A single letter identifying the property enum value.
+  //
+  // Layout of a compact entry (6 characters), used when start == end:
+  //
+  // * [0..3]: The single value, as a 4-digit base36 integer.
+  // * [4]: "!" marking the absence of a range end.
+  // * [5]: A single letter identifying the property enum value.
+
+  // Every compact entry is 3 characters shorter than a regular one, so adding
+  // those characters back must yield a whole number of 9-character entries.
+  assert((packedData.length + singleRangesCount * 3) % 9 == 0);
+
+  final List<UnicodeRange<P>> unpacked = <UnicodeRange<P>>[];
+  final int end = packedData.length;
+  int cursor = 0;
+  while (cursor < end) {
+    final int start = _consumeInt(packedData, cursor);
+    cursor += 4;
+
+    // A "!" right after the start means the entry is a single value.
+    final bool isSingle = packedData.codeUnitAt(cursor) == _kCharBang;
+    final int rangeEnd = isSingle ? start : _consumeInt(packedData, cursor);
+    cursor += isSingle ? 1 : 4;
+
+    final int enumIndex =
+        _getEnumIndexFromPackedValue(packedData.codeUnitAt(cursor));
+    final P property = propertyEnumValues[enumIndex];
+    cursor++;
+
+    unpacked.add(UnicodeRange<P>(start, rangeEnd, property));
+  }
+  return unpacked;
+}
+
+/// Maps a packed property letter, given as its [charCode], back to the index
+/// of the enum value it stands for.
+///
+/// "A".."Z" decode to indices 0..25 and "a".."z" decode to indices 26..51.
+int _getEnumIndexFromPackedValue(int charCode) {
+  // Keep this in sync with [EnumValue.serialized] in
+  // `tool/unicode_sync_script.dart`.
+
+  assert((charCode >= _kChar_a && charCode <= _kChar_z) ||
+      (charCode >= _kChar_A && charCode <= _kChar_Z));
+
+  // Uppercase letters cover the first 26 enum values; lowercase letters cover
+  // the enum values at index 26 and above.
+  return charCode <= _kChar_Z
+      ? charCode - _kChar_A
+      : 26 + charCode - _kChar_a;
+}
+
+/// Reads the 4-digit base36 integer that starts at [index] in [packedData].
+int _consumeInt(String packedData, int index) {
+  // Behaves like:
+  //
+  // ```dart
+  // return int.tryParse(packedData.substring(index, index + 4), radix: 36);
+  // ```
+  //
+  // Calling substring this many times is slow, though. Decoding the digits by
+  // hand makes the unpacking 25%-45% faster than using substring.
+
+  // Digits are read from the least significant one to the most significant.
+  final int units = _getIntFromCharCode(packedData.codeUnitAt(index + 3));
+  final int second = _getIntFromCharCode(packedData.codeUnitAt(index + 2));
+  final int third = _getIntFromCharCode(packedData.codeUnitAt(index + 1));
+  final int leading = _getIntFromCharCode(packedData.codeUnitAt(index));
+
+  // Horner's scheme: ((d3 * 36 + d2) * 36 + d1) * 36 + d0.
+  return ((leading * 36 + third) * 36 + second) * 36 + units;
+}
+
+/// Converts a single base36 digit, given as its [charCode], to its value.
+///
+/// Gives the same result as `int.parse(digit, radix: 36)` would for a
+/// one-character string made of "0".."9" or lowercase "a".."z".
+int _getIntFromCharCode(int charCode) {
+  assert((charCode >= _kChar_a && charCode <= _kChar_z) ||
+      (charCode >= _kChar_0 && charCode <= _kChar_9));
+
+  // Letters: "a" is worth 10 and each following letter is worth one more.
+  if (charCode > _kChar_9) {
+    return charCode - _kChar_a + 10;
+  }
+  // Decimal digits are worth their face value.
+  return charCode - _kChar_0;
+}
--- a/engine/src/flutter/lib/web_ui/lib/src/engine/text/word_break_properties.dart
+++ b/engine/src/flutter/lib/web_ui/lib/src/engine/text/word_break_properties.dart
--- a/engine/src/flutter/lib/web_ui/lib/src/engine/text/word_breaker.dart
+++ b/engine/src/flutter/lib/web_ui/lib/src/engine/text/word_breaker.dart
@ -65,21 +65,21 @@ abstract class WordBreaker {
      return false;
    }

-    final CharProperty immediateRight = getCharProperty(text, index);
-    CharProperty immediateLeft = getCharProperty(text, index - 1);
+    final WordCharProperty immediateRight = wordLookup.find(text, index);
+    WordCharProperty immediateLeft = wordLookup.find(text, index - 1);

    // Do not break within CRLF.
    // WB3: CR × LF
-    if (immediateLeft == CharProperty.CR && immediateRight == CharProperty.LF)
+    if (immediateLeft == WordCharProperty.CR && immediateRight == WordCharProperty.LF)
      return false;

    // Otherwise break before and after Newlines (including CR and LF)
    // WB3a: (Newline | CR | LF) ÷
    if (_oneOf(
      immediateLeft,
-      CharProperty.Newline,
-      CharProperty.CR,
-      CharProperty.LF,
+      WordCharProperty.Newline,
+      WordCharProperty.CR,
+      WordCharProperty.LF,
    )) {
      return true;
    }
@ -87,9 +87,9 @@ abstract class WordBreaker {
    // WB3b: ÷ (Newline | CR | LF)
    if (_oneOf(
      immediateRight,
-      CharProperty.Newline,
-      CharProperty.CR,
-      CharProperty.LF,
+      WordCharProperty.Newline,
+      WordCharProperty.CR,
+      WordCharProperty.LF,
    )) {
      return true;
    }
@ -99,8 +99,8 @@ abstract class WordBreaker {

    // Keep horizontal whitespace together.
    // WB3d: WSegSpace × WSegSpace
-    if (immediateLeft == CharProperty.WSegSpace &&
-        immediateRight == CharProperty.WSegSpace) {
+    if (immediateLeft == WordCharProperty.WSegSpace &&
+        immediateRight == WordCharProperty.WSegSpace) {
      return false;
    }

@ -109,9 +109,9 @@ abstract class WordBreaker {
    // WB4: X (Extend | Format | ZWJ)* → X
    if (_oneOf(
      immediateRight,
-      CharProperty.Extend,
-      CharProperty.Format,
-      CharProperty.ZWJ,
+      WordCharProperty.Extend,
+      WordCharProperty.Format,
+      WordCharProperty.ZWJ,
    )) {
      // The Extend|Format|ZWJ character is to the right, so it is attached
      // to a character to the left, don't split here
@ -122,16 +122,16 @@ abstract class WordBreaker {
    int l = 0;
    while (_oneOf(
      immediateLeft,
-      CharProperty.Extend,
-      CharProperty.Format,
-      CharProperty.ZWJ,
+      WordCharProperty.Extend,
+      WordCharProperty.Format,
+      WordCharProperty.ZWJ,
    )) {
      l++;
      if (index - l - 1 < 0) {
        // Reached the beginning of text.
        return true;
      }
-      immediateLeft = getCharProperty(text, index - l - 1);
+      immediateLeft = wordLookup.find(text, index - l - 1);
    }

    // Do not break between most letters.
@ -145,27 +145,27 @@ abstract class WordBreaker {

    // Skip all Format, Extend and ZWJ to the right.
    int r = 0;
-    CharProperty nextRight;
+    WordCharProperty nextRight;
    do {
      r++;
-      nextRight = getCharProperty(text, index + r);
+      nextRight = wordLookup.find(text, index + r);
    } while (_oneOf(
      nextRight,
-      CharProperty.Extend,
-      CharProperty.Format,
-      CharProperty.ZWJ,
+      WordCharProperty.Extend,
+      WordCharProperty.Format,
+      WordCharProperty.ZWJ,
    ));

    // Skip all Format, Extend and ZWJ to the left.
-    CharProperty nextLeft;
+    WordCharProperty nextLeft;
    do {
      l++;
-      nextLeft = getCharProperty(text, index - l - 1);
+      nextLeft = wordLookup.find(text, index - l - 1);
    } while (_oneOf(
      nextLeft,
-      CharProperty.Extend,
-      CharProperty.Format,
-      CharProperty.ZWJ,
+      WordCharProperty.Extend,
+      WordCharProperty.Format,
+      WordCharProperty.ZWJ,
    ));

    // Do not break letters across certain punctuation.
@ -173,9 +173,9 @@ abstract class WordBreaker {
    if (_isAHLetter(immediateLeft) &&
        _oneOf(
          immediateRight,
-          CharProperty.MidLetter,
-          CharProperty.MidNumLet,
-          CharProperty.SingleQuote,
+          WordCharProperty.MidLetter,
+          WordCharProperty.MidNumLet,
+          WordCharProperty.SingleQuote,
        ) &&
        _isAHLetter(nextRight)) {
      return false;
@ -185,79 +185,79 @@ abstract class WordBreaker {
    if (_isAHLetter(nextLeft) &&
        _oneOf(
          immediateLeft,
-          CharProperty.MidLetter,
-          CharProperty.MidNumLet,
-          CharProperty.SingleQuote,
+          WordCharProperty.MidLetter,
+          WordCharProperty.MidNumLet,
+          WordCharProperty.SingleQuote,
        ) &&
        _isAHLetter(immediateRight)) {
      return false;
    }

    // WB7a: Hebrew_Letter × Single_Quote
-    if (immediateLeft == CharProperty.HebrewLetter &&
-        immediateRight == CharProperty.SingleQuote) {
+    if (immediateLeft == WordCharProperty.HebrewLetter &&
+        immediateRight == WordCharProperty.SingleQuote) {
      return false;
    }

    // WB7b: Hebrew_Letter × Double_Quote Hebrew_Letter
-    if (immediateLeft == CharProperty.HebrewLetter &&
-        immediateRight == CharProperty.DoubleQuote &&
-        nextRight == CharProperty.HebrewLetter) {
+    if (immediateLeft == WordCharProperty.HebrewLetter &&
+        immediateRight == WordCharProperty.DoubleQuote &&
+        nextRight == WordCharProperty.HebrewLetter) {
      return false;
    }

    // WB7c: Hebrew_Letter Double_Quote × Hebrew_Letter
-    if (nextLeft == CharProperty.HebrewLetter &&
-        immediateLeft == CharProperty.DoubleQuote &&
-        immediateRight == CharProperty.HebrewLetter) {
+    if (nextLeft == WordCharProperty.HebrewLetter &&
+        immediateLeft == WordCharProperty.DoubleQuote &&
+        immediateRight == WordCharProperty.HebrewLetter) {
      return false;
    }

    // Do not break within sequences of digits, or digits adjacent to letters
    // (“3a”, or “A3”).
    // WB8: Numeric × Numeric
-    if (immediateLeft == CharProperty.Numeric &&
-        immediateRight == CharProperty.Numeric) {
+    if (immediateLeft == WordCharProperty.Numeric &&
+        immediateRight == WordCharProperty.Numeric) {
      return false;
    }

    // WB9: AHLetter × Numeric
-    if (_isAHLetter(immediateLeft) && immediateRight == CharProperty.Numeric)
+    if (_isAHLetter(immediateLeft) && immediateRight == WordCharProperty.Numeric)
      return false;

    // WB10: Numeric × AHLetter
-    if (immediateLeft == CharProperty.Numeric && _isAHLetter(immediateRight))
+    if (immediateLeft == WordCharProperty.Numeric && _isAHLetter(immediateRight))
      return false;

    // Do not break within sequences, such as “3.2” or “3,456.789”.
    // WB11: Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-    if (nextLeft == CharProperty.Numeric &&
+    if (nextLeft == WordCharProperty.Numeric &&
        _oneOf(
          immediateLeft,
-          CharProperty.MidNum,
-          CharProperty.MidNumLet,
-          CharProperty.SingleQuote,
+          WordCharProperty.MidNum,
+          WordCharProperty.MidNumLet,
+          WordCharProperty.SingleQuote,
        ) &&
-        immediateRight == CharProperty.Numeric) {
+        immediateRight == WordCharProperty.Numeric) {
      return false;
    }

    // WB12: Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-    if (immediateLeft == CharProperty.Numeric &&
+    if (immediateLeft == WordCharProperty.Numeric &&
        _oneOf(
          immediateRight,
-          CharProperty.MidNum,
-          CharProperty.MidNumLet,
-          CharProperty.SingleQuote,
+          WordCharProperty.MidNum,
+          WordCharProperty.MidNumLet,
+          WordCharProperty.SingleQuote,
        ) &&
-        nextRight == CharProperty.Numeric) {
+        nextRight == WordCharProperty.Numeric) {
      return false;
    }

    // Do not break between Katakana.
    // WB13: Katakana × Katakana
-    if (immediateLeft == CharProperty.Katakana &&
-        immediateRight == CharProperty.Katakana) {
+    if (immediateLeft == WordCharProperty.Katakana &&
+        immediateRight == WordCharProperty.Katakana) {
      return false;
    }

@ -265,24 +265,24 @@ abstract class WordBreaker {
    // WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
    if (_oneOf(
          immediateLeft,
-          CharProperty.ALetter,
-          CharProperty.HebrewLetter,
-          CharProperty.Numeric,
-          CharProperty.Katakana,
-          CharProperty.ExtendNumLet,
+          WordCharProperty.ALetter,
+          WordCharProperty.HebrewLetter,
+          WordCharProperty.Numeric,
+          WordCharProperty.Katakana,
+          WordCharProperty.ExtendNumLet,
        ) &&
-        immediateRight == CharProperty.ExtendNumLet) {
+        immediateRight == WordCharProperty.ExtendNumLet) {
      return false;
    }

    // WB13b: ExtendNumLet × (AHLetter | Numeric | Katakana)
-    if (immediateLeft == CharProperty.ExtendNumLet &&
+    if (immediateLeft == WordCharProperty.ExtendNumLet &&
        _oneOf(
          immediateRight,
-          CharProperty.ALetter,
-          CharProperty.HebrewLetter,
-          CharProperty.Numeric,
-          CharProperty.Katakana,
+          WordCharProperty.ALetter,
+          WordCharProperty.HebrewLetter,
+          WordCharProperty.Numeric,
+          WordCharProperty.Katakana,
        )) {
      return false;
    }
@ -306,12 +306,12 @@ abstract class WordBreaker {
  }

  static bool _oneOf(
-    CharProperty value,
-    CharProperty choice1,
-    CharProperty choice2, [
-    CharProperty choice3,
-    CharProperty choice4,
-    CharProperty choice5,
+    WordCharProperty value,
+    WordCharProperty choice1,
+    WordCharProperty choice2, [
+    WordCharProperty choice3,
+    WordCharProperty choice4,
+    WordCharProperty choice5,
  ]) {
    if (value == choice1) {
      return true;
@ -331,7 +331,7 @@ abstract class WordBreaker {
    return false;
  }

-  static bool _isAHLetter(CharProperty property) {
-    return _oneOf(property, CharProperty.ALetter, CharProperty.HebrewLetter);
+  static bool _isAHLetter(WordCharProperty property) {
+    return _oneOf(property, WordCharProperty.ALetter, WordCharProperty.HebrewLetter);
  }
 }
--- a/engine/src/flutter/lib/web_ui/lib/src/ui/initialization.dart
+++ b/engine/src/flutter/lib/web_ui/lib/src/ui/initialization.dart
@ -8,6 +8,19 @@ part of ui;
 /// Initializes the platform.
 Future<void> webOnlyInitializePlatform({
  engine.AssetManager assetManager,
+}) {
+  final Future<void> initializationFuture = _initializePlatform(assetManager: assetManager);
+  scheduleMicrotask(() {
+    // Access [engine.lineLookup] to force the lazy unpacking of line break data
+    // now. Removing this line won't break anything. It's just an optimization
+    // to make the unpacking happen while we are waiting for network requests.
+    engine.lineLookup;
+  });
+  return initializationFuture;
+}
+
+Future<void> _initializePlatform({
+  engine.AssetManager assetManager,
 }) async {
  if (!debugEmulateFlutterTesterEnvironment) {
    engine.window.locationStrategy = const engine.HashLocationStrategy();
--- a/engine/src/flutter/lib/web_ui/tool/unicode_sync_script.dart
+++ b/engine/src/flutter/lib/web_ui/tool/unicode_sync_script.dart
@ -2,76 +2,139 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

-// TODO(mdebbar): To reduce the size of generated code, we could pack the data
-//   into a smaller format, e.g:
-//
-// ```dart
-// const _rawData = [
-//   0x000A, 0x000A, 1,
-//   0x000B, 0x000C, 2,
-//   0x000D, 0x000D, 3,
-//   0x0020, 0x0020, 4,
-//   // ...
-// ];
-// ```
-//
-// Then we could lazily build the lookup instance on demand.
 // @dart = 2.6
 import 'dart:io';
+
+import 'package:args/args.dart';
 import 'package:path/path.dart' as path;

+const int _kChar_A = 65;
+const int _kChar_a = 97;
+
+final ArgParser argParser = ArgParser()
+  ..addOption(
+    'words',
+    abbr: 'w',
+    help: 'Sync the word break properties.',
+  )
+  ..addOption(
+    'lines',
+    abbr: 'l',
+    help: 'Sync the line break properties.',
+  )
+  ..addFlag(
+    'dry',
+    abbr: 'd',
+    help: 'Dry mode does not write anything to disk. '
+        'The output is printed to the console.',
+  );
+
 /// A tuple that holds a [start] and [end] of a unicode range and a [property].
-class PropertyTuple {
-  const PropertyTuple(this.start, this.end, this.property);
+class UnicodeRange {
+  const UnicodeRange(this.start, this.end, this.property);

  final int start;
  final int end;
-  final String property;
+  final EnumValue property;

-  /// Checks if there's an overlap between this tuple's range and [other]'s
-  /// range.
-  bool isOverlapping(PropertyTuple other) {
+  /// Checks if there's an overlap between this range and the [other] range.
+  bool isOverlapping(UnicodeRange other) {
    return start <= other.end && end >= other.start;
  }

-  /// Checks if the [other] tuple is adjacent to this tuple.
+  /// Checks if the [other] range is adjacent to this range.
  ///
-  /// Two tuples are considered adjacent if:
-  /// - The new tuple's range immediately follows this tuple's range, and
-  /// - The new tuple has the same property as this tuple.
-  bool isAdjacent(PropertyTuple other) {
+  /// Two ranges are considered adjacent if:
+  /// - The new range immediately follows this range, and
+  /// - The new range has the same property as this range.
+  bool isAdjacent(UnicodeRange other) {
    return other.start == end + 1 && property == other.property;
  }

-  /// Merges the ranges of the 2 [PropertyTuples] if they are adjacent.
-  PropertyTuple extendRange(PropertyTuple extension) {
+  /// Merges the ranges of the 2 [UnicodeRange]s if they are adjacent.
+  UnicodeRange extendRange(UnicodeRange extension) {
    assert(isAdjacent(extension));
-    return PropertyTuple(start, extension.end, property);
+    return UnicodeRange(start, extension.end, property);
  }
 }

-/// Usage (from the root of the project):
+final String codegenPath = path.join(
+  path.dirname(Platform.script.toFilePath()),
+  '../lib/src/engine/text',
+);
+final String wordBreakCodegen =
+    path.join(codegenPath, 'word_break_properties.dart');
+final String lineBreakCodegen =
+    path.join(codegenPath, 'line_break_properties.dart');
+
+/// Usage (from the root of the web_ui project).
 ///
+/// To generate code for word break properties:
 /// ```
-/// dart tool/unicode_sync_script.dart <path/to/word/break/properties>
+/// dart tool/unicode_sync_script.dart -w <path/to/word/break/properties>
 /// ```
 ///
-/// This script parses the unicode word break properties(1) and generates Dart
+/// To generate code for line break properties:
+/// ```
+/// dart tool/unicode_sync_script.dart -l <path/to/line/break/properties>
+/// ```
+///
+/// To do a dry run, add the `-d` flag:
+///
+/// ```
+/// dart tool/unicode_sync_script.dart -d ...
+/// ```
+///
+/// This script parses the unicode word/line break properties(1) and generates Dart
 /// code(2) that can perform lookups in the unicode ranges to find what property
 /// a letter has.
 ///
-/// (1) The properties file can be downloaded from:
-///     https://www.unicode.org/Public/11.0.0/ucd/auxiliary/WordBreakProperty.txt
+/// (1) The word break properties file can be downloaded from:
+///     https://www.unicode.org/Public/13.0.0/ucd/auxiliary/WordBreakProperty.txt
 ///
-/// (2) The codegen'd Dart file is located at:
-///     lib/src/text/word_break_properties.dart
+///     The line break properties file can be downloaded from:
+///     https://www.unicode.org/Public/13.0.0/ucd/LineBreak.txt
+///
+/// (2) The codegen'd Dart files are located at:
+///     lib/src/engine/text/word_break_properties.dart
+///     lib/src/engine/text/line_break_properties.dart
 void main(List<String> arguments) async {
-  final String propertiesFile = arguments[0];
-  final String codegenFile = path.join(
-    path.dirname(Platform.script.toFilePath()),
-    '../lib/src/engine/text/word_break_properties.dart',
+  final ArgResults result = argParser.parse(arguments);
+  final PropertiesSyncer syncer = getSyncer(
+    result['words'],
+    result['lines'],
+    result['dry'],
  );
-  WordBreakPropertiesSyncer(propertiesFile, codegenFile).perform();
+
+  syncer.perform();
+}
+
+/// Picks the syncer matching the command line options.
+///
+/// Exactly one of [wordBreakProperties] and [lineBreakProperties] must be
+/// non-null; otherwise the usage is printed and the process exits with
+/// code 64. When [dry] is true, the returned syncer is created in dry mode.
+PropertiesSyncer getSyncer(
+  String wordBreakProperties,
+  String lineBreakProperties,
+  bool dry,
+) {
+  final bool hasWords = wordBreakProperties != null;
+  final bool hasLines = lineBreakProperties != null;
+
+  if (!hasWords && !hasLines) {
+    print(
+        'Expecting either a word break properties file or a line break properties file. None was given.\n');
+    print(argParser.usage);
+    exit(64);
+  }
+  if (hasWords && hasLines) {
+    print(
+        'Expecting either a word break properties file or a line break properties file. Both were given.\n');
+    print(argParser.usage);
+    exit(64);
+  }
+
+  if (hasWords) {
+    if (dry) {
+      return WordBreakPropertiesSyncer.dry(wordBreakProperties);
+    }
+    return WordBreakPropertiesSyncer(wordBreakProperties, wordBreakCodegen);
+  }
+
+  if (dry) {
+    return LineBreakPropertiesSyncer.dry(lineBreakProperties);
+  }
+  return LineBreakPropertiesSyncer(lineBreakProperties, lineBreakCodegen);
 }

 /// Base class that provides common logic for syncing all kinds of unicode
@ -80,29 +143,34 @@ void main(List<String> arguments) async {
 /// Subclasses implement the [template] method which receives as argument the
 /// list of data parsed by [processLines].
 abstract class PropertiesSyncer {
-  PropertiesSyncer(this._src, this._dest);
+  PropertiesSyncer(this._src, this._dest) : _dryRun = false;
+  PropertiesSyncer.dry(this._src)
+      : _dest = null,
+        _dryRun = true;

  final String _src;
  final String _dest;
+  final bool _dryRun;
+
+  String get prefix;
+  String get enumDocLink;

  void perform() async {
    final List<String> lines = await File(_src).readAsLines();
    final List<String> header = extractHeader(lines);
-    final List<PropertyTuple> data = processLines(lines);
+    final PropertyCollection data = PropertyCollection.fromLines(lines);

-    final IOSink sink = File(_dest).openWrite();
-    sink.write(template(header, data));
+    final String output = template(header, data);
+
+    if (_dryRun) {
+      print(output);
+    } else {
+      final IOSink sink = File(_dest).openWrite();
+      sink.write(output);
+    }
  }

-  String template(List<String> header, List<PropertyTuple> data);
-}
-
-/// Syncs Unicode's word break properties.
-class WordBreakPropertiesSyncer extends PropertiesSyncer {
-  WordBreakPropertiesSyncer(String src, String dest) : super(src, dest);
-
-  @override
-  String template(List<String> header, List<PropertyTuple> data) {
+  String template(List<String> header, PropertyCollection data) {
    return '''
 // Copyright 2013 The Flutter Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
@ -117,64 +185,198 @@ class WordBreakPropertiesSyncer extends PropertiesSyncer {
 // @dart = 2.6
 part of engine;

-CharProperty getCharProperty(String text, int index) {
-  if (index < 0 || index >= text.length) {
-    return null;
-  }
-  return lookup.find(text.codeUnitAt(index));
+/// For an explanation of these enum values, see:
+///
+/// * ${enumDocLink}
+enum ${prefix}CharProperty {
+  ${_getEnumValues(data.enumCollection).join('\n  ')}
 }

-enum CharProperty {
-  ${getEnumValues(data).join(',\n  ')}
-}
+const String _packed${prefix}BreakProperties =
+  '${_packProperties(data)}';

-const UnicodePropertyLookup<CharProperty> lookup =
-    UnicodePropertyLookup<CharProperty>(<UnicodeRange<CharProperty>>[
-  ${getLookupEntries(data).join(',\n  ')}
-]);
+
+UnicodePropertyLookup<${prefix}CharProperty> ${prefix.toLowerCase()}Lookup =
+    UnicodePropertyLookup<${prefix}CharProperty>.fromPackedData(
+  _packed${prefix}BreakProperties,
+  ${_getSingleRangesCount(data)},
+  ${prefix}CharProperty.values,
+);
 ''';
  }

-  Iterable<String> getEnumValues(List<PropertyTuple> data) {
-    return Set<String>.from(
-            data.map<String>((PropertyTuple tuple) => tuple.property))
-        .map(normalizePropertyName);
-  }
-
-  Iterable<String> getLookupEntries(List<PropertyTuple> data) {
-    data.sort(
-      // Ranges don't overlap so it's safe to sort based on the start of each
-      // range.
-      (PropertyTuple tuple1, PropertyTuple tuple2) =>
-          tuple1.start.compareTo(tuple2.start),
+  Iterable<String> _getEnumValues(EnumCollection enumCollection) {
+    return enumCollection.values.map(
+      (EnumValue value) =>
+          '${value.enumName}, // serialized as "${value.serialized}"',
    );
-    verifyNoOverlappingRanges(data);
-    return combineAdjacentRanges(data)
-        .map((PropertyTuple tuple) => generateLookupEntry(tuple));
  }

-  String generateLookupEntry(PropertyTuple tuple) {
-    final String propertyStr =
-        'CharProperty.${normalizePropertyName(tuple.property)}';
-    return 'UnicodeRange<CharProperty>(${toHex(tuple.start)}, ${toHex(tuple.end)}, $propertyStr)';
+  /// Counts the ranges in [data] whose start and end are the same value.
+  int _getSingleRangesCount(PropertyCollection data) {
+    return data.ranges
+        .where((UnicodeRange range) => range.start == range.end)
+        .length;
   }
+
+  /// Serializes the ranges of [data] into one compact string.
+  ///
+  /// Each range is written as its start (4 base36 digits), followed by either
+  /// its end (4 base36 digits) or "!" when start == end, followed by the
+  /// serialized form of its property.
+  String _packProperties(PropertyCollection data) {
+    return data.ranges.map((UnicodeRange range) {
+      final String start = range.start.toRadixString(36).padLeft(4, '0');
+      final String end = range.start == range.end
+          ? '!'
+          : range.end.toRadixString(36).padLeft(4, '0');
+      return '$start$end${range.property.serialized}';
+    }).join();
+  }
+}
+
+/// Syncs Unicode's word break properties.
+class WordBreakPropertiesSyncer extends PropertiesSyncer {
+  WordBreakPropertiesSyncer(String src, String dest) : super(src, dest);
+  WordBreakPropertiesSyncer.dry(String src) : super.dry(src);
+
+  /// Prefix used to name the generated enum, packed data and lookup.
+  @override
+  final String prefix = 'Word';
+
+  /// Link placed in the doc comment of the generated enum.
+  @override
+  final String enumDocLink =
+      'http://unicode.org/reports/tr29/#Table_Word_Break_Property_Values';
+}
+
+/// Syncs Unicode's line break properties.
+class LineBreakPropertiesSyncer extends PropertiesSyncer {
+  LineBreakPropertiesSyncer(String src, String dest) : super(src, dest);
+  LineBreakPropertiesSyncer.dry(String src) : super.dry(src);
+
+  /// Prefix used to name the generated enum, packed data and lookup.
+  @override
+  final String prefix = 'Line';
+
+  /// Link placed in the doc comment of the generated enum.
+  @override
+  final String enumDocLink =
+      'https://unicode.org/reports/tr14/#DescriptionOfProperties';
+}
+
+/// Holds the collection of properties parsed from the unicode spec file.
+class PropertyCollection {
+  /// Parses [lines], ignoring comments and empty lines, and stores the
+  /// resulting ranges after they went through [processRanges].
+  PropertyCollection.fromLines(List<String> lines) {
+    final List<UnicodeRange> parsed = <UnicodeRange>[];
+    for (final String rawLine in lines) {
+      final String line = removeCommentFromLine(rawLine);
+      if (line.isNotEmpty) {
+        parsed.add(parseLineIntoUnicodeRange(line));
+      }
+    }
+    ranges = processRanges(parsed);
+  }
+
+  /// The processed ranges, set by the constructor.
+  List<UnicodeRange> ranges;
+
+  /// Every distinct property encountered while parsing.
+  final EnumCollection enumCollection = EnumCollection();
+
+  /// Parses one data line of the form `<range> ; <property>`.
+  ///
+  /// Examples:
+  ///
+  /// 00C0..00D6    ; ALetter
+  /// 037F          ; ALetter
+  ///
+  /// The first line yields the range 192..214 and the second one the range
+  /// 895..895, both carrying the enum value registered for "ALetter" in
+  /// [enumCollection].
+  UnicodeRange parseLineIntoUnicodeRange(String line) {
+    final List<String> parts = line.split(';');
+    final String range = parts[0].trim();
+    final String propertyName = parts[1].trim();
+
+    final int start = getRangeStart(range);
+    final int end = getRangeEnd(range);
+    return UnicodeRange(start, end, enumCollection.add(propertyName));
+  }
+}
+
+/// Represents the collection of values of an enum.
+class EnumCollection {
+  /// The known values, in the order they were first added.
+  final List<EnumValue> values = <EnumValue>[];
+
+  /// Returns the value called [name], creating and registering it first if it
+  /// isn't known yet.
+  EnumValue add(String name) {
+    for (final EnumValue existing in values) {
+      if (existing.name == name) {
+        return existing;
+      }
+    }
+    // A new value gets the next free index.
+    final EnumValue created = EnumValue(values.length, name);
+    values.add(created);
+    return created;
+  }
+}
+
+/// Represents a single value in an [EnumCollection].
+class EnumValue {
+  EnumValue(this.index, this.name);
+
+  /// Position of this value in the enum.
+  final int index;
+
+  /// The property name this value was created with.
+  final String name;
+
+  /// Returns a serialized, compact format of the enum value.
+  ///
+  /// Enum values are serialized based on their index. We start serializing them
+  /// to "A", "B", "C", etc until we reach "Z". Then we continue with "a", "b",
+  /// "c", etc.
+  ///
+  /// Throws a [RangeError] if [index] is outside 0..51, since no letter is
+  /// left to represent it.
+  String get serialized {
+    // Only 52 letters are available. Anything beyond that would silently
+    // produce a non-letter character in the packed data.
+    if (index < 0 || index >= 52) {
+      throw RangeError.range(
+        index,
+        0,
+        51,
+        'index',
+        'Enum value "$name" cannot be serialized as a single letter',
+      );
+    }
+    // We assign uppercase letters to the first 26 enum values.
+    if (index < 26) {
+      return String.fromCharCode(_kChar_A + index);
+    }
+    // Enum values at index 26 and above are assigned a lowercase letter.
+    return String.fromCharCode(_kChar_a + index - 26);
+  }
+
+  /// Returns the enum name that'll be used in the Dart code.
+  ///
+  /// This is [name] with all underscores removed:
+  ///
+  /// ```dart
+  /// enum WordCharProperty {
+  ///   ALetter, // <-- this is the name returned by this method ("ALetter").
+  ///   Numeric,
+  ///   // etc...
+  /// }
+  /// ```
+  String get enumName {
+    return name.replaceAll('_', '');
+  }
+}
+
+/// Sorts ranges and combines adjacent ranges that have the same property and
+/// can be merged.
+///
+/// [data] is sorted in place by range start. Throws if two ranges overlap.
+/// Returns the list of combined ranges.
+List<UnicodeRange> processRanges(List<UnicodeRange> data) {
+  data.sort(
+    // Ranges are not supposed to overlap (this is verified right after), so
+    // ordering them by their start is enough.
+    (UnicodeRange range1, UnicodeRange range2) =>
+        range1.start.compareTo(range2.start),
+  );
+  verifyNoOverlappingRanges(data);
+  return combineAdjacentRanges(data);
 }

 /// Example:
 ///
 /// ```
-/// UnicodeRange<CharProperty>(0x01C4, 0x0293, CharProperty.ALetter),
-/// UnicodeRange<CharProperty>(0x0294, 0x0294, CharProperty.ALetter),
-/// UnicodeRange<CharProperty>(0x0295, 0x02AF, CharProperty.ALetter),
+/// 0x01C4..0x0293; ALetter
+/// 0x0294..0x0294; ALetter
+/// 0x0295..0x02AF; ALetter
 /// ```
 ///
 /// will get combined into:
 ///
 /// ```
-/// UnicodeRange<CharProperty>(0x01C4, 0x02AF, CharProperty.ALetter)
+/// 0x01C4..0x02AF; ALetter
 /// ```
-List<PropertyTuple> combineAdjacentRanges(List<PropertyTuple> data) {
-  final List<PropertyTuple> result = <PropertyTuple>[data.first];
+List<UnicodeRange> combineAdjacentRanges(List<UnicodeRange> data) {
+  final List<UnicodeRange> result = <UnicodeRange>[data.first];
  for (int i = 1; i < data.length; i++) {
    if (result.last.isAdjacent(data[i])) {
      result.last = result.last.extendRange(data[i]);
@ -196,11 +398,7 @@ int getRangeEnd(String range) {
  return int.parse(range, radix: 16);
 }

-String toHex(int value) {
-  return '0x${value.toRadixString(16).padLeft(4, '0').toUpperCase()}';
-}
-
-void verifyNoOverlappingRanges(List<PropertyTuple> data) {
+void verifyNoOverlappingRanges(List<UnicodeRange> data) {
  for (int i = 1; i < data.length; i++) {
    if (data[i].isOverlapping(data[i - 1])) {
      throw Exception('Data contains overlapping ranges.');
@ -211,7 +409,7 @@ void verifyNoOverlappingRanges(List<PropertyTuple> data) {
 List<String> extractHeader(List<String> lines) {
  final List<String> headerLines = <String>[];
  for (String line in lines) {
-    if (line.contains('=======')) {
+    if (line.trim() == '#' || line.trim().isEmpty) {
      break;
    }
    if (line.isNotEmpty) {
@ -221,45 +419,7 @@ List<String> extractHeader(List<String> lines) {
  return headerLines;
 }

-List<PropertyTuple> processLines(List<String> lines) {
-  return lines
-      .map(removeCommentFromLine)
-      .where((String line) => line.isNotEmpty)
-      .map(parseLineIntoPropertyTuple)
-      .toList();
-}
-
-String normalizePropertyName(String property) {
-  return property.replaceAll('_', '');
-}
-
 String removeCommentFromLine(String line) {
  final int poundIdx = line.indexOf('#');
  return (poundIdx == -1) ? line : line.substring(0, poundIdx);
 }
-
-/// Examples:
-///
-/// 00C0..00D6    ; ALetter
-/// 037F          ; ALetter
-///
-/// Would be parsed into:
-///
-/// ```dart
-/// PropertyTuple(192, 214, 'ALetter');
-/// PropertyTuple(895, 895, 'ALetter');
-/// ```
-PropertyTuple parseLineIntoPropertyTuple(String line) {
-  final List<String> split = line.split(';');
-  final String rangeStr = split[0].trim();
-  final String propertyStr = split[1].trim();
-
-  final List<String> rangeSplit = rangeStr.contains('..')
-      ? rangeStr.split('..')
-      : <String>[rangeStr, rangeStr];
-  return PropertyTuple(
-    int.parse(rangeSplit[0], radix: 16),
-    int.parse(rangeSplit[1], radix: 16),
-    propertyStr,
-  );
-}