mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
[web] Use v8BreakIterator where possible (flutter/engine#37317)
* [web] Use v8BreakIterator where possible * address review comments
This commit is contained in:
parent
c3260418f6
commit
45ad44de85
@ -66,6 +66,9 @@ extension DomWindowExtension on DomWindow {
|
||||
/// The Trusted Types API (when available).
|
||||
/// See: https://developer.mozilla.org/en-US/docs/Web/API/Trusted_Types_API
|
||||
external DomTrustedTypePolicyFactory? get trustedTypes;
|
||||
|
||||
// ignore: non_constant_identifier_names
|
||||
external DomIntl get Intl;
|
||||
}
|
||||
|
||||
typedef DomRequestAnimationFrameCallback = void Function(num highResTime);
|
||||
@ -1659,3 +1662,42 @@ class _DomListWrapper<T> extends Iterable<T> {
|
||||
/// `toList` on the `Iterable`.
|
||||
Iterable<T> createDomListWrapper<T>(_DomList list) =>
|
||||
_DomListWrapper<T>._(list).cast<T>();
|
||||
|
||||
/// Static-interop type for the object returned by the window's `Intl` getter.
///
/// The class itself has no members; the members exposed to Dart are declared
/// in the `DomIntlExtension` extension.
@JS()
|
||||
@staticInterop
|
||||
class DomIntl {}
|
||||
|
||||
/// Members of [DomIntl] that are exposed to Dart.
extension DomIntlExtension on DomIntl {
|
||||
/// This is a V8-only API for segmenting text.
|
||||
///
|
||||
/// See: https://code.google.com/archive/p/v8-i18n/wikis/BreakIterator.wiki
///
/// The value is typed as a nullable `Object`. Callers compare it against
/// null to decide whether the API is available, and `createV8BreakIterator`
/// passes a non-null value to `js_util.callConstructor` as the constructor.
|
||||
external Object? get v8BreakIterator;
|
||||
}
|
||||
|
||||
|
||||
/// Static-interop type for the object produced by invoking the
/// `Intl.v8BreakIterator` constructor (see `createV8BreakIterator`).
///
/// The class itself has no members; its methods are declared in the
/// `DomV8BreakIteratorExtension` extension.
@JS()
|
||||
@staticInterop
|
||||
class DomV8BreakIterator {}
|
||||
|
||||
/// External (JavaScript-implemented) methods of [DomV8BreakIterator].
///
/// The method bodies live on the JS side, so the notes below describe only how
/// `V8LineBreakFragmenter` uses each member.
extension DomV8BreakIteratorExtension on DomV8BreakIterator {
|
||||
/// Called once with the full text before any iteration starts.
///
/// NOTE(review): presumably sets the string whose breaks are reported;
/// confirm against the v8BreakIterator documentation.
external void adoptText(String text);
|
||||
/// Called once after [adoptText] and before the first [next]; the returned
/// value is ignored by the fragmenter.
external int first();
|
||||
/// Advances the iterator. The fragmenter keeps looping until this returns
/// `-1`.
external int next();
|
||||
/// Read after each successful [next]; the fragmenter uses the value as the
/// end offset (in UTF-16 code units of the adopted text) of the fragment.
external int current();
|
||||
/// Type string of the current break. The fragmenter treats `'none'` as a
/// soft break and any other value as a mandatory break.
external String breakType();
|
||||
}
|
||||
|
||||
/// Builds a new [DomV8BreakIterator] by invoking the `Intl.v8BreakIterator`
/// constructor found on the window.
///
/// The constructor receives two arguments: the window's `undefined` property
/// and the JS object `{type: 'line'}`.
///
/// Throws an [UnimplementedError] when `Intl.v8BreakIterator` is null.
DomV8BreakIterator createV8BreakIterator() {
  final Object? constructor = domWindow.Intl.v8BreakIterator;
  if (constructor != null) {
    // Reading the `undefined` property of the window yields the value that is
    // handed over as the first constructor argument.
    final Object? firstArgument = js_util.getProperty(domWindow, 'undefined');
    final Object? options =
        js_util.jsify(const <String, String>{'type': 'line'});

    return js_util.callConstructor<DomV8BreakIterator>(
      constructor,
      <Object?>[firstArgument, options],
    );
  }

  throw UnimplementedError('v8BreakIterator is not supported.');
}
|
||||
|
||||
@ -2,10 +2,25 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
import '../dom.dart';
|
||||
import 'fragmenter.dart';
|
||||
import 'line_break_properties.dart';
|
||||
import 'unicode_range.dart';
|
||||
|
||||
/// Code units that `V8LineBreakFragmenter` counts as trailing newlines.
///
/// A code unit found in this set increments both the trailing-newline and the
/// trailing-space counters of the fragment being built. The trailing comments
/// are presumably the Unicode line-break class of each code point.
const Set<int> _kNewlines = <int>{
|
||||
0x000A, // LF
|
||||
0x000B, // BK
|
||||
0x000C, // BK
|
||||
0x000D, // CR
|
||||
0x0085, // NL
|
||||
0x2028, // BK
|
||||
0x2029, // BK
|
||||
};
|
||||
/// Code units that `V8LineBreakFragmenter` counts as trailing spaces (but not
/// as trailing newlines).
///
/// The trailing comments are presumably the Unicode line-break class of each
/// code point.
const Set<int> _kSpaces = <int>{
|
||||
0x0020, // SP
|
||||
0x200B, // ZW
|
||||
};
|
||||
|
||||
/// Various types of line breaks as defined by the Unicode spec.
|
||||
enum LineBreakType {
|
||||
/// Indicates that a line break is possible but not mandatory.
|
||||
@ -25,8 +40,21 @@ enum LineBreakType {
|
||||
}
|
||||
|
||||
/// Splits [text] into fragments based on line breaks.
|
||||
class LineBreakFragmenter extends TextFragmenter {
|
||||
const LineBreakFragmenter(super.text);
|
||||
abstract class LineBreakFragmenter extends TextFragmenter {
  /// Picks the implementation to use for [text].
  ///
  /// Returns a [V8LineBreakFragmenter] when `Intl.v8BreakIterator` is
  /// non-null on the window, and an [FWLineBreakFragmenter] otherwise.
  factory LineBreakFragmenter(String text) =>
      domWindow.Intl.v8BreakIterator == null
          ? FWLineBreakFragmenter(text)
          : V8LineBreakFragmenter(text);

  /// Returns the line-break fragments of the text.
  @override
  List<LineBreakFragment> fragment();
}
|
||||
|
||||
/// Flutter web's custom implementation of [LineBreakFragmenter].
|
||||
class FWLineBreakFragmenter extends TextFragmenter implements LineBreakFragmenter {
|
||||
FWLineBreakFragmenter(super.text);
|
||||
|
||||
@override
|
||||
List<LineBreakFragment> fragment() {
|
||||
@ -34,6 +62,85 @@ class LineBreakFragmenter extends TextFragmenter {
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of [LineBreakFragmenter] that uses V8's
|
||||
/// `v8BreakIterator` API to find line breaks in the given [text].
|
||||
/// A [LineBreakFragmenter] that locates line breaks in [text] through the
/// browser's `v8BreakIterator`.
///
/// Construction asserts that `Intl.v8BreakIterator` is non-null on the window.
class V8LineBreakFragmenter extends TextFragmenter implements LineBreakFragmenter {
  V8LineBreakFragmenter(super.text)
      : assert(domWindow.Intl.v8BreakIterator != null);

  /// Walks [text] with a freshly created break iterator and returns the
  /// resulting fragments in order.
  ///
  /// Every span reported by the iterator is additionally cut wherever a run
  /// of spaces/newlines is followed by some other code unit; those extra cuts
  /// are emitted as [LineBreakType.opportunity]. If no fragment was produced,
  /// or the last one is [LineBreakType.mandatory], an empty
  /// [LineBreakType.endOfText] fragment located at `text.length` is appended.
  @override
  List<LineBreakFragment> fragment() {
    final List<LineBreakFragment> result = <LineBreakFragment>[];
    final DomV8BreakIterator v8 = createV8BreakIterator();
    v8.adoptText(text);
    v8.first();

    int start = 0;
    while (v8.next() != -1) {
      final LineBreakType boundaryType = _getBreakType(v8);
      final int end = v8.current();

      int newlineCount = 0;
      int whitespaceCount = 0;
      for (int index = start; index < end; index++) {
        final int unit = text.codeUnitAt(index);
        final bool isNewline = _kNewlines.contains(unit);

        if (isNewline || _kSpaces.contains(unit)) {
          // Newlines feed both tallies; plain spaces only the whitespace one.
          whitespaceCount++;
          if (isNewline) {
            newlineCount++;
          }
          continue;
        }

        if (whitespaceCount == 0) {
          continue;
        }

        // A non-whitespace code unit right after whitespace: close the
        // current fragment here so that whitespace always ends a fragment.
        result.add(LineBreakFragment(
          start,
          index,
          LineBreakType.opportunity,
          trailingNewlines: newlineCount,
          trailingSpaces: whitespaceCount,
        ));
        start = index;
        newlineCount = 0;
        whitespaceCount = 0;
      }

      result.add(LineBreakFragment(
        start,
        end,
        boundaryType,
        trailingNewlines: newlineCount,
        trailingSpaces: whitespaceCount,
      ));
      start = end;
    }

    final bool needsEndOfText =
        result.isEmpty || result.last.type == LineBreakType.mandatory;
    if (needsEndOfText) {
      final int length = text.length;
      result.add(LineBreakFragment(
        length,
        length,
        LineBreakType.endOfText,
        trailingNewlines: 0,
        trailingSpaces: 0,
      ));
    }

    return result;
  }

  /// Classifies the boundary that [iterator] is currently positioned at.
  ///
  /// Any break type other than `'none'` maps to [LineBreakType.mandatory]. A
  /// `'none'` break maps to [LineBreakType.endOfText] when the iterator's
  /// position equals `text.length`, and to [LineBreakType.opportunity]
  /// otherwise.
  LineBreakType _getBreakType(DomV8BreakIterator iterator) {
    final int position = iterator.current();

    // v8BreakIterator reports soft breaks with the type string "none".
    final bool isSoftBreak = iterator.breakType() == 'none';
    if (isSoftBreak) {
      return position == text.length
          ? LineBreakType.endOfText
          : LineBreakType.opportunity;
    }
    return LineBreakType.mandatory;
  }
}
|
||||
|
||||
class LineBreakFragment extends TextFragment {
|
||||
const LineBreakFragment(super.start, super.end, this.type, {
|
||||
required this.trailingNewlines,
|
||||
|
||||
@ -17,7 +17,16 @@ void main() {
|
||||
}
|
||||
|
||||
void testMain() {
|
||||
group('$LineBreakFragmenter', () {
|
||||
groupForEachFragmenter(({required bool isV8}) {
|
||||
List<Line> split(String text) {
|
||||
final LineBreakFragmenter fragmenter =
|
||||
isV8 ? V8LineBreakFragmenter(text) : FWLineBreakFragmenter(text);
|
||||
return <Line>[
|
||||
for (final LineBreakFragment fragment in fragmenter.fragment())
|
||||
Line.fromLineBreakFragment(text, fragment)
|
||||
];
|
||||
}
|
||||
|
||||
test('empty string', () {
|
||||
expect(split(''), <Line>[
|
||||
Line('', endOfText),
|
||||
@ -316,13 +325,15 @@ void testMain() {
|
||||
});
|
||||
|
||||
test('comprehensive test', () {
|
||||
final List<TestCase> testCollection =
|
||||
parseRawTestData(rawLineBreakTestData);
|
||||
final List<TestCase> testCollection = parseRawTestData(rawLineBreakTestData, isV8: isV8);
|
||||
for (int t = 0; t < testCollection.length; t++) {
|
||||
final TestCase testCase = testCollection[t];
|
||||
|
||||
final String text = testCase.toText();
|
||||
final List<LineBreakFragment> fragments = LineBreakFragmenter(text).fragment();
|
||||
final LineBreakFragmenter fragmenter = isV8
|
||||
? V8LineBreakFragmenter(text)
|
||||
: FWLineBreakFragmenter(text);
|
||||
final List<LineBreakFragment> fragments = fragmenter.fragment();
|
||||
|
||||
// `f` is the index in the `fragments` list.
|
||||
int f = 0;
|
||||
@ -401,6 +412,23 @@ void testMain() {
|
||||
});
|
||||
}
|
||||
|
||||
/// Signature of a function that takes a `text` string and returns a
/// [LineBreakFragmenter].
typedef CreateLineBreakFragmenter = LineBreakFragmenter Function(String text);
|
||||
/// Signature of the callback accepted by `groupForEachFragmenter`.
///
/// `isV8` is passed as `false` for the `FWLineBreakFragmenter` group and as
/// `true` for the `V8LineBreakFragmenter` group.
typedef GroupBody = void Function({required bool isV8});
|
||||
|
||||
/// Registers [callback] inside one test group per fragmenter implementation.
///
/// A group named after [FWLineBreakFragmenter] is always registered, with
/// `isV8: false`. A second group named after [V8LineBreakFragmenter], with
/// `isV8: true`, is registered only when `Intl.v8BreakIterator` is non-null on
/// the window.
void groupForEachFragmenter(GroupBody callback) {
  group('$FWLineBreakFragmenter', () {
    callback(isV8: false);
  });

  final bool v8Available = domWindow.Intl.v8BreakIterator != null;
  if (!v8Available) {
    return;
  }

  group('$V8LineBreakFragmenter', () {
    callback(isV8: true);
  });
}
|
||||
|
||||
/// Holds information about how a line was split from a string.
|
||||
class Line {
|
||||
Line(this.text, this.breakType, {this.nl = 0, this.sp = 0});
|
||||
@ -447,10 +475,3 @@ class Line {
|
||||
return '"$escapedText" ($breakType, nl: $nl, sp: $sp)';
|
||||
}
|
||||
}
|
||||
|
||||
List<Line> split(String text) {
|
||||
return <Line>[
|
||||
for (final LineBreakFragment fragment in LineBreakFragmenter(text).fragment())
|
||||
Line.fromLineBreakFragment(text, fragment)
|
||||
];
|
||||
}
|
||||
|
||||
@ -3,11 +3,11 @@
|
||||
// found in the LICENSE file.
|
||||
|
||||
/// Parses raw test data into a list of [TestCase] objects.
|
||||
List<TestCase> parseRawTestData(String rawTestData) {
|
||||
List<TestCase> parseRawTestData(String rawTestData, {required bool isV8}) {
|
||||
return rawTestData
|
||||
.split('\n')
|
||||
.where(isValidTestCase)
|
||||
.map(_checkReplacement)
|
||||
.map((String line) => _checkReplacement(line, isV8: isV8))
|
||||
.map(_parse)
|
||||
.toList();
|
||||
}
|
||||
@ -16,7 +16,7 @@ bool isValidTestCase(String line) {
|
||||
return line.startsWith('×');
|
||||
}
|
||||
|
||||
String _checkReplacement(String line) {
|
||||
String _checkReplacement(String line, {required bool isV8}) {
|
||||
String replacement = line;
|
||||
|
||||
// Special cases for rules LB8, LB11, LB13, LB14, LB15, LB16, LB17 to allow
|
||||
@ -28,38 +28,91 @@ String _checkReplacement(String line) {
|
||||
.replaceAllMapped(spacesRegex, (Match m) => 'SPACE (SP) ÷ [${m.group(1)}.');
|
||||
}
|
||||
|
||||
// Some test cases contradict rule LB25, so we are fixing them with the few
|
||||
// regexes below.
|
||||
if (!isV8) {
|
||||
// Some test cases contradict rule LB25, so we are fixing them with the few
|
||||
// regexes below.
|
||||
|
||||
final RegExp lb25Regex1 = RegExp(r'\((CP_CP30|CL)\)(.*?) ÷ \[999\.0\] (PERCENT|DOLLAR)');
|
||||
if (replacement.contains(lb25Regex1)) {
|
||||
replacement = replacement
|
||||
.replaceAll(' ÷ 0024', ' × 0024') // DOLLAR SIGN (PR)
|
||||
.replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO)
|
||||
.replaceAllMapped(
|
||||
lb25Regex1,
|
||||
(Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
|
||||
);
|
||||
final RegExp lb25Regex1 = RegExp(r'\((CP_CP30|CL)\)(.*?) ÷ \[999\.0\] (PERCENT|DOLLAR)');
|
||||
if (replacement.contains(lb25Regex1)) {
|
||||
replacement = replacement
|
||||
.replaceAll(' ÷ 0024', ' × 0024') // DOLLAR SIGN (PR)
|
||||
.replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO)
|
||||
.replaceAllMapped(
|
||||
lb25Regex1,
|
||||
(Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
|
||||
);
|
||||
}
|
||||
final RegExp lb25Regex2 = RegExp(r'\((IS|SY)\)(.*?) ÷ \[999\.0\] (DIGIT)');
|
||||
if (replacement.contains(lb25Regex2)) {
|
||||
replacement = replacement
|
||||
.replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU)
|
||||
.replaceAllMapped(
|
||||
lb25Regex2,
|
||||
(Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
|
||||
);
|
||||
}
|
||||
final RegExp lb25Regex3 = RegExp(r'\((PR|PO)\)(.*?) ÷ \[999\.0\] (LEFT)');
|
||||
if (replacement.contains(lb25Regex3)) {
|
||||
replacement = replacement
|
||||
.replaceAll(' ÷ 0028', ' × 0028') // LEFT PARENTHESIS (OP_OP30)
|
||||
.replaceAll(' ÷ 007B', ' × 007B') // LEFT CURLY BRACKET (OP_OP30)
|
||||
.replaceAll(' ÷ 2329', ' × 2329') // LEFT-POINTING ANGLE BRACKET (OP)
|
||||
.replaceAllMapped(
|
||||
lb25Regex3,
|
||||
(Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
|
||||
);
|
||||
}
|
||||
}
|
||||
final RegExp lb25Regex2 = RegExp(r'\((IS|SY)\)(.*?) ÷ \[999\.0\] (DIGIT)');
|
||||
if (replacement.contains(lb25Regex2)) {
|
||||
replacement = replacement
|
||||
.replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU)
|
||||
.replaceAllMapped(
|
||||
lb25Regex2,
|
||||
(Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
|
||||
);
|
||||
}
|
||||
final RegExp lb25Regex3 = RegExp(r'\((PR|PO)\)(.*?) ÷ \[999\.0\] (LEFT)');
|
||||
if (replacement.contains(lb25Regex3)) {
|
||||
replacement = replacement
|
||||
.replaceAll(' ÷ 0028', ' × 0028') // LEFT PARENTHESIS (OP_OP30)
|
||||
.replaceAll(' ÷ 007B', ' × 007B') // LEFT CURLY BRACKET (OP_OP30)
|
||||
.replaceAll(' ÷ 2329', ' × 2329') // LEFT-POINTING ANGLE BRACKET (OP)
|
||||
.replaceAllMapped(
|
||||
lb25Regex3,
|
||||
(Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
|
||||
);
|
||||
|
||||
if (isV8) {
|
||||
// v8BreakIterator deviates from the spec around Hiragana and Katakana
|
||||
// letters.
|
||||
|
||||
final RegExp hiragana21Regex = RegExp(r' × \[21\.03\] (HIRAGANA LETTER|KATAKANA LETTER|KATAKANA-HIRAGANA)');
|
||||
if (replacement.contains(hiragana21Regex) && !replacement.contains('(BB)') && !replacement.contains('(PR)')) {
|
||||
replacement = replacement
|
||||
.replaceAll(' × 3041', ' ÷ 3041') // HIRAGANA LETTER (CJ)
|
||||
.replaceAll(' × 30E5', ' ÷ 30E5') // KATAKANA LETTER (CJ)
|
||||
.replaceAll(' × 30FC', ' ÷ 30FC') // KATAKANA-HIRAGANA PROLONGED SOUND MARK (CJ)
|
||||
.replaceAllMapped(
|
||||
hiragana21Regex,
|
||||
(Match m) => ' ÷ [21.03] ${m.group(1)}',
|
||||
);
|
||||
}
|
||||
if (replacement.contains(' × [16.0] HIRAGANA LETTER')) {
|
||||
replacement = replacement
|
||||
.replaceAll(' × 3041', ' ÷ 3041') // HIRAGANA LETTER (CJ)
|
||||
.replaceAll(
|
||||
' × [16.0] HIRAGANA LETTER',
|
||||
' ÷ [16.0] HIRAGANA LETTER',
|
||||
);
|
||||
}
|
||||
final RegExp hiraganaPercentRegex = RegExp(r'HIRAGANA .*? ÷ \[999\.0\] PERCENT');
|
||||
if (replacement.contains(hiraganaPercentRegex)) {
|
||||
replacement = replacement
|
||||
.replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO)
|
||||
.replaceAll(
|
||||
' ÷ [999.0] PERCENT',
|
||||
' × [999.0] PERCENT',
|
||||
);
|
||||
}
|
||||
|
||||
// v8BreakIterator also deviates from the spec around hyphens, commas and
|
||||
// full stops.
|
||||
|
||||
final RegExp hyphenRegex = RegExp(r'\((HY|IS)\)(.*?) ÷ \[999\.0\] (DIGIT|NUMBER|SECTION|THAI|<reserved-50005>)');
|
||||
if (replacement.contains(hyphenRegex)) {
|
||||
replacement = replacement
|
||||
.replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU)
|
||||
.replaceAll(' ÷ 0023', ' × 0023') // NUMBER SIGN (AL)
|
||||
.replaceAll(' ÷ 00A7', ' × 00A7') // SECTION SIGN (AI_AL)
|
||||
.replaceAll(' ÷ 0E01', ' × 0E01') // THAI CHARACTER KO KAI (SA_AL)
|
||||
.replaceAll(' ÷ 50005', ' × 50005') // <reserved-50005> (XX_AL)
|
||||
.replaceAllMapped(
|
||||
hyphenRegex,
|
||||
(Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return replacement;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user