mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
[web][3/3] Handle surrogate during line break detection (flutter/engine#19745)
This commit is contained in:
parent
80963c291d
commit
6f800ecb40
@ -28,7 +28,12 @@ class LineBreakResult {
|
||||
}
|
||||
|
||||
/// Normalizes properties that behave the same way into one common property.
|
||||
LineCharProperty? _normalizeLineProperty(LineCharProperty? prop) {
|
||||
LineCharProperty _normalizeLineProperty(int? codePoint) {
|
||||
if (codePoint == null) {
|
||||
return LineCharProperty.AL;
|
||||
}
|
||||
|
||||
final LineCharProperty? prop = lineLookup.findForChar(codePoint);
|
||||
// NL behaves exactly the same as BK.
|
||||
// See: https://www.unicode.org/reports/tr14/tr14-45.html#NL
|
||||
if (prop == LineCharProperty.NL) {
|
||||
@ -37,7 +42,8 @@ LineCharProperty? _normalizeLineProperty(LineCharProperty? prop) {
|
||||
// In the absence of extra data (ICU data and language dictionaries), the
|
||||
// following properties will be treated as AL (alphabetic): AI, SA, SG and XX.
|
||||
// See LB1: https://www.unicode.org/reports/tr14/tr14-45.html#LB1
|
||||
if (prop == LineCharProperty.AI ||
|
||||
if (prop == null ||
|
||||
prop == LineCharProperty.AI ||
|
||||
prop == LineCharProperty.SA ||
|
||||
prop == LineCharProperty.SG ||
|
||||
prop == LineCharProperty.XX) {
|
||||
@ -91,7 +97,8 @@ bool _hasEastAsianWidthFWH(int charCode) {
|
||||
/// * https://www.unicode.org/reports/tr14/tr14-45.html#Algorithm
|
||||
/// * https://www.unicode.org/Public/11.0.0/ucd/LineBreak.txt
|
||||
LineBreakResult nextLineBreak(String text, int index) {
|
||||
LineCharProperty? curr = _normalizeLineProperty(lineLookup.find(text, index));
|
||||
int? codePoint = getCodePoint(text, index);
|
||||
LineCharProperty curr = _normalizeLineProperty(codePoint);
|
||||
|
||||
LineCharProperty? prev1;
|
||||
|
||||
@ -117,9 +124,23 @@ LineBreakResult nextLineBreak(String text, int index) {
|
||||
curr = LineCharProperty.AL;
|
||||
}
|
||||
|
||||
int regionalIndicatorCount = 0;
|
||||
|
||||
// Always break at the end of text.
|
||||
// LB3: ! eot
|
||||
while (index < text.length) {
|
||||
// Keep count of the RI (regional indicator) sequence.
|
||||
if (curr == LineCharProperty.RI) {
|
||||
regionalIndicatorCount++;
|
||||
} else {
|
||||
regionalIndicatorCount = 0;
|
||||
}
|
||||
|
||||
if (codePoint != null && codePoint > 0xFFFF) {
|
||||
// Advance `index` one extra step when handling a surrogate pair in the
|
||||
// string.
|
||||
index++;
|
||||
}
|
||||
index++;
|
||||
prev2 = prev1;
|
||||
prev1 = curr;
|
||||
@ -131,7 +152,9 @@ LineBreakResult nextLineBreak(String text, int index) {
|
||||
baseOfSpaceSequence = null;
|
||||
}
|
||||
|
||||
curr = _normalizeLineProperty(lineLookup.find(text, index));
|
||||
codePoint = getCodePoint(text, index);
|
||||
curr = _normalizeLineProperty(codePoint);
|
||||
|
||||
isCurrZWJ = curr == LineCharProperty.ZWJ;
|
||||
|
||||
// Always break after hard line breaks.
|
||||
@ -208,6 +231,10 @@ LineBreakResult nextLineBreak(String text, int index) {
|
||||
// LB10: Treat any remaining combining mark or ZWJ as AL.
|
||||
curr = LineCharProperty.AL;
|
||||
} else {
|
||||
if (prev1 == LineCharProperty.RI) {
|
||||
// Prevent the previous RI from being double-counted.
|
||||
regionalIndicatorCount--;
|
||||
}
|
||||
// Preserve the property of the previous character to treat the sequence
|
||||
// as if it were X.
|
||||
curr = prev1;
|
||||
@ -466,6 +493,24 @@ LineBreakResult nextLineBreak(String text, int index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Break between two regional indicator symbols if and only if there are an
|
||||
// even number of regional indicators preceding the position of the break.
|
||||
// LB30a: sot (RI RI)* RI × RI
|
||||
// [^RI] (RI RI)* RI × RI
|
||||
if (curr == LineCharProperty.RI) {
|
||||
if (regionalIndicatorCount.isOdd) {
|
||||
continue;
|
||||
} else {
|
||||
return LineBreakResult(index, LineBreakType.opportunity);
|
||||
}
|
||||
}
|
||||
|
||||
// Do not break between an emoji base and an emoji modifier.
|
||||
// LB30b: EB × EM
|
||||
if (prev1 == LineCharProperty.EB && curr == LineCharProperty.EM) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Break everywhere else.
|
||||
// LB31: ALL ÷
|
||||
// ÷ ALL
|
||||
|
||||
@ -16,8 +16,7 @@ const double _baselineRatioHack = 1.1662499904632568;
|
||||
typedef CharPredicate = bool Function(int char);
|
||||
|
||||
bool _whitespacePredicate(int char) {
|
||||
final LineCharProperty? prop =
|
||||
_normalizeLineProperty(lineLookup.findForChar(char));
|
||||
final LineCharProperty? prop = _normalizeLineProperty(char);
|
||||
return prop == LineCharProperty.SP ||
|
||||
prop == LineCharProperty.BK ||
|
||||
prop == LineCharProperty.LF ||
|
||||
@ -25,8 +24,7 @@ bool _whitespacePredicate(int char) {
|
||||
}
|
||||
|
||||
bool _newlinePredicate(int char) {
|
||||
final LineCharProperty? prop =
|
||||
_normalizeLineProperty(lineLookup.findForChar(char));
|
||||
final LineCharProperty? prop = _normalizeLineProperty(char);
|
||||
return prop == LineCharProperty.BK || prop == LineCharProperty.LF || prop == LineCharProperty.CR;
|
||||
}
|
||||
|
||||
|
||||
@ -57,6 +57,44 @@ class UnicodeRange<P> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks whether the given char code is a UTF-16 surrogate.
///
/// Returns true for both halves of a pair: lead (high) surrogates in the
/// range 0xD800-0xDBFF and trail (low) surrogates in the range 0xDC00-0xDFFF.
///
/// See:
/// - https://www.unicode.org/faq/utf_bom.html#utf16-2
bool isUtf16Surrogate(int char) {
  // All surrogates share the top five bits `11011`.
  return (char & 0xF800) == 0xD800;
}

/// Combines a pair of UTF-16 surrogates into a single character code point.
///
/// The surrogate pair is expected to start at [index] in the [text], i.e. the
/// code unit at [index] must be a lead (high) surrogate and the code unit at
/// `index + 1` must be a trail (low) surrogate. No validation is performed
/// here: passing anything else yields a meaningless value, and an [index]
/// without a following code unit causes a [RangeError].
///
/// See:
/// - https://www.unicode.org/faq/utf_bom.html#utf16-3
int combineSurrogatePair(String text, int index) {
  // Bit layout of a well-formed pair:
  //   hi = 110110wwwwxxxxxx
  //   lo = 110111xxxxxxxxxx
  // and the resulting code point is `uuuuuxxxxxxxxxxxxxxxx` with u = w + 1.
  final int hi = text.codeUnitAt(index);
  final int lo = text.codeUnitAt(index + 1);

  // The low 6 bits of `hi` followed by the low 10 bits of `lo`.
  final int x = (hi & ((1 << 6) - 1)) << 10 | lo & ((1 << 10) - 1);
  // The plane bits. The mask is 5 bits wide as in the Unicode FAQ; for a
  // valid lead surrogate only the low 4 bits can be set.
  final int w = (hi >> 6) & ((1 << 5) - 1);
  final int u = w + 1;
  return u << 16 | x;
}

/// Returns the code point from [text] at [index] and handles surrogate pairs
/// for cases that involve two UTF-16 codes.
///
/// Returns null if [index] is out of range.
///
/// A surrogate pair is only combined when the code unit at [index] is a lead
/// (high) surrogate that is immediately followed by a trail (low) surrogate.
/// Unpaired surrogates - a lead surrogate at the end of the text or followed
/// by a non-surrogate, or a trail surrogate on its own - are returned
/// unchanged as a single code unit, so the result is greater than 0xFFFF if
/// and only if two code units were consumed.
int? getCodePoint(String text, int index) {
  if (index < 0 || index >= text.length) {
    return null;
  }

  final int char = text.codeUnitAt(index);

  // Lead surrogates are 0xD800-0xDBFF (top six bits `110110`).
  final bool isLead = (char & 0xFC00) == 0xD800;
  if (isLead && index + 1 < text.length) {
    // Trail surrogates are 0xDC00-0xDFFF (top six bits `110111`).
    final int next = text.codeUnitAt(index + 1);
    if ((next & 0xFC00) == 0xDC00) {
      return combineSurrogatePair(text, index);
    }
  }

  // Either a BMP character or an unpaired surrogate: return the unit as is
  // instead of merging it with an unrelated neighbour.
  return char;
}
|
||||
|
||||
/// Given a list of [UnicodeRange]s, this class performs efficient lookup
|
||||
/// to find which range a value falls into.
|
||||
///
|
||||
@ -88,11 +126,9 @@ class UnicodePropertyLookup<P> {
|
||||
/// located at that [index].
|
||||
///
|
||||
/// If the [index] is out of range, null will be returned.
|
||||
P? find(String? text, int index) {
|
||||
if (index < 0 || index >= text!.length) {
|
||||
return null;
|
||||
}
|
||||
return findForChar(text.codeUnitAt(index));
|
||||
P? find(String text, int index) {
|
||||
final int? codePoint = getCodePoint(text, index);
|
||||
return codePoint == null ? null : findForChar(codePoint);
|
||||
}
|
||||
|
||||
/// Takes one character as an integer code unit and returns its property.
|
||||
|
||||
@ -168,8 +168,16 @@ void main() {
|
||||
final String text = testCase.toText();
|
||||
|
||||
int lastLineBreak = 0;
|
||||
for (int i = 0; i < testCase.signs.length; i++) {
|
||||
final Sign sign = testCase.signs[i];
|
||||
int surrogateCount = 0;
|
||||
// `s` is the index in the `testCase.signs` list.
|
||||
for (int s = 0; s < testCase.signs.length; s++) {
|
||||
// `i` is the index in the `text`.
|
||||
final int i = s + surrogateCount;
|
||||
if (s < testCase.chars.length && testCase.chars[s].isSurrogatePair) {
|
||||
surrogateCount++;
|
||||
}
|
||||
|
||||
final Sign sign = testCase.signs[s];
|
||||
final LineBreakResult result = nextLineBreak(text, lastLineBreak);
|
||||
if (sign.isBreakOpportunity) {
|
||||
// The line break should've been found at index `i`.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user