mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
367 lines
13 KiB
C++
367 lines
13 KiB
C++
/*
|
|
* Copyright (C) 2014 Google Inc. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following disclaimer
|
|
* in the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Google Inc. nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "flutter/sky/engine/platform/fonts/Character.h"
|
|
|
|
#include "flutter/sky/engine/platform/fonts/FontPlatformFeatures.h"
|
|
#include "flutter/sky/engine/wtf/StdLibExtras.h"
|
|
#include "flutter/sky/engine/wtf/text/StringBuilder.h"
|
|
|
|
using namespace WTF;
|
|
using namespace Unicode;
|
|
|
|
namespace blink {
|
|
|
|
// Individual code points that count as CJK symbols but are not part of any
// contiguous range. Character::isCJKIdeographOrSymbol() copies these into a
// hash set the first time it needs them.
static const UChar32 cjkIsolatedSymbolsArray[] = {
    // Mandarin Chinese tone marks:
    //   0x2C7 Caron (3rd tone)
    //   0x2CA Modifier Letter Acute Accent (2nd tone)
    //   0x2CB Modifier Letter Grave Accent (4th tone)
    //   0x2D9 Dot Above (5th tone)
    0x2C7, 0x2CA, 0x2CB, 0x2D9,
    // Remaining isolated symbols, in ascending order.
    0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042,
    0x2047, 0x2048, 0x2049, 0x2051, 0x20DD, 0x20DE,
    0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113,
    0x2116, 0x2121, 0x212B, 0x213B, 0x2150, 0x2151,
    0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE,
    0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB,
    0x25B1, 0x25B2, 0x25B3, 0x25B6, 0x25B7, 0x25BC,
    0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9,
    0x25CB, 0x25CC, 0x25EF, 0x2605, 0x2606, 0x260E,
    0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD,
    0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756,
    0x2B1A, 0xFE10, 0xFE11, 0xFE12, 0xFE19, 0xFF1D,
    // Emoji.
    0x1F100};
|
|
|
|
// Reports whether |value| falls inside any of the closed intervals described
// by |intervalList|.
//
// |intervalList| is a flattened list of interval bounds laid out as
// {low0, high0, low1, high1, ...}; both bounds of each interval are inclusive.
// The list is searched with std::upper_bound, so it must be sorted in
// ascending order.
//
// Returns true when low_k <= value <= high_k for some k, false otherwise.
template <class T, size_t size>
bool valueInIntervalList(const T (&intervalList)[size], const T& value) {
  // A dangling lower bound would silently behave like an open-ended interval,
  // so insist on complete (low, high) pairs at compile time.
  static_assert(size % 2 == 0,
                "interval list must consist of (low, high) pairs");

  // Use pointer arithmetic for the end iterator rather than indexing one past
  // the last element.
  const T* const first = intervalList;
  const T* const last = first + size;

  // |bound| is the first entry strictly greater than |value|.
  const T* const bound = std::upper_bound(first, last, value);

  // An odd offset means |bound| is an upper bound whose lower bound is
  // <= value, i.e. low <= value < high.
  if ((bound - first) % 2 == 1)
    return true;

  // An even offset means |value| sits between intervals or past the end; the
  // only remaining hit is value == high of the preceding interval.
  return bound != first && *(bound - 1) == value;
}
|
|
|
|
// Classifies the UTF-16 run |characters[0..len)| by the code path it needs.
//
// Returns:
//   - ComplexPath as soon as any code unit falls in one of the
//     |complexCodePathRanges| intervals, or a surrogate pair decodes to a
//     Regional Indicator Symbol (U+1F1E6..U+1F1FF) or a supplementary
//     variation selector (U+E0100..U+E01EF);
//   - SimpleWithGlyphOverflowPath if no such character exists but at least one
//     code unit lies in U+1E00..U+2000;
//   - SimplePath otherwise (including for an empty run).
CodePath Character::characterRangeCodePath(const UChar* characters,
                                           unsigned len) {
  // Flattened list of closed [low, high] intervals, sorted ascending, as
  // required by valueInIntervalList().
  static const UChar complexCodePathRanges[] = {
      // U+02E5 through U+02E9 (Modifier Letters : Tone letters)
      0x2E5, 0x2E9,
      // U+0300 through U+036F Combining diacritical marks
      0x300, 0x36F,
      // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ...
      0x0591, 0x05BD,
      // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha
      0x05BF, 0x05CF,
      // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic,
      // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada,
      // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar
      0x0600, 0x109F,
      // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left
      // here if you precompose; Modern Korean will be precomposed as a result
      // of step A)
      0x1100, 0x11FF,
      // U+135D through U+135F Ethiopic combining marks
      0x135D, 0x135F,
      // U+1700 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa, Khmer,
      // Mongolian
      0x1700, 0x18AF,
      // U+1900 through U+194F Limbu (Unicode 4.0)
      0x1900, 0x194F,
      // U+1980 through U+19DF New Tai Lue
      0x1980, 0x19DF,
      // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha,
      // Vedic
      0x1A00, 0x1CFF,
      // U+1DC0 through U+1DFF Combining diacritical mark supplement
      0x1DC0, 0x1DFF,
      // U+20D0 through U+20FF Combining marks for symbols
      0x20D0, 0x20FF,
      // U+2CEF through U+2CF1 Combining marks for Coptic
      0x2CEF, 0x2CF1,
      // U+302A through U+302F Ideographic and Hangul Tone marks
      0x302A, 0x302F,
      // U+A67C through U+A67D Combining marks for old Cyrillic
      0xA67C, 0xA67D,
      // U+A6F0 through U+A6F1 Combining mark for Bamum
      0xA6F0, 0xA6F1,
      // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended,
      // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei
      // Mayek
      0xA800, 0xABFF,
      // U+D7B0 through U+D7FF Hangul Jamo Ext. B
      0xD7B0, 0xD7FF,
      // U+FE00 through U+FE0F Unicode variation selectors
      0xFE00, 0xFE0F,
      // U+FE20 through U+FE2F Combining half marks
      0xFE20, 0xFE2F};

  CodePath result = SimplePath;
  for (unsigned i = 0; i < len; i++) {
    const UChar c = characters[i];

    // Shortcut for common case: nothing below the first table entry can
    // change the result.
    if (c < 0x2E5)
      continue;

    // U+1E00 through U+2000 characters with diacritics and stacked diacritics.
    // Keep scanning: a later character may still force ComplexPath.
    if (c >= 0x1E00 && c <= 0x2000) {
      result = SimpleWithGlyphOverflowPath;
      continue;
    }

    // Surrogate pairs (|c| is a lead surrogate, U+D800..U+DBFF).
    if (c > 0xD7FF && c <= 0xDBFF) {
      // Lead surrogate at the very end of the run: nothing to pair it with.
      if (i + 1 >= len)
        continue;

      // Peek at the next code unit without consuming it. If it is not a trail
      // surrogate it is an ordinary character that must be classified by the
      // next loop iteration rather than skipped.
      const UChar next = characters[i + 1];
      if (!U16_IS_TRAIL(next))
        continue;

      // Well-formed pair: consume the trail surrogate as well.
      ++i;
      const UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next);

      // U+1F1E6 through U+1F1FF Regional Indicator Symbols.
      if (supplementaryCharacter >= 0x1F1E6 &&
          supplementaryCharacter <= 0x1F1FF)
        return ComplexPath;

      // U+E0100 through U+E01EF Unicode variation selectors.
      if (supplementaryCharacter >= 0xE0100 &&
          supplementaryCharacter <= 0xE01EF)
        return ComplexPath;

      // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) and
      // other complex scripts in plane 1 or higher.
      continue;
    }

    // Search for other Complex cases
    if (valueInIntervalList(complexCodePathRanges, c))
      return ComplexPath;
  }

  return result;
}
|
|
|
|
// Returns true when |c| lies in one of the CJK ideograph intervals listed in
// |cjkIdeographRanges| below.
bool Character::isCJKIdeograph(UChar32 c) {
  // Flattened, ascending list of closed [low, high] intervals.
  static const UChar32 cjkIdeographRanges[] = {
      0x2E80, 0x2FDF,    // CJK Radicals Supplement and Kangxi Radicals.
      0x31C0, 0x31EF,    // CJK Strokes.
      0x3400, 0x4DBF,    // CJK Unified Ideographs Extension A.
      0x4E00, 0x9FFF,    // The basic CJK Unified Ideographs block.
      0xF900, 0xFAFF,    // CJK Compatibility Ideographs.
      0x20000, 0x2A6DF,  // CJK Unified Ideographs Extension B.
      0x2A700, 0x2B81F,  // CJK Unified Ideographs Extensions C and D.
      0x2F800, 0x2FA1F   // CJK Compatibility Ideographs Supplement.
  };

  // Cheap rejection first: anything outside the overall span covered by the
  // table cannot match, so the binary search is skipped for it.
  const UChar32 lowestBound = cjkIdeographRanges[0];
  const UChar32 highestBound =
      cjkIdeographRanges[WTF_ARRAY_LENGTH(cjkIdeographRanges) - 1];
  const bool withinOverallSpan = c >= lowestBound && c <= highestBound;

  return withinOverallSpan && valueInIntervalList(cjkIdeographRanges, c);
}
|
|
|
|
// Returns true when |c| is a CJK ideograph (per isCJKIdeograph()), one of the
// isolated symbols in |cjkIsolatedSymbolsArray|, or inside one of the
// |cjkSymbolRanges| intervals below.
bool Character::isCJKIdeographOrSymbol(UChar32 c) {
  // Flattened, ascending list of closed [low, high] intervals.
  static const UChar32 cjkSymbolRanges[] = {
      0x2156, 0x215A,
      0x2160, 0x216B,
      0x2170, 0x217B,
      0x23BE, 0x23CC,
      0x2460, 0x2492,
      0x249C, 0x24FF,
      0x25CE, 0x25D3,
      0x25E2, 0x25E6,
      0x2600, 0x2603,
      0x2660, 0x266F,
      0x2672, 0x267D,
      0x2776, 0x277F,
      // Ideographic Description Characters, with CJK Symbols and Punctuation,
      // excluding 0x3030. Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 ..
      // 0x30FF, Bopomofo 0x3100 .. 0x312F
      0x2FF0, 0x302F,
      0x3031, 0x312F,
      // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
      0x3190, 0x31BF,
      // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
      // CJK Compatibility (0x3300 .. 0x33FF).
      0x3200, 0x33FF,
      0xF860, 0xF862,
      // CJK Compatibility Forms.
      0xFE30, 0xFE4F,
      // Halfwidth and Fullwidth Forms
      // Usually only used in CJK
      0xFF00, 0xFF0C,
      0xFF0E, 0xFF1A,
      0xFF1F, 0xFFEF,
      // Emoji.
      0x1F110, 0x1F129,
      0x1F130, 0x1F149,
      0x1F150, 0x1F169,
      0x1F170, 0x1F189,
      0x1F200, 0x1F6FF};

  // Likely common case: everything below the smallest isolated symbol is
  // rejected before any lookup happens.
  if (c < 0x2C7)
    return false;

  // Hash lookup for isolated symbols (those not part of a contiguous range).
  // The set is built on first use and kept for subsequent calls.
  static HashSet<UChar32>* cjkIsolatedSymbols = 0;
  if (!cjkIsolatedSymbols) {
    cjkIsolatedSymbols = new HashSet<UChar32>();
    const UChar32* const symbolsEnd =
        cjkIsolatedSymbolsArray + WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray);
    for (const UChar32* symbol = cjkIsolatedSymbolsArray; symbol != symbolsEnd;
         ++symbol)
      cjkIsolatedSymbols->add(*symbol);
  }

  // Isolated symbols are checked first, then ideographs.
  if (cjkIsolatedSymbols->contains(c) || isCJKIdeograph(c))
    return true;

  return valueInIntervalList(cjkSymbolRanges, c);
}
|
|
|
|
unsigned Character::expansionOpportunityCount(const LChar* characters,
|
|
size_t length,
|
|
TextDirection direction,
|
|
bool& isAfterExpansion) {
|
|
unsigned count = 0;
|
|
if (direction == LTR) {
|
|
for (size_t i = 0; i < length; ++i) {
|
|
if (treatAsSpace(characters[i])) {
|
|
count++;
|
|
isAfterExpansion = true;
|
|
} else {
|
|
isAfterExpansion = false;
|
|
}
|
|
}
|
|
} else {
|
|
for (size_t i = length; i > 0; --i) {
|
|
if (treatAsSpace(characters[i - 1])) {
|
|
count++;
|
|
isAfterExpansion = true;
|
|
} else {
|
|
isAfterExpansion = false;
|
|
}
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
unsigned Character::expansionOpportunityCount(const UChar* characters,
|
|
size_t length,
|
|
TextDirection direction,
|
|
bool& isAfterExpansion) {
|
|
static bool expandAroundIdeographs =
|
|
FontPlatformFeatures::canExpandAroundIdeographsInComplexText();
|
|
unsigned count = 0;
|
|
if (direction == LTR) {
|
|
for (size_t i = 0; i < length; ++i) {
|
|
UChar32 character = characters[i];
|
|
if (treatAsSpace(character)) {
|
|
count++;
|
|
isAfterExpansion = true;
|
|
continue;
|
|
}
|
|
if (U16_IS_LEAD(character) && i + 1 < length &&
|
|
U16_IS_TRAIL(characters[i + 1])) {
|
|
character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]);
|
|
i++;
|
|
}
|
|
if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) {
|
|
if (!isAfterExpansion)
|
|
count++;
|
|
count++;
|
|
isAfterExpansion = true;
|
|
continue;
|
|
}
|
|
isAfterExpansion = false;
|
|
}
|
|
} else {
|
|
for (size_t i = length; i > 0; --i) {
|
|
UChar32 character = characters[i - 1];
|
|
if (treatAsSpace(character)) {
|
|
count++;
|
|
isAfterExpansion = true;
|
|
continue;
|
|
}
|
|
if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2])) {
|
|
character = U16_GET_SUPPLEMENTARY(characters[i - 2], character);
|
|
i--;
|
|
}
|
|
if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) {
|
|
if (!isAfterExpansion)
|
|
count++;
|
|
count++;
|
|
isAfterExpansion = true;
|
|
continue;
|
|
}
|
|
isAfterExpansion = false;
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// Returns false for characters whose Unicode category is a space, line or
// paragraph separator, unassigned, control or format, and for the extra
// word-separator characters listed below; returns true for everything else.
bool Character::canReceiveTextEmphasis(UChar32 c) {
  const int excludedCategories = Separator_Space | Separator_Line |
                                 Separator_Paragraph | Other_NotAssigned |
                                 Other_Control | Other_Format;
  const CharCategory category = Unicode::category(c);
  if (category & excludedCategories)
    return false;

  // Additional word-separator characters listed in CSS Text Level 3 Editor's
  // Draft 3 November 2010.
  const bool isExtraWordSeparator =
      c == ethiopicWordspace || c == aegeanWordSeparatorLine ||
      c == aegeanWordSeparatorDot || c == ugariticWordDivider ||
      c == tibetanMarkIntersyllabicTsheg || c == tibetanMarkDelimiterTshegBstar;

  return !isExtraWordSeparator;
}
|
|
|
|
template <typename CharacterType>
|
|
static inline String normalizeSpacesInternal(const CharacterType* characters,
|
|
unsigned length) {
|
|
StringBuilder normalized;
|
|
normalized.reserveCapacity(length);
|
|
|
|
for (unsigned i = 0; i < length; ++i)
|
|
normalized.append(Character::normalizeSpaces(characters[i]));
|
|
|
|
return normalized.toString();
|
|
}
|
|
|
|
// 8-bit overload: returns a String with every one of the |length| characters
// at |characters| replaced by its single-character normalizeSpaces() result.
String Character::normalizeSpaces(const LChar* characters, unsigned length) {
  return normalizeSpacesInternal<LChar>(characters, length);
}
|
|
|
|
// 16-bit overload: returns a String with every one of the |length| code units
// at |characters| replaced by its single-character normalizeSpaces() result.
String Character::normalizeSpaces(const UChar* characters, unsigned length) {
  return normalizeSpacesInternal<UChar>(characters, length);
}
|
|
|
|
} // namespace blink
|