mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
This caused us to lose our gn check certification. :( Turns out gn check was just ignoring all the header paths it didn't understand and so gn check passing for sky wasn't meaning much. I tried to straighten out some of the mess in this CL, but its going to take several more rounds of massaging before gn check passes again. On the bright side (almost) all of our headers are absolute now. Turns out my script (attached to the bug) didn't notice ../ includes but I'll fix that in the next patch. R=abarth@chromium.org BUG=435361 Review URL: https://codereview.chromium.org/746023002
463 lines
15 KiB
C++
463 lines
15 KiB
C++
/*
|
|
* Copyright (C) 2007 Apple Computer, Inc.
|
|
*
|
|
* Portions are Copyright (C) 1998 Netscape Communications Corporation.
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms
|
|
* of either the Mozilla Public License Version 1.1, found at
|
|
* http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
|
|
* License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
|
|
* (the "GPL"), in which case the provisions of the MPL or the GPL are
|
|
* applicable instead of those above. If you wish to allow use of your
|
|
* version of this file only under the terms of one of those two
|
|
* licenses (the MPL or the GPL) and not to allow others to use your
|
|
* version of this file under the LGPL, indicate your decision by
|
|
* deletingthe provisions above and replace them with the notice and
|
|
* other provisions required by the MPL or the GPL, as the case may be.
|
|
* If you do not delete the provisions above, a recipient may use your
|
|
* version of this file under any of the LGPL, the MPL or the GPL.
|
|
*/
|
|
|
|
#include "sky/engine/config.h"
|
|
#include "sky/engine/platform/text/UnicodeRange.h"
|
|
|
|
namespace blink {
|
|
|
|
// Language-group names for the "specific" unicode ranges.
// This table depends on the unicode range definitions: it is indexed directly
// by a unicode range value, so the position of each entry must match the
// numeric value of the corresponding range constant,
// e.g. gUnicodeRangeToLangGroupTable[cRangeCyrillic] is "x-cyrillic".
static const char* const gUnicodeRangeToLangGroupTable[] =
{
    "x-cyrillic",
    "el",
    "tr",
    "he",
    "ar",
    "x-baltic",
    "th",
    "ko",
    "ja",
    "zh-CN",
    "zh-TW",
    "x-devanagari",
    "x-tamil",
    "x-armn",
    "x-beng",
    "x-cans",
    "x-ethi",
    "x-geor",
    "x-gujr",
    "x-guru",
    "x-khmr",
    "x-mlym"
};
|
|
|
|
/**********************************************************************
|
|
* Unicode subranges as defined in unicode 3.0
|
|
* x-western, x-central-euro, tr, x-baltic -> latin
|
|
* 0000 - 036f
|
|
* 1e00 - 1eff
|
|
* 2000 - 206f (general punctuation)
|
|
* 20a0 - 20cf (currency symbols)
|
|
* 2100 - 214f (letterlike symbols)
|
|
* 2150 - 218f (Number Forms)
|
|
* el -> greek
|
|
* 0370 - 03ff
|
|
* 1f00 - 1fff
|
|
* x-cyrillic -> cyrillic
|
|
* 0400 - 04ff
|
|
* he -> hebrew
|
|
* 0590 - 05ff
|
|
* ar -> arabic
|
|
* 0600 - 06ff
|
|
* fb50 - fdff (arabic presentation forms)
|
|
* fe70 - feff (arabic presentation forms b)
|
|
* th -> thai
|
|
* 0e00 - 0e7f
|
|
* ko -> korean
|
|
* ac00 - d7af (hangul Syllables)
|
|
* 1100 - 11ff (jamo)
|
|
* 3130 - 318f (hangul compatibility jamo)
|
|
* ja
|
|
* 3040 - 309f (hiragana)
|
|
* 30a0 - 30ff (katakana)
|
|
* zh-CN
|
|
* zh-TW
|
|
*
|
|
* CJK
|
|
* 3100 - 312f (bopomofo)
|
|
* 31a0 - 31bf (bopomofo extended)
|
|
* 3000 - 303f (CJK Symbols and Punctuation)
|
|
* 2e80 - 2eff (CJK radicals supplement)
|
|
* 2f00 - 2fdf (Kangxi Radicals)
|
|
* 2ff0 - 2fff (Ideographic Description Characters)
|
|
* 3190 - 319f (kanbun)
|
|
* 3200 - 32ff (Enclosed CJK letters and Months)
|
|
* 3300 - 33ff (CJK compatibility)
|
|
* 3400 - 4dbf (CJK Unified Ideographs Extension A)
|
|
* 4e00 - 9faf (CJK Unified Ideographs)
|
|
* f900 - fa5f (CJK Compatibility Ideographs)
|
|
* fe30 - fe4f (CJK compatibility Forms)
|
|
* ff00 - ffef (halfwidth and fullwidth forms)
|
|
*
|
|
* Armenian
|
|
* 0530 - 058f
|
|
* Syriac
|
|
* 0700 - 074f
|
|
* Thaana
|
|
* 0780 - 07bf
|
|
* Devanagari
|
|
* 0900 - 097f
|
|
* Bengali
|
|
* 0980 - 09ff
|
|
* Gurmukhi
|
|
* 0a00 - 0a7f
|
|
* Gujarati
|
|
* 0a80 - 0aff
|
|
* Oriya
|
|
* 0b00 - 0b7f
|
|
* Tamil
|
|
* 0b80 - 0bff
|
|
* Telugu
|
|
* 0c00 - 0c7f
|
|
* Kannada
|
|
* 0c80 - 0cff
|
|
* Malayalam
|
|
* 0d00 - 0d7f
|
|
* Sinhala
|
|
* 0d80 - 0def
|
|
* Lao
|
|
* 0e80 - 0eff
|
|
* Tibetan
|
|
* 0f00 - 0fbf
|
|
* Myanmar
|
|
* 1000 - 109f
|
|
* Georgian
|
|
* 10a0 - 10ff
|
|
* Ethiopic
|
|
* 1200 - 137f
|
|
* Cherokee
|
|
* 13a0 - 13ff
|
|
* Canadian Aboriginal Syllabics
|
|
* 1400 - 167f
|
|
* Ogham
|
|
* 1680 - 169f
|
|
* Runic
|
|
* 16a0 - 16ff
|
|
* Khmer
|
|
* 1780 - 17ff
|
|
* Mongolian
|
|
* 1800 - 18af
|
|
* Misc - superscripts and subscripts
|
|
* 2070 - 209f
|
|
* Misc - Combining Diacritical Marks for Symbols
|
|
* 20d0 - 20ff
|
|
* Misc - Arrows
|
|
* 2190 - 21ff
|
|
* Misc - Mathematical Operators
|
|
* 2200 - 22ff
|
|
* Misc - Miscellaneous Technical
|
|
* 2300 - 23ff
|
|
* Misc - Control picture
|
|
* 2400 - 243f
|
|
* Misc - Optical character recognition
|
|
* 2440 - 245f
|
|
* Misc - Enclose Alphanumerics
|
|
* 2460 - 24ff
|
|
* Misc - Box Drawing
|
|
* 2500 - 257f
|
|
* Misc - Block Elements
|
|
* 2580 - 259f
|
|
* Misc - Geometric Shapes
|
|
* 25a0 - 25ff
|
|
* Misc - Miscellaneous Symbols
|
|
* 2600 - 267f
|
|
* Misc - Dingbats
|
|
* 2700 - 27bf
|
|
* Misc - Braille Patterns
|
|
* 2800 - 28ff
|
|
* Yi Syllables
|
|
* a000 - a48f
|
|
* Yi radicals
|
|
* a490 - a4cf
|
|
* Alphabetic Presentation Forms
|
|
* fb00 - fb4f
|
|
* Misc - Combining half Marks
|
|
* fe20 - fe2f
|
|
* Misc - small form variants
|
|
* fe50 - fe6f
|
|
* Misc - Specials
|
|
* fff0 - ffff
|
|
*********************************************************************/
|
|
|
|
static const unsigned cNumSubTables = 9;
|
|
static const unsigned cSubTableSize = 16;
|
|
|
|
static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] =
|
|
{
|
|
{ // table for X---
|
|
cRangeTableBase+1, //u0xxx
|
|
cRangeTableBase+2, //u1xxx
|
|
cRangeTableBase+3, //u2xxx
|
|
cRangeSetCJK, //u3xxx
|
|
cRangeSetCJK, //u4xxx
|
|
cRangeSetCJK, //u5xxx
|
|
cRangeSetCJK, //u6xxx
|
|
cRangeSetCJK, //u7xxx
|
|
cRangeSetCJK, //u8xxx
|
|
cRangeSetCJK, //u9xxx
|
|
cRangeTableBase+4, //uaxxx
|
|
cRangeKorean, //ubxxx
|
|
cRangeKorean, //ucxxx
|
|
cRangeTableBase+5, //udxxx
|
|
cRangePrivate, //uexxx
|
|
cRangeTableBase+6 //ufxxx
|
|
},
|
|
{ //table for 0X--
|
|
cRangeSetLatin, //u00xx
|
|
cRangeSetLatin, //u01xx
|
|
cRangeSetLatin, //u02xx
|
|
cRangeGreek, //u03xx XXX 0300-036f is in fact cRangeCombiningDiacriticalMarks
|
|
cRangeCyrillic, //u04xx
|
|
cRangeTableBase+7, //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
|
|
cRangeArabic, //u06xx
|
|
cRangeTertiaryTable, //u07xx
|
|
cRangeUnassigned, //u08xx
|
|
cRangeTertiaryTable, //u09xx
|
|
cRangeTertiaryTable, //u0axx
|
|
cRangeTertiaryTable, //u0bxx
|
|
cRangeTertiaryTable, //u0cxx
|
|
cRangeTertiaryTable, //u0dxx
|
|
cRangeTertiaryTable, //u0exx
|
|
cRangeTibetan, //u0fxx
|
|
},
|
|
{ //table for 1x--
|
|
cRangeTertiaryTable, //u10xx
|
|
cRangeKorean, //u11xx
|
|
cRangeEthiopic, //u12xx
|
|
cRangeTertiaryTable, //u13xx
|
|
cRangeCanadian, //u14xx
|
|
cRangeCanadian, //u15xx
|
|
cRangeTertiaryTable, //u16xx
|
|
cRangeKhmer, //u17xx
|
|
cRangeMongolian, //u18xx
|
|
cRangeUnassigned, //u19xx
|
|
cRangeUnassigned, //u1axx
|
|
cRangeUnassigned, //u1bxx
|
|
cRangeUnassigned, //u1cxx
|
|
cRangeUnassigned, //u1dxx
|
|
cRangeSetLatin, //u1exx
|
|
cRangeGreek, //u1fxx
|
|
},
|
|
{ //table for 2x--
|
|
cRangeSetLatin, //u20xx
|
|
cRangeSetLatin, //u21xx
|
|
cRangeMathOperators, //u22xx
|
|
cRangeMiscTechnical, //u23xx
|
|
cRangeControlOpticalEnclose, //u24xx
|
|
cRangeBoxBlockGeometrics, //u25xx
|
|
cRangeMiscSymbols, //u26xx
|
|
cRangeDingbats, //u27xx
|
|
cRangeBraillePattern, //u28xx
|
|
cRangeUnassigned, //u29xx
|
|
cRangeUnassigned, //u2axx
|
|
cRangeUnassigned, //u2bxx
|
|
cRangeUnassigned, //u2cxx
|
|
cRangeUnassigned, //u2dxx
|
|
cRangeSetCJK, //u2exx
|
|
cRangeSetCJK, //u2fxx
|
|
},
|
|
{ //table for ax--
|
|
cRangeYi, //ua0xx
|
|
cRangeYi, //ua1xx
|
|
cRangeYi, //ua2xx
|
|
cRangeYi, //ua3xx
|
|
cRangeYi, //ua4xx
|
|
cRangeUnassigned, //ua5xx
|
|
cRangeUnassigned, //ua6xx
|
|
cRangeUnassigned, //ua7xx
|
|
cRangeUnassigned, //ua8xx
|
|
cRangeUnassigned, //ua9xx
|
|
cRangeUnassigned, //uaaxx
|
|
cRangeUnassigned, //uabxx
|
|
cRangeKorean, //uacxx
|
|
cRangeKorean, //uadxx
|
|
cRangeKorean, //uaexx
|
|
cRangeKorean, //uafxx
|
|
},
|
|
{ //table for dx--
|
|
cRangeKorean, //ud0xx
|
|
cRangeKorean, //ud1xx
|
|
cRangeKorean, //ud2xx
|
|
cRangeKorean, //ud3xx
|
|
cRangeKorean, //ud4xx
|
|
cRangeKorean, //ud5xx
|
|
cRangeKorean, //ud6xx
|
|
cRangeKorean, //ud7xx
|
|
cRangeSurrogate, //ud8xx
|
|
cRangeSurrogate, //ud9xx
|
|
cRangeSurrogate, //udaxx
|
|
cRangeSurrogate, //udbxx
|
|
cRangeSurrogate, //udcxx
|
|
cRangeSurrogate, //uddxx
|
|
cRangeSurrogate, //udexx
|
|
cRangeSurrogate, //udfxx
|
|
},
|
|
{ // table for fx--
|
|
cRangePrivate, //uf0xx
|
|
cRangePrivate, //uf1xx
|
|
cRangePrivate, //uf2xx
|
|
cRangePrivate, //uf3xx
|
|
cRangePrivate, //uf4xx
|
|
cRangePrivate, //uf5xx
|
|
cRangePrivate, //uf6xx
|
|
cRangePrivate, //uf7xx
|
|
cRangePrivate, //uf8xx
|
|
cRangeSetCJK, //uf9xx
|
|
cRangeSetCJK, //ufaxx
|
|
cRangeArabic, //ufbxx, includes alphabic presentation form
|
|
cRangeArabic, //ufcxx
|
|
cRangeArabic, //ufdxx
|
|
cRangeArabic, //ufexx, includes Combining half marks,
|
|
// CJK compatibility forms,
|
|
// CJK compatibility forms,
|
|
// small form variants
|
|
cRangeTableBase+8, //uffxx, halfwidth and fullwidth forms, includes Specials
|
|
},
|
|
{ //table for 0x0500 - 0x05ff
|
|
cRangeCyrillic, //u050x
|
|
cRangeCyrillic, //u051x
|
|
cRangeCyrillic, //u052x
|
|
cRangeArmenian, //u053x
|
|
cRangeArmenian, //u054x
|
|
cRangeArmenian, //u055x
|
|
cRangeArmenian, //u056x
|
|
cRangeArmenian, //u057x
|
|
cRangeArmenian, //u058x
|
|
cRangeHebrew, //u059x
|
|
cRangeHebrew, //u05ax
|
|
cRangeHebrew, //u05bx
|
|
cRangeHebrew, //u05cx
|
|
cRangeHebrew, //u05dx
|
|
cRangeHebrew, //u05ex
|
|
cRangeHebrew, //u05fx
|
|
},
|
|
{ //table for 0xff00 - 0xffff
|
|
cRangeSetCJK, //uff0x, fullwidth latin
|
|
cRangeSetCJK, //uff1x, fullwidth latin
|
|
cRangeSetCJK, //uff2x, fullwidth latin
|
|
cRangeSetCJK, //uff3x, fullwidth latin
|
|
cRangeSetCJK, //uff4x, fullwidth latin
|
|
cRangeSetCJK, //uff5x, fullwidth latin
|
|
cRangeSetCJK, //uff6x, halfwidth katakana
|
|
cRangeSetCJK, //uff7x, halfwidth katakana
|
|
cRangeSetCJK, //uff8x, halfwidth katakana
|
|
cRangeSetCJK, //uff9x, halfwidth katakana
|
|
cRangeSetCJK, //uffax, halfwidth hangul jamo
|
|
cRangeSetCJK, //uffbx, halfwidth hangul jamo
|
|
cRangeSetCJK, //uffcx, halfwidth hangul jamo
|
|
cRangeSetCJK, //uffdx, halfwidth hangul jamo
|
|
cRangeSetCJK, //uffex, fullwidth symbols
|
|
cRangeSpecials, //ufffx, Specials
|
|
},
|
|
};
|
|
|
|
// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
|
|
// code points so that the number of entries in the tertiary range
|
|
// table for that range is obtained by dividing (0x1700 - 0x0700) by 128.
|
|
// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal
|
|
// syllabaries take multiple chunks and Ogham and Runic share a single chunk.
|
|
static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);
|
|
|
|
static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] =
|
|
{ //table for 0x0700 - 0x1600
|
|
cRangeSyriac, //u070x
|
|
cRangeThaana, //u078x
|
|
cRangeUnassigned, //u080x place holder(resolved in the 2ndary tab.)
|
|
cRangeUnassigned, //u088x place holder(resolved in the 2ndary tab.)
|
|
cRangeDevanagari, //u090x
|
|
cRangeBengali, //u098x
|
|
cRangeGurmukhi, //u0a0x
|
|
cRangeGujarati, //u0a8x
|
|
cRangeOriya, //u0b0x
|
|
cRangeTamil, //u0b8x
|
|
cRangeTelugu, //u0c0x
|
|
cRangeKannada, //u0c8x
|
|
cRangeMalayalam, //u0d0x
|
|
cRangeSinhala, //u0d8x
|
|
cRangeThai, //u0e0x
|
|
cRangeLao, //u0e8x
|
|
cRangeTibetan, //u0f0x place holder(resolved in the 2ndary tab.)
|
|
cRangeTibetan, //u0f8x place holder(resolved in the 2ndary tab.)
|
|
cRangeMyanmar, //u100x
|
|
cRangeGeorgian, //u108x
|
|
cRangeKorean, //u110x place holder(resolved in the 2ndary tab.)
|
|
cRangeKorean, //u118x place holder(resolved in the 2ndary tab.)
|
|
cRangeEthiopic, //u120x place holder(resolved in the 2ndary tab.)
|
|
cRangeEthiopic, //u128x place holder(resolved in the 2ndary tab.)
|
|
cRangeEthiopic, //u130x
|
|
cRangeCherokee, //u138x
|
|
cRangeCanadian, //u140x place holder(resolved in the 2ndary tab.)
|
|
cRangeCanadian, //u148x place holder(resolved in the 2ndary tab.)
|
|
cRangeCanadian, //u150x place holder(resolved in the 2ndary tab.)
|
|
cRangeCanadian, //u158x place holder(resolved in the 2ndary tab.)
|
|
cRangeCanadian, //u160x
|
|
cRangeOghamRunic, //u168x this contains two scripts, Ogham & Runic
|
|
};
|
|
|
|
// A two level index is almost enough for locating a range, with the
|
|
// exception of u03xx and u05xx. Since we don't really care about range for
|
|
// combining diacritical marks in our font application, they are
|
|
// not discriminated further. Future adoption of this method for other use
|
|
// should be aware of this limitation. The implementation can be extended if
|
|
// there is such a need.
|
|
// For Indic, Southeast Asian scripts and some other scripts between
|
|
// U+0700 and U+16FF, it's extended to the third level.
|
|
unsigned findCharUnicodeRange(UChar32 ch)
|
|
{
|
|
if (ch >= 0xFFFF)
|
|
return 0;
|
|
|
|
unsigned range;
|
|
|
|
//search the first table
|
|
range = gUnicodeSubrangeTable[0][ch >> 12];
|
|
|
|
if (range < cRangeTableBase)
|
|
// we try to get a specific range
|
|
return range;
|
|
|
|
// otherwise, we have one more table to look at
|
|
range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8];
|
|
if (range < cRangeTableBase)
|
|
return range;
|
|
if (range < cRangeTertiaryTable)
|
|
return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4];
|
|
|
|
// Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
|
|
return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
|
|
}
|
|
|
|
const char* langGroupFromUnicodeRange(unsigned char unicodeRange)
|
|
{
|
|
if (cRangeSpecificItemNum > unicodeRange)
|
|
return gUnicodeRangeToLangGroupTable[unicodeRange];
|
|
return 0;
|
|
}
|
|
|
|
}
|