mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
This caused us to lose our gn check certification. :( Turns out gn check was just ignoring all the header paths it didn't understand and so gn check passing for sky wasn't meaning much. I tried to straighten out some of the mess in this CL, but it's going to take several more rounds of massaging before gn check passes again. On the bright side (almost) all of our headers are absolute now. Turns out my script (attached to the bug) didn't notice ../ includes but I'll fix that in the next patch. R=abarth@chromium.org BUG=435361 Review URL: https://codereview.chromium.org/746023002
416 lines
15 KiB
C++
416 lines
15 KiB
C++
/*
|
|
* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
|
|
* Copyright (C) 2005 Alexey Proskuryakov.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "sky/engine/config.h"
|
|
#include "sky/engine/platform/text/UnicodeUtilities.h"
|
|
|
|
#include <unicode/unorm.h>
|
|
#include "sky/engine/wtf/text/StringBuffer.h"
|
|
#include "sky/engine/wtf/unicode/CharacterNames.h"
|
|
|
|
using namespace WTF::Unicode;
|
|
|
|
namespace blink {
|
|
|
|
// Classifies a kana letter by the sound mark that is already composed into it.
enum VoicedSoundMarkType {
    NoVoicedSoundMark = 0,   // The letter carries no composed sound mark.
    VoicedSoundMark = 1,     // The letter is a voiced form (GA, ZA, DA, BA, VU, ...).
    SemiVoicedSoundMark = 2  // The letter is a semi-voiced form (PA, PI, PU, PE, PO).
};
|
|
|
|
template <typename CharType>
|
|
static inline CharType foldQuoteMarkOrSoftHyphen(CharType c)
|
|
{
|
|
switch (static_cast<UChar>(c)) {
|
|
case hebrewPunctuationGershayim:
|
|
case leftDoubleQuotationMark:
|
|
case rightDoubleQuotationMark:
|
|
return '"';
|
|
case hebrewPunctuationGeresh:
|
|
case leftSingleQuotationMark:
|
|
case rightSingleQuotationMark:
|
|
return '\'';
|
|
case softHyphen:
|
|
// Replace soft hyphen with an ignorable character so that their presence or absence will
|
|
// not affect string comparison.
|
|
return 0;
|
|
default:
|
|
return c;
|
|
}
|
|
}
|
|
|
|
// Applies foldQuoteMarkOrSoftHyphen() in place to each of the |length| code
// units starting at |data|.
void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length)
{
    UChar* cursor = data;
    for (size_t remaining = length; remaining; --remaining, ++cursor)
        *cursor = foldQuoteMarkOrSoftHyphen(*cursor);
}
|
|
|
|
// String flavour of the quote/soft-hyphen folding: rewrites |s| so that the
// Hebrew and curly quotation marks become their ASCII counterparts and soft
// hyphens become 0.
void foldQuoteMarksAndSoftHyphens(String& s)
{
    struct Folding {
        UChar from;
        UChar to;
    };
    static const Folding foldings[] = {
        { hebrewPunctuationGeresh, '\'' },
        { hebrewPunctuationGershayim, '"' },
        { leftDoubleQuotationMark, '"' },
        { leftSingleQuotationMark, '\'' },
        { rightDoubleQuotationMark, '"' },
        { rightSingleQuotationMark, '\'' },
        // Soft hyphens are swapped for an ignorable character so that their
        // presence or absence does not affect string comparison.
        { softHyphen, 0 },
    };

    const size_t foldingCount = sizeof(foldings) / sizeof(foldings[0]);
    for (size_t i = 0; i < foldingCount; ++i)
        s.replace(foldings[i].from, foldings[i].to);
}
|
|
|
|
// Separator test for code points at or above 256 (asserted): true when the
// character's general category falls in one of the symbol, punctuation,
// separator or format groups.
static bool isNonLatin1Separator(UChar32 character)
{
    ASSERT_ARG(character, character >= 256);

    const uint32_t separatorCategories = U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK;
    return U_GET_GC_MASK(character) & separatorCategories;
}
|
|
|
|
// Reports whether |character| counts as a separator.
//
// Code points below 256 are answered from a fixed lookup table (the flagged
// ASCII entries are spelled out next to each row). Everything at or above 256
// is delegated to isNonLatin1Separator(). Negative values are not code points
// and are reported as non-separators.
bool isSeparator(UChar32 character)
{
    static const bool latin1SeparatorTable[256] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // : ; < = > ?
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // [ \ ] ^ _
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // `
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // { | } ~
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
    };

    // UChar32 is a signed type, so a negative value would otherwise satisfy
    // the "< 256" test below and index the table out of bounds.
    if (character < 0)
        return false;

    if (character < 256)
        return latin1SeparatorTable[character];

    return isNonLatin1Separator(character);
}
|
|
|
|
// ICU's search ignores the distinction between small kana letters and ones
|
|
// that are not small, and also characters that differ only in the voicing
|
|
// marks when considering only primary collation strength differences.
|
|
// This is not helpful for end users, since these differences make words
|
|
// distinct, so for our purposes we need these to be considered.
|
|
// The Unicode folks do not think the collation algorithm should be
|
|
// changed. To work around this, we would like to tailor the ICU searcher,
|
|
// but we can't get that to work yet. So instead, we check for cases where
|
|
// these differences occur, and skip those matches.
|
|
|
|
// We refer to the above technique as the "kana workaround". The next few
|
|
// functions are helper functions for the kana workaround.
|
|
|
|
// Returns true when |character| lies in one of the kana letter ranges
// recognised by the kana workaround.
bool isKanaLetter(UChar character)
{
    const bool isHiragana = character >= 0x3041 && character <= 0x3096;

    // Katakana letters occupy two separate ranges.
    const bool isKatakana = (character >= 0x30A1 && character <= 0x30FA)
        || (character >= 0x31F0 && character <= 0x31FF);

    // 0xFF70 lies inside the halfwidth range but is deliberately not counted
    // as a letter.
    const bool isHalfwidthKatakana = character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70;

    return isHiragana || isKatakana || isHalfwidthKatakana;
}
|
|
|
|
// Returns true when |character| is one of the small kana letters.
// |character| must already be a kana letter (asserted).
bool isSmallKanaLetter(UChar character)
{
    ASSERT(isKanaLetter(character));

    // 0x31F0..0x31FF: KATAKANA LETTER SMALL KU, SI, SU, TO, NU, HA, HI, HU,
    // HE, HO, MU, RA, RI, RU, RE and RO. Every code point in the range is small.
    if (character >= 0x31F0 && character <= 0x31FF)
        return true;

    // 0xFF67..0xFF6F: HALFWIDTH KATAKANA LETTER SMALL A, I, U, E, O, YA, YU,
    // YO and TU. Every code point in the range is small.
    if (character >= 0xFF67 && character <= 0xFF6F)
        return true;

    // The remaining small letters are interleaved with full-size ones and
    // have to be listed individually.
    switch (character) {
    // Hiragana.
    case 0x3041: // SMALL A
    case 0x3043: // SMALL I
    case 0x3045: // SMALL U
    case 0x3047: // SMALL E
    case 0x3049: // SMALL O
    case 0x3063: // SMALL TU
    case 0x3083: // SMALL YA
    case 0x3085: // SMALL YU
    case 0x3087: // SMALL YO
    case 0x308E: // SMALL WA
    case 0x3095: // SMALL KA
    case 0x3096: // SMALL KE
    // Katakana.
    case 0x30A1: // SMALL A
    case 0x30A3: // SMALL I
    case 0x30A5: // SMALL U
    case 0x30A7: // SMALL E
    case 0x30A9: // SMALL O
    case 0x30C3: // SMALL TU
    case 0x30E3: // SMALL YA
    case 0x30E5: // SMALL YU
    case 0x30E7: // SMALL YO
    case 0x30EE: // SMALL WA
    case 0x30F5: // SMALL KA
    case 0x30F6: // SMALL KE
        return true;
    default:
        return false;
    }
}
|
|
|
|
// Tells which sound mark, if any, is already composed into the kana letter
// |character|. |character| must be a kana letter (asserted).
static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character)
{
    ASSERT(isKanaLetter(character));

    // KATAKANA LETTER VA, VI, VE and VO are voiced and have no hiragana
    // counterpart in the table below.
    if (character >= 0x30F7 && character <= 0x30FA)
        return VoicedSoundMark;

    // Every other katakana entry this function recognises is its hiragana
    // entry plus 0x60 (for instance 0x304C HIRAGANA GA / 0x30AC KATAKANA GA),
    // so katakana in 0x30A1..0x30F6 is folded onto hiragana and one table
    // serves both scripts.
    UChar hiragana = character;
    if (character >= 0x30A1 && character <= 0x30F6)
        hiragana = static_cast<UChar>(character - 0x60);

    switch (hiragana) {
    case 0x304C: // GA
    case 0x304E: // GI
    case 0x3050: // GU
    case 0x3052: // GE
    case 0x3054: // GO
    case 0x3056: // ZA
    case 0x3058: // ZI
    case 0x305A: // ZU
    case 0x305C: // ZE
    case 0x305E: // ZO
    case 0x3060: // DA
    case 0x3062: // DI
    case 0x3065: // DU
    case 0x3067: // DE
    case 0x3069: // DO
    case 0x3070: // BA
    case 0x3073: // BI
    case 0x3076: // BU
    case 0x3079: // BE
    case 0x307C: // BO
    case 0x3094: // VU
        return VoicedSoundMark;
    case 0x3071: // PA
    case 0x3074: // PI
    case 0x3077: // PU
    case 0x307A: // PE
    case 0x307D: // PO
        return SemiVoicedSoundMark;
    default:
        return NoVoicedSoundMark;
    }
}
|
|
|
|
// True for the two combining sound marks:
//   0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
//   0x309A COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
static inline bool isCombiningVoicedSoundMark(UChar character)
{
    return character == 0x3099 || character == 0x309A;
}
|
|
|
|
bool containsKanaLetters(const String& pattern)
|
|
{
|
|
const unsigned length = pattern.length();
|
|
for (unsigned i = 0; i < length; ++i) {
|
|
if (isKanaLetter(pattern[i]))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void normalizeCharactersIntoNFCForm(const UChar* characters, unsigned length, Vector<UChar>& buffer)
|
|
{
|
|
ASSERT(length);
|
|
|
|
buffer.resize(length);
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status);
|
|
ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR);
|
|
ASSERT(bufferSize);
|
|
|
|
buffer.resize(bufferSize);
|
|
|
|
if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING)
|
|
return;
|
|
|
|
status = U_ZERO_ERROR;
|
|
unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status);
|
|
ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
|
|
}
|
|
|
|
// This function returns kNotFound if |first| and |second| contain different Kana letters.
|
|
// If |first| and |second| contain the same Kana letter
|
|
// then function returns offset in characters from |first|.
|
|
// Pointers to both strings increase simultaneously so so it is possible to use one offset value.
|
|
static inline size_t compareKanaLetterAndComposedVoicedSoundMarks(const UChar* first, const UChar* firstEnd, const UChar* second, const UChar* secondEnd)
|
|
{
|
|
const UChar* start = first;
|
|
// Check for differences in the kana letter character itself.
|
|
if (isSmallKanaLetter(*first) != isSmallKanaLetter(*second))
|
|
return kNotFound;
|
|
if (composedVoicedSoundMark(*first) != composedVoicedSoundMark(*second))
|
|
return kNotFound;
|
|
++first;
|
|
++second;
|
|
|
|
// Check for differences in combining voiced sound marks found after the letter.
|
|
while (true) {
|
|
const bool secondIsNotSoundMark = second == secondEnd || !isCombiningVoicedSoundMark(*second);
|
|
if (first == firstEnd || !isCombiningVoicedSoundMark(*first)) {
|
|
return secondIsNotSoundMark ? first - start : kNotFound;
|
|
}
|
|
if (secondIsNotSoundMark)
|
|
return kNotFound;
|
|
if (*first != *second)
|
|
return kNotFound;
|
|
++first;
|
|
++second;
|
|
}
|
|
}
|
|
|
|
bool checkOnlyKanaLettersInStrings(const UChar* firstData, unsigned firstLength, const UChar* secondData, unsigned secondLength)
|
|
{
|
|
const UChar* a = firstData;
|
|
const UChar* aEnd = firstData + firstLength;
|
|
|
|
const UChar* b = secondData;
|
|
const UChar* bEnd = secondData + secondLength;
|
|
while (true) {
|
|
// Skip runs of non-kana-letter characters. This is necessary so we can
|
|
// correctly handle strings where the |firstData| and |secondData| have different-length
|
|
// runs of characters that match, while still double checking the correctness
|
|
// of matches of kana letters with other kana letters.
|
|
while (a != aEnd && !isKanaLetter(*a))
|
|
++a;
|
|
while (b != bEnd && !isKanaLetter(*b))
|
|
++b;
|
|
|
|
// If we reached the end of either the target or the match, we should have
|
|
// reached the end of both; both should have the same number of kana letters.
|
|
if (a == aEnd || b == bEnd) {
|
|
return a == aEnd && b == bEnd;
|
|
}
|
|
|
|
// Check that single Kana letters in |a| and |b| are the same.
|
|
const size_t offset = compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd);
|
|
if (offset == kNotFound)
|
|
return false;
|
|
|
|
// Update values of |a| and |b| after comparing.
|
|
a += offset;
|
|
b += offset;
|
|
}
|
|
}
|
|
|
|
bool checkKanaStringsEqual(const UChar* firstData, unsigned firstLength, const UChar* secondData, unsigned secondLength)
|
|
{
|
|
const UChar* a = firstData;
|
|
const UChar* aEnd = firstData + firstLength;
|
|
|
|
const UChar* b = secondData;
|
|
const UChar* bEnd = secondData + secondLength;
|
|
while (true) {
|
|
// Check for non-kana-letter characters.
|
|
while (a != aEnd && !isKanaLetter(*a) && b != bEnd && !isKanaLetter(*b)) {
|
|
if (*a++ != *b++)
|
|
return false;
|
|
}
|
|
|
|
// If we reached the end of either the target or the match, we should have
|
|
// reached the end of both; both should have the same number of kana letters.
|
|
if (a == aEnd || b == bEnd) {
|
|
return a == aEnd && b == bEnd;
|
|
}
|
|
|
|
if (isKanaLetter(*a) != isKanaLetter(*b))
|
|
return false;
|
|
|
|
// Check that single Kana letters in |a| and |b| are the same.
|
|
const size_t offset = compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd);
|
|
if (offset == kNotFound)
|
|
return false;
|
|
|
|
// Update values of |a| and |b| after comparing.
|
|
a += offset;
|
|
b += offset;
|
|
}
|
|
}
|
|
|
|
}
|