mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
This CL also removes the HTML parser and the HTML import system. TBR=eseidel@google.com Review URL: https://codereview.chromium.org/1215103007.
528 lines
15 KiB
C++
528 lines
15 KiB
C++
// Copyright 2014 The Chromium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#include "sky/engine/core/css/parser/MediaQueryTokenizer.h"
|
|
|
|
namespace blink {
|
|
#include "core/MediaQueryTokenizerCodepoints.cpp"
|
|
}
|
|
|
|
#include "sky/engine/core/css/parser/MediaQueryInputStream.h"
|
|
#include "sky/engine/core/html/parser/HTMLParserIdioms.h"
|
|
#include "sky/engine/wtf/text/StringBuilder.h"
|
|
#include "sky/engine/wtf/unicode/CharacterNames.h"
|
|
|
|
namespace blink {
|
|
|
|
// http://dev.w3.org/csswg/css-syntax/#name-start-code-point
|
|
static bool isNameStart(UChar c)
|
|
{
|
|
if (isASCIIAlpha(c))
|
|
return true;
|
|
if (c == '_')
|
|
return true;
|
|
return !isASCII(c);
|
|
}
|
|
|
|
// http://dev.w3.org/csswg/css-syntax/#name-code-point
|
|
static bool isNameChar(UChar c)
|
|
{
|
|
return isNameStart(c) || isASCIIDigit(c) || c == '-';
|
|
}
|
|
|
|
// http://dev.w3.org/csswg/css-syntax/#check-if-two-code-points-are-a-valid-escape
|
|
static bool twoCharsAreValidEscape(UChar first, UChar second)
|
|
{
|
|
return ((first == '\\') && (second != '\n') && (second != kEndOfFileMarker));
|
|
}
|
|
|
|
// Builds a tokenizer that reads from |inputStream|. Only a reference to the
// stream is stored in m_input.
MediaQueryTokenizer::MediaQueryTokenizer(MediaQueryInputStream& inputStream)
    : m_input(inputStream)
{
}
|
|
|
|
// Hands |c| back to the input stream (via pushBack) so that it is seen again
// by the next read.
void MediaQueryTokenizer::reconsume(UChar c)
{
    m_input.pushBack(c);
}
|
|
|
|
// Reads the next input character, advances the stream past it and returns
// the character that was read.
UChar MediaQueryTokenizer::consume()
{
    const UChar consumed = m_input.nextInputChar();
    m_input.advance();
    return consumed;
}
|
|
|
|
void MediaQueryTokenizer::consume(unsigned offset)
|
|
{
|
|
m_input.advance(offset);
|
|
}
|
|
|
|
// Collapses a run of whitespace into a single WhitespaceToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc)
{
    // CSS tokenization is currently lossy: the remaining whitespace is
    // discarded here, although the exact run could be recorded instead.
    consumeUntilNonWhitespace();
    return MediaQueryToken(WhitespaceToken);
}
|
|
|
|
// Removes the top entry of |blockStack| when it equals |type|.
// Returns true if an entry was removed; false if the stack is empty or its
// top entry is a different type.
static bool popIfBlockMatches(Vector<MediaQueryTokenType>& blockStack, MediaQueryTokenType type)
{
    if (blockStack.isEmpty() || blockStack.last() != type)
        return false;
    blockStack.removeLast();
    return true;
}
|
|
|
|
// Records |type| on the block stack and returns a token of that type flagged
// as a block start.
MediaQueryToken MediaQueryTokenizer::blockStart(MediaQueryTokenType type)
{
    m_blockStack.append(type);
    return MediaQueryToken(type, MediaQueryToken::BlockStart);
}
|
|
|
|
// Records |blockType| on the block stack and returns a block-start token of
// |type| that carries |name|. The stacked type and the token type may differ
// (consumeIdentLikeToken stacks LeftParenthesisToken for a FunctionToken).
MediaQueryToken MediaQueryTokenizer::blockStart(MediaQueryTokenType blockType, MediaQueryTokenType type, String name)
{
    m_blockStack.append(blockType);
    return MediaQueryToken(type, name, MediaQueryToken::BlockStart);
}
|
|
|
|
// Returns a token of |type|. It is flagged as a block end only when the
// innermost open block on the stack is |startType| (that entry is popped);
// otherwise a plain token is returned and the stack is left untouched.
MediaQueryToken MediaQueryTokenizer::blockEnd(MediaQueryTokenType type, MediaQueryTokenType startType)
{
    const bool closesOpenBlock = popIfBlockMatches(m_blockStack, startType);
    if (!closesOpenBlock)
        return MediaQueryToken(type);
    return MediaQueryToken(type, MediaQueryToken::BlockEnd);
}
|
|
|
|
// Opens a parenthesis block and returns its block-start token. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::leftParenthesis(UChar cc)
{
    return blockStart(LeftParenthesisToken);
}
|
|
|
|
// Returns a RightParenthesisToken, closing the innermost block if that block
// was opened as LeftParenthesisToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::rightParenthesis(UChar cc)
{
    return blockEnd(RightParenthesisToken, LeftParenthesisToken);
}
|
|
|
|
// Opens a bracket block and returns its block-start token. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::leftBracket(UChar cc)
{
    return blockStart(LeftBracketToken);
}
|
|
|
|
// Returns a RightBracketToken, closing the innermost block if that block was
// opened as LeftBracketToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::rightBracket(UChar cc)
{
    return blockEnd(RightBracketToken, LeftBracketToken);
}
|
|
|
|
// Opens a brace block and returns its block-start token. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::leftBrace(UChar cc)
{
    return blockStart(LeftBraceToken);
}
|
|
|
|
// Returns a RightBraceToken, closing the innermost block if that block was
// opened as LeftBraceToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::rightBrace(UChar cc)
{
    return blockEnd(RightBraceToken, LeftBraceToken);
}
|
|
|
|
// Handles an already-consumed |cc|. If |cc| plus the upcoming input starts a
// number, |cc| is pushed back and a numeric token is consumed; otherwise |cc|
// becomes a DelimiterToken.
MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
{
    if (!nextCharsAreNumber(cc))
        return MediaQueryToken(DelimiterToken, cc);

    reconsume(cc);
    return consumeNumericToken();
}
|
|
|
|
// Returns a DelimiterToken carrying |cc|.
MediaQueryToken MediaQueryTokenizer::asterisk(UChar cc)
{
    return MediaQueryToken(DelimiterToken, cc);
}
|
|
|
|
// Returns a CommaToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::comma(UChar cc)
{
    return MediaQueryToken(CommaToken);
}
|
|
|
|
// Handles an already-consumed |cc|. A number is tried first, then an
// identifier; in both cases |cc| is pushed back so that the sub-tokenizer
// sees it. If neither applies, |cc| becomes a DelimiterToken.
MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc)
{
    const bool startsNumber = nextCharsAreNumber(cc);
    // The identifier check only runs when the number check has failed.
    if (startsNumber || nextCharsAreIdentifier(cc)) {
        reconsume(cc);
        return startsNumber ? consumeNumericToken() : consumeIdentLikeToken();
    }
    return MediaQueryToken(DelimiterToken, cc);
}
|
|
|
|
// Handles an already-consumed |cc|. When followed by '*', a comment is
// consumed and reported as a CommentToken, or as an EOFToken if the input
// ends before the comment is closed. Otherwise |cc| becomes a DelimiterToken.
MediaQueryToken MediaQueryTokenizer::solidus(UChar cc)
{
    if (!consumeIfNext('*'))
        return MediaQueryToken(DelimiterToken, cc);

    // We're intentionally deviating from the spec here, by creating tokens for CSS comments.
    if (consumeUntilCommentEndFound())
        return MediaQueryToken(CommentToken);
    return MediaQueryToken(EOFToken);
}
|
|
|
|
// Returns a ColonToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::colon(UChar cc)
{
    return MediaQueryToken(ColonToken);
}
|
|
|
|
// Returns a SemicolonToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc)
{
    return MediaQueryToken(SemicolonToken);
}
|
|
|
|
// Handles an already-consumed |cc|. If |cc| and the next input character form
// a valid escape, |cc| is pushed back and an ident-like token is consumed;
// otherwise |cc| becomes a DelimiterToken.
MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc)
{
    const bool startsEscape = twoCharsAreValidEscape(cc, m_input.nextInputChar());
    if (!startsEscape)
        return MediaQueryToken(DelimiterToken, cc);

    reconsume(cc);
    return consumeIdentLikeToken();
}
|
|
|
|
// Pushes the already-consumed |cc| back onto the input and consumes a
// complete numeric token starting at it.
MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc)
{
    reconsume(cc);
    return consumeNumericToken();
}
|
|
|
|
// Pushes the already-consumed |cc| back onto the input and consumes an
// ident-like token starting at it.
MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc)
{
    reconsume(cc);
    return consumeIdentLikeToken();
}
|
|
|
|
// Consumes a string token terminated by the same character, |cc|, that
// opened it.
MediaQueryToken MediaQueryTokenizer::stringStart(UChar cc)
{
    return consumeStringTokenUntil(cc);
}
|
|
|
|
// Returns an EOFToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc)
{
    return MediaQueryToken(EOFToken);
}
|
|
|
|
// Tokenizes |string| and appends every token, including the final EOFToken,
// to |outTokens|. An empty |string| appends nothing at all.
void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTokens)
{
    // According to the spec, we should perform preprocessing here.
    // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing
    //
    // However, we can skip this step since:
    // * We're using HTML spaces (which accept \r and \f as a valid white space)
    // * Do not count white spaces
    // * consumeEscape replaces NULLs for replacement characters

    if (string.isEmpty())
        return;

    MediaQueryInputStream input(string);
    MediaQueryTokenizer tokenizer(input);
    bool reachedEnd = false;
    while (!reachedEnd) {
        MediaQueryToken token = tokenizer.nextToken();
        outTokens.append(token);
        // The EOF token is appended too; it terminates the loop afterwards.
        reachedEnd = token.type() == EOFToken;
    }
}
|
|
|
|
// Consumes one character and dispatches to the handler registered for it.
// ASCII characters are looked up in the codePoints table; every non-ASCII
// character is handled by nameStart. A character with no handler becomes a
// DelimiterToken.
MediaQueryToken MediaQueryTokenizer::nextToken()
{
    // Unlike the HTMLTokenizer, the CSS Syntax spec is written
    // as a stateless, (fixed-size) look-ahead tokenizer.
    // We could move to the stateful model and instead create
    // states for all the "next 3 codepoints are X" cases.
    // State-machine tokenizers are easier to write to handle
    // incremental tokenization of partial sources.
    // However, for now we follow the spec exactly.
    UChar cc = consume();

    CodePoint handler = &MediaQueryTokenizer::nameStart;
    if (isASCII(cc)) {
        ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber);
        handler = codePoints[cc];
    }

    if (!handler)
        return MediaQueryToken(DelimiterToken, cc);
    return (this->*handler)(cc);
}
|
|
|
|
// Reads an optional '+' or '-' located |offset| characters ahead in |input|.
// Returns -1 for '-', and 1 for '+' or for no sign at all. |offset| is
// advanced past the sign character when one is present.
static int getSign(MediaQueryInputStream& input, unsigned& offset)
{
    // Both signs are read at |offset|. The '+' case used to look at the next
    // input character regardless of |offset|, which only matched the '-'
    // case when |offset| was 0.
    const UChar signChar = input.peek(offset);
    if (signChar != '+' && signChar != '-')
        return 1;
    ++offset;
    return signChar == '-' ? -1 : 1;
}
|
|
|
|
// Skips the run of ASCII digits that starts |offset| characters ahead in
// |input|, leaves |offset| just past that run, and returns the value that
// getUInt reports for the skipped range.
static unsigned long long getInteger(MediaQueryInputStream& input, unsigned& offset)
{
    const unsigned digitsBegin = offset;
    offset = input.skipWhilePredicate<isASCIIDigit>(digitsBegin);
    return input.getUInt(digitsBegin, offset);
}
|
|
|
|
// Reads an optional fractional part ('.' followed by at least one ASCII
// digit) located |offset| characters ahead in |input|.
//
// On a match, |offset| is advanced past the last fraction digit and
// |digitsNumber| receives the length of the matched range, which includes
// the '.' itself. Without a match, |offset| is left untouched and
// |digitsNumber| is set to 0. The return value is whatever getDouble reports
// for the matched (possibly empty) range.
static double getFraction(MediaQueryInputStream& input, unsigned& offset, unsigned& digitsNumber)
{
    unsigned fractionStartPos = 0;
    unsigned fractionEndPos = 0;
    // Look ahead without modifying |offset|: a '.' that is not followed by a
    // digit is not part of the number and must not be consumed with it.
    if (input.peek(offset) == '.' && isASCIIDigit(input.peek(offset + 1))) {
        fractionStartPos = offset;
        offset = input.skipWhilePredicate<isASCIIDigit>(offset + 1);
        fractionEndPos = offset;
    }
    digitsNumber = fractionEndPos - fractionStartPos;
    return input.getDouble(fractionStartPos, fractionEndPos);
}
|
|
|
|
// Reads an optional exponent ('e' or 'E', an optional sign, then at least one
// ASCII digit) located |offset| characters ahead in |input|.
//
// On a match, |offset| is advanced past the last exponent digit and |sign| is
// set to -1 if the exponent is negative (it is left unchanged otherwise).
// Without a match, neither |offset| nor |sign| is modified. The return value
// is whatever getUInt reports for the matched (possibly empty) digit range.
static unsigned long long getExponent(MediaQueryInputStream& input, unsigned& offset, int& sign)
{
    unsigned exponentStartPos = 0;
    unsigned exponentEndPos = 0;
    const UChar marker = input.peek(offset);
    if (marker == 'E' || marker == 'e') {
        // Work on a local cursor so that nothing is committed until we know
        // that digits actually follow the marker.
        unsigned cursor = offset + 1;
        bool isNegative = false;
        const UChar signChar = input.peek(cursor);
        if (signChar == '+') {
            ++cursor;
        } else if (signChar == '-') {
            isNegative = true;
            ++cursor;
        }
        const unsigned digitsEnd = input.skipWhilePredicate<isASCIIDigit>(cursor);
        if (digitsEnd > cursor) {
            exponentStartPos = cursor;
            exponentEndPos = digitsEnd;
            offset = digitsEnd;
            if (isNegative)
                sign = -1;
        }
    }
    return input.getUInt(exponentStartPos, exponentEndPos);
}
|
|
|
|
// This method merges the following spec sections for efficiency
|
|
// http://www.w3.org/TR/css3-syntax/#consume-a-number
|
|
// http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
|
|
MediaQueryToken MediaQueryTokenizer::consumeNumber()
|
|
{
|
|
ASSERT(nextCharsAreNumber());
|
|
NumericValueType type = IntegerValueType;
|
|
double value = 0;
|
|
unsigned offset = 0;
|
|
int exponentSign = 1;
|
|
unsigned fractionDigits;
|
|
int sign = getSign(m_input, offset);
|
|
unsigned long long integerPart = getInteger(m_input, offset);
|
|
double fractionPart = getFraction(m_input, offset, fractionDigits);
|
|
unsigned long long exponentPart = getExponent(m_input, offset, exponentSign);
|
|
double exponent = pow(10, (float)exponentSign * (double)exponentPart);
|
|
value = (double)sign * ((double)integerPart + fractionPart) * exponent;
|
|
|
|
m_input.advance(offset);
|
|
if (fractionDigits > 0)
|
|
type = NumberValueType;
|
|
|
|
return MediaQueryToken(NumberToken, value, type);
|
|
}
|
|
|
|
// http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
|
|
MediaQueryToken MediaQueryTokenizer::consumeNumericToken()
|
|
{
|
|
MediaQueryToken token = consumeNumber();
|
|
if (nextCharsAreIdentifier())
|
|
token.convertToDimensionWithUnit(consumeName());
|
|
else if (consumeIfNext('%'))
|
|
token.convertToPercentage();
|
|
return token;
|
|
}
|
|
|
|
// http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
|
|
MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken()
|
|
{
|
|
String name = consumeName();
|
|
if (consumeIfNext('(')) {
|
|
return blockStart(LeftParenthesisToken, FunctionToken, name);
|
|
}
|
|
return MediaQueryToken(IdentToken, name);
|
|
}
|
|
|
|
// Returns true when |cc| is a line feed, carriage return or form feed.
static bool isNewLine(UChar cc)
{
    // We check \r and \f here, since we have no preprocessing stage
    switch (cc) {
    case '\n':
    case '\r':
    case '\f':
        return true;
    default:
        return false;
    }
}
|
|
|
|
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token
|
|
MediaQueryToken MediaQueryTokenizer::consumeStringTokenUntil(UChar endingCodePoint)
|
|
{
|
|
StringBuilder output;
|
|
while (true) {
|
|
UChar cc = consume();
|
|
if (cc == endingCodePoint || cc == kEndOfFileMarker) {
|
|
// The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF
|
|
if (cc == kEndOfFileMarker)
|
|
reconsume(cc);
|
|
return MediaQueryToken(StringToken, output.toString());
|
|
}
|
|
if (isNewLine(cc)) {
|
|
reconsume(cc);
|
|
return MediaQueryToken(BadStringToken);
|
|
}
|
|
if (cc == '\\') {
|
|
if (m_input.nextInputChar() == kEndOfFileMarker)
|
|
continue;
|
|
if (isNewLine(m_input.nextInputChar()))
|
|
consume();
|
|
else
|
|
output.append(consumeEscape());
|
|
} else {
|
|
output.append(cc);
|
|
}
|
|
}
|
|
}
|
|
|
|
void MediaQueryTokenizer::consumeUntilNonWhitespace()
|
|
{
|
|
// Using HTML space here rather than CSS space since we don't do preprocessing
|
|
while (isHTMLSpace<UChar>(m_input.nextInputChar()))
|
|
consume();
|
|
}
|
|
|
|
bool MediaQueryTokenizer::consumeUntilCommentEndFound()
|
|
{
|
|
UChar c = consume();
|
|
while (true) {
|
|
if (c == kEndOfFileMarker)
|
|
return false;
|
|
if (c != '*') {
|
|
c = consume();
|
|
continue;
|
|
}
|
|
c = consume();
|
|
if (c == '/')
|
|
break;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Consumes the next input character only if it equals |character|.
// Returns whether a character was consumed.
bool MediaQueryTokenizer::consumeIfNext(UChar character)
{
    if (m_input.nextInputChar() != character)
        return false;
    consume();
    return true;
}
|
|
|
|
// http://www.w3.org/TR/css3-syntax/#consume-a-name
|
|
String MediaQueryTokenizer::consumeName()
|
|
{
|
|
// FIXME: Is this as efficient as it can be?
|
|
// The possibility of escape chars mandates a copy AFAICT.
|
|
StringBuilder result;
|
|
while (true) {
|
|
UChar cc = consume();
|
|
if (isNameChar(cc)) {
|
|
result.append(cc);
|
|
continue;
|
|
}
|
|
if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) {
|
|
result.append(consumeEscape());
|
|
continue;
|
|
}
|
|
reconsume(cc);
|
|
return result.toString();
|
|
}
|
|
}
|
|
|
|
// http://dev.w3.org/csswg/css-syntax/#consume-an-escaped-code-point
|
|
UChar MediaQueryTokenizer::consumeEscape()
|
|
{
|
|
UChar cc = consume();
|
|
ASSERT(cc != '\n');
|
|
if (isASCIIHexDigit(cc)) {
|
|
unsigned consumedHexDigits = 1;
|
|
StringBuilder hexChars;
|
|
hexChars.append(cc);
|
|
while (consumedHexDigits < 6 && isASCIIHexDigit(m_input.nextInputChar())) {
|
|
cc = consume();
|
|
hexChars.append(cc);
|
|
consumedHexDigits++;
|
|
};
|
|
bool ok = false;
|
|
UChar codePoint = hexChars.toString().toUIntStrict(&ok, 16);
|
|
if (!ok)
|
|
return WTF::Unicode::replacementCharacter;
|
|
return codePoint;
|
|
}
|
|
|
|
// Replaces NULLs with replacement characters, since we do not perform preprocessing
|
|
if (cc == kEndOfFileMarker)
|
|
return WTF::Unicode::replacementCharacter;
|
|
return cc;
|
|
}
|
|
|
|
bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
|
|
{
|
|
if (m_input.leftChars() < 1)
|
|
return false;
|
|
return twoCharsAreValidEscape(m_input.nextInputChar(), m_input.peek(1));
|
|
}
|
|
|
|
// http://www.w3.org/TR/css3-syntax/#starts-with-a-number
|
|
bool MediaQueryTokenizer::nextCharsAreNumber(UChar first)
|
|
{
|
|
UChar second = m_input.nextInputChar();
|
|
if (isASCIIDigit(first))
|
|
return true;
|
|
if (first == '+' || first == '-')
|
|
return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input.peek(1))));
|
|
if (first =='.')
|
|
return (isASCIIDigit(second));
|
|
return false;
|
|
}
|
|
|
|
bool MediaQueryTokenizer::nextCharsAreNumber()
|
|
{
|
|
UChar first = consume();
|
|
bool areNumber = nextCharsAreNumber(first);
|
|
reconsume(first);
|
|
return areNumber;
|
|
}
|
|
|
|
// http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
|
|
bool MediaQueryTokenizer::nextCharsAreIdentifier(UChar first)
|
|
{
|
|
UChar second = m_input.nextInputChar();
|
|
if (isNameStart(first) || twoCharsAreValidEscape(first, second))
|
|
return true;
|
|
|
|
if (first == '-') {
|
|
if (isNameStart(m_input.nextInputChar()))
|
|
return true;
|
|
return nextTwoCharsAreValidEscape();
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool MediaQueryTokenizer::nextCharsAreIdentifier()
|
|
{
|
|
UChar first = consume();
|
|
bool areIdentifier = nextCharsAreIdentifier(first);
|
|
reconsume(first);
|
|
return areIdentifier;
|
|
}
|
|
|
|
} // namespace blink
|