flutter_flutter/engine/core/css/parser/MediaQueryTokenizer.cpp
Eric Seidel e0fd75b5ab Make absolute and sort all Sky headers
This caused us to lose our gn check certification. :(

Turns out gn check was just ignoring all the header
paths it didn't understand and so gn check passing
for sky wasn't meaning much.  I tried to straighten
out some of the mess in this CL, but it's going to take
several more rounds of massaging before gn check
passes again.  On the bright side (almost) all of
our headers are absolute now.  Turns out my script
(attached to the bug) didn't notice ../ includes
but I'll fix that in the next patch.

R=abarth@chromium.org
BUG=435361

Review URL: https://codereview.chromium.org/746023002
2014-11-20 17:42:05 -08:00

528 lines
15 KiB
C++

// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "sky/engine/config.h"
#include "sky/engine/core/css/parser/MediaQueryTokenizer.h"
namespace blink {
#include "core/MediaQueryTokenizerCodepoints.cpp"
}
#include "sky/engine/core/css/parser/MediaQueryInputStream.h"
#include "sky/engine/core/html/parser/HTMLParserIdioms.h"
#include "sky/engine/wtf/unicode/CharacterNames.h"
namespace blink {
// http://dev.w3.org/csswg/css-syntax/#name-start-code-point
// Returns true for an ASCII letter, an underscore, or any non-ASCII code unit.
static bool isNameStart(UChar c)
{
    return isASCIIAlpha(c) || c == '_' || !isASCII(c);
}
// http://dev.w3.org/csswg/css-syntax/#name-code-point
// Returns true for anything isNameStart() accepts, plus ASCII digits and '-'.
static bool isNameChar(UChar c)
{
    if (isNameStart(c))
        return true;
    return isASCIIDigit(c) || c == '-';
}
// http://dev.w3.org/csswg/css-syntax/#check-if-two-code-points-are-a-valid-escape
// Returns true when |first| is a backslash and |second| is neither a newline
// nor the end-of-file marker.
static bool twoCharsAreValidEscape(UChar first, UChar second)
{
    if (first != '\\')
        return false;
    // This tokenizer has no preprocessing stage, so '\r' and '\f' have not been
    // normalized to '\n' and must be rejected here as newlines too.
    if (second == '\n' || second == '\r' || second == '\f')
        return false;
    return second != kEndOfFileMarker;
}
// Constructs a tokenizer whose m_input member is initialized from |inputStream|.
// NOTE(review): m_input is presumably a reference to the caller's stream, in
// which case |inputStream| must outlive the tokenizer -- confirm in the header.
MediaQueryTokenizer::MediaQueryTokenizer(MediaQueryInputStream& inputStream)
: m_input(inputStream)
{
}
// Hands |c| back to the input stream via pushBack() so that it can be read
// again; used throughout this file to undo a preceding consume().
void MediaQueryTokenizer::reconsume(UChar c)
{
m_input.pushBack(c);
}
// Reads the next input character, advances the stream past it, and returns
// the character that was read.
UChar MediaQueryTokenizer::consume()
{
    const UChar consumed = m_input.nextInputChar();
    m_input.advance();
    return consumed;
}
// Advances the input stream by |offset| without returning what was skipped.
void MediaQueryTokenizer::consume(unsigned offset)
{
m_input.advance(offset);
}
// Token handler for a whitespace character: skips the rest of the whitespace
// run and emits a single WhitespaceToken.
MediaQueryToken MediaQueryTokenizer::whiteSpace(UChar cc)
{
    // Tokenization is lossy here: the exact whitespace is discarded, although
    // it could be recorded instead.
    consumeUntilNonWhitespace();
    MediaQueryToken whitespaceToken(WhitespaceToken);
    return whitespaceToken;
}
// Removes the top entry of |blockStack| if it equals |type|.
// Returns true when an entry was removed, false when the stack is empty or
// its top entry is a different type.
static bool popIfBlockMatches(Vector<MediaQueryTokenType>& blockStack, MediaQueryTokenType type)
{
    if (blockStack.isEmpty() || blockStack.last() != type)
        return false;
    blockStack.removeLast();
    return true;
}
// Records |type| on the open-block stack and returns a token of that type
// flagged as BlockStart.
MediaQueryToken MediaQueryTokenizer::blockStart(MediaQueryTokenType type)
{
    m_blockStack.append(type);
    MediaQueryToken opener(type, MediaQueryToken::BlockStart);
    return opener;
}
// Records |blockType| on the open-block stack and returns a BlockStart token
// of kind |type| carrying |name|. The stacked type and the emitted token type
// may differ.
MediaQueryToken MediaQueryTokenizer::blockStart(MediaQueryTokenType blockType, MediaQueryTokenType type, String name)
{
    m_blockStack.append(blockType);
    MediaQueryToken opener(type, name, MediaQueryToken::BlockStart);
    return opener;
}
// Returns a token of kind |type|. It is flagged as BlockEnd only when the
// innermost open block is |startType|, in which case that block is popped;
// otherwise a plain token is returned and the stack is untouched.
MediaQueryToken MediaQueryTokenizer::blockEnd(MediaQueryTokenType type, MediaQueryTokenType startType)
{
    const bool closesOpenBlock = popIfBlockMatches(m_blockStack, startType);
    if (!closesOpenBlock)
        return MediaQueryToken(type);
    return MediaQueryToken(type, MediaQueryToken::BlockEnd);
}
// Token handler that opens a parenthesis block. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::leftParenthesis(UChar cc)
{
    MediaQueryToken opener = blockStart(LeftParenthesisToken);
    return opener;
}
// Token handler that closes a parenthesis block if one is innermost. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::rightParenthesis(UChar cc)
{
    MediaQueryToken closer = blockEnd(RightParenthesisToken, LeftParenthesisToken);
    return closer;
}
// Token handler that opens a bracket block. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::leftBracket(UChar cc)
{
    MediaQueryToken opener = blockStart(LeftBracketToken);
    return opener;
}
// Token handler that closes a bracket block if one is innermost. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::rightBracket(UChar cc)
{
    MediaQueryToken closer = blockEnd(RightBracketToken, LeftBracketToken);
    return closer;
}
// Token handler that opens a brace block. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::leftBrace(UChar cc)
{
    MediaQueryToken opener = blockStart(LeftBraceToken);
    return opener;
}
// Token handler that closes a brace block if one is innermost. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::rightBrace(UChar cc)
{
    MediaQueryToken closer = blockEnd(RightBraceToken, LeftBraceToken);
    return closer;
}
// Token handler for an already-consumed |cc| that may begin a number.
// If |cc| plus the upcoming input starts a number, |cc| is pushed back and a
// numeric token is consumed; otherwise |cc| becomes a delimiter token.
MediaQueryToken MediaQueryTokenizer::plusOrFullStop(UChar cc)
{
    if (!nextCharsAreNumber(cc))
        return MediaQueryToken(DelimiterToken, cc);
    reconsume(cc);
    return consumeNumericToken();
}
// Token handler that emits |cc| as a delimiter token.
MediaQueryToken MediaQueryTokenizer::asterisk(UChar cc)
{
    MediaQueryToken delimiter(DelimiterToken, cc);
    return delimiter;
}
// Token handler that emits a CommaToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::comma(UChar cc)
{
    MediaQueryToken commaToken(CommaToken);
    return commaToken;
}
// Token handler for an already-consumed |cc| that may begin a number or an
// identifier. The number check takes precedence over the identifier check;
// when neither applies, |cc| becomes a delimiter token.
MediaQueryToken MediaQueryTokenizer::hyphenMinus(UChar cc)
{
    const bool startsNumber = nextCharsAreNumber(cc);
    if (startsNumber || nextCharsAreIdentifier(cc)) {
        // Push |cc| back so that the sub-consumer sees the whole token.
        reconsume(cc);
        return startsNumber ? consumeNumericToken() : consumeIdentLikeToken();
    }
    return MediaQueryToken(DelimiterToken, cc);
}
// Token handler for an already-consumed |cc| that may open a comment.
// If the next character is '*', the comment is consumed and a CommentToken is
// returned, or an EOFToken if the input ends before the comment is closed.
// Otherwise |cc| becomes a delimiter token.
MediaQueryToken MediaQueryTokenizer::solidus(UChar cc)
{
    if (!consumeIfNext('*'))
        return MediaQueryToken(DelimiterToken, cc);
    // We're intentionally deviating from the spec here, by creating tokens for CSS comments.
    if (consumeUntilCommentEndFound())
        return MediaQueryToken(CommentToken);
    return MediaQueryToken(EOFToken);
}
// Token handler that emits a ColonToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::colon(UChar cc)
{
    MediaQueryToken colonToken(ColonToken);
    return colonToken;
}
// Token handler that emits a SemicolonToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::semiColon(UChar cc)
{
    MediaQueryToken semicolonToken(SemicolonToken);
    return semicolonToken;
}
// Token handler for an already-consumed |cc| that may begin an escape.
// If |cc| and the next input character form a valid escape, |cc| is pushed
// back and an ident-like token is consumed; otherwise |cc| becomes a
// delimiter token.
MediaQueryToken MediaQueryTokenizer::reverseSolidus(UChar cc)
{
    const bool startsEscape = twoCharsAreValidEscape(cc, m_input.nextInputChar());
    if (!startsEscape)
        return MediaQueryToken(DelimiterToken, cc);
    reconsume(cc);
    return consumeIdentLikeToken();
}
// Token handler that pushes |cc| back and consumes a full numeric token
// starting from it.
MediaQueryToken MediaQueryTokenizer::asciiDigit(UChar cc)
{
    reconsume(cc);
    MediaQueryToken numeric = consumeNumericToken();
    return numeric;
}
// Token handler that pushes |cc| back and consumes an ident-like token
// starting from it. nextToken() also uses it for every non-ASCII character.
MediaQueryToken MediaQueryTokenizer::nameStart(UChar cc)
{
    reconsume(cc);
    MediaQueryToken identLike = consumeIdentLikeToken();
    return identLike;
}
// Token handler that consumes a string token terminated by the same
// character |cc| that opened it.
MediaQueryToken MediaQueryTokenizer::stringStart(UChar cc)
{
    MediaQueryToken stringToken = consumeStringTokenUntil(cc);
    return stringToken;
}
// Token handler that emits an EOFToken. |cc| is unused.
MediaQueryToken MediaQueryTokenizer::endOfFile(UChar cc)
{
    MediaQueryToken eofToken(EOFToken);
    return eofToken;
}
// Tokenizes |string| and appends every token, including the final EOFToken,
// to |outTokens|. An empty |string| appends nothing at all (no EOFToken).
void MediaQueryTokenizer::tokenize(String string, Vector<MediaQueryToken>& outTokens)
{
    // The spec asks for an input preprocessing pass at this point, see
    // http://dev.w3.org/csswg/css-syntax/#input-preprocessing
    //
    // It is skipped because:
    // * HTML spaces are used, which accept \r and \f as valid white space
    // * White spaces are not counted
    // * consumeEscape replaces NULLs with replacement characters
    if (string.isEmpty())
        return;

    MediaQueryInputStream input(string);
    MediaQueryTokenizer tokenizer(input);
    bool reachedEnd = false;
    do {
        MediaQueryToken token = tokenizer.nextToken();
        outTokens.append(token);
        reachedEnd = token.type() == EOFToken;
    } while (!reachedEnd);
}
// Consumes one character and produces the next token by dispatching on it:
// non-ASCII characters go to nameStart(), ASCII characters go through the
// codePoints handler table, and an ASCII character with no handler becomes a
// delimiter token.
MediaQueryToken MediaQueryTokenizer::nextToken()
{
    // Unlike the HTMLTokenizer, the CSS Syntax spec is written as a stateless,
    // fixed-size look-ahead tokenizer. A stateful model with states for all
    // the "next 3 codepoints are X" cases would make incremental tokenization
    // of partial sources easier, but for now the spec is followed exactly.
    const UChar cc = consume();
    if (!isASCII(cc))
        return nameStart(cc);

    ASSERT_WITH_SECURITY_IMPLICATION(cc < codePointsNumber);
    CodePoint handler = codePoints[cc];
    if (!handler)
        return MediaQueryToken(DelimiterToken, cc);
    return (this->*handler)(cc);
}
// Reads an optional '+' or '-' located |offset| characters ahead in |input|.
// Returns -1 for '-', otherwise 1. |offset| is advanced by one when a sign
// character is present and left unchanged otherwise.
static int getSign(MediaQueryInputStream& input, unsigned& offset)
{
    // Both sign characters are looked up at |offset|, so the helper agrees
    // with the other number-parsing helpers (previously the '+' check always
    // looked at the very next input character and ignored |offset|).
    const UChar signChar = input.peek(offset);
    if (signChar == '+') {
        ++offset;
        return 1;
    }
    if (signChar == '-') {
        ++offset;
        return -1;
    }
    return 1;
}
// Skips the run of ASCII digits starting |offset| characters ahead in |input|
// and returns what getUInt() yields for that range. |offset| is advanced to
// the end of the run.
static unsigned long long getInteger(MediaQueryInputStream& input, unsigned& offset)
{
    const unsigned digitsBegin = offset;
    offset = input.skipWhilePredicate<isASCIIDigit>(offset);
    return input.getUInt(digitsBegin, offset);
}
// Reads an optional fractional part ('.' followed by at least one digit)
// located |offset| characters ahead in |input|.
// Returns what getDouble() yields for the range that starts at the '.'; when
// there is no fraction the empty range [0, 0) is passed.
// |offset| is advanced past the fraction only when one is present.
// |digitsNumber| receives the length of the consumed range (the '.' plus the
// digits), or 0 when there is no fraction.
static double getFraction(MediaQueryInputStream& input, unsigned& offset, unsigned& digitsNumber)
{
    unsigned fractionStartPos = 0;
    unsigned fractionEndPos = 0;
    // Look ahead with offset + 1 rather than ++offset: a '.' that is not
    // followed by a digit is not part of the number and must stay unconsumed
    // (e.g. "1.x" is the number 1 followed by '.').
    if (input.peek(offset) == '.' && isASCIIDigit(input.peek(offset + 1))) {
        fractionStartPos = offset;
        offset = input.skipWhilePredicate<isASCIIDigit>(offset + 1);
        fractionEndPos = offset;
    }
    digitsNumber = fractionEndPos - fractionStartPos;
    return input.getDouble(fractionStartPos, fractionEndPos);
}
// Reads an optional exponent ('e' or 'E', an optional sign, and at least one
// digit) located |offset| characters ahead in |input|.
// Returns what getUInt() yields for the exponent digits (an empty range when
// there is no valid exponent).
// |offset| is advanced past the exponent only when digits are present.
// |sign| is set to -1 only for a valid negative exponent and is otherwise
// left untouched, so callers are expected to initialize it to 1.
static unsigned long long getExponent(MediaQueryInputStream& input, unsigned& offset, int& sign)
{
    unsigned exponentStartPos = 0;
    unsigned exponentEndPos = 0;
    const UChar marker = input.peek(offset);
    if (marker == 'E' || marker == 'e') {
        // Scan with a local cursor and commit to the out-parameters only once
        // digits are confirmed, so a bare "e" or "e-" changes nothing.
        unsigned lookahead = offset + 1;
        bool isNegative = false;
        const UChar signChar = input.peek(lookahead);
        if (signChar == '+') {
            ++lookahead;
        } else if (signChar == '-') {
            isNegative = true;
            ++lookahead;
        }
        exponentStartPos = lookahead;
        exponentEndPos = input.skipWhilePredicate<isASCIIDigit>(lookahead);
        if (exponentEndPos != exponentStartPos) {
            offset = exponentEndPos;
            if (isNegative)
                sign = -1;
        }
    }
    return input.getUInt(exponentStartPos, exponentEndPos);
}
// This method merges the following spec sections for efficiency
// http://www.w3.org/TR/css3-syntax/#consume-a-number
// http://www.w3.org/TR/css3-syntax/#convert-a-string-to-a-number
//
// Parses sign, integer part, fraction and exponent by look-ahead, advances
// the input past everything that was recognized, and returns a NumberToken
// holding the computed value. The token is typed NumberValueType when a
// fraction or an exponent was present, IntegerValueType otherwise.
MediaQueryToken MediaQueryTokenizer::consumeNumber()
{
    ASSERT(nextCharsAreNumber());
    unsigned offset = 0;
    int exponentSign = 1;
    unsigned fractionDigits = 0;

    const int sign = getSign(m_input, offset);
    const unsigned long long integerPart = getInteger(m_input, offset);
    const double fractionPart = getFraction(m_input, offset, fractionDigits);

    // getExponent() moves |offset| only when it accepts an exponent, so a
    // changed offset tells us that one was consumed.
    const unsigned offsetBeforeExponent = offset;
    const unsigned long long exponentPart = getExponent(m_input, offset, exponentSign);
    const bool hasExponent = offset != offsetBeforeExponent;

    const double exponent = pow(10.0, exponentSign * static_cast<double>(exponentPart));
    const double value = sign * (static_cast<double>(integerPart) + fractionPart) * exponent;
    m_input.advance(offset);

    // Per the spec, an exponent makes the type "number" just like a fraction
    // does; e.g. "1e-1" must not be reported as an integer.
    const NumericValueType type = (fractionDigits > 0 || hasExponent) ? NumberValueType : IntegerValueType;
    return MediaQueryToken(NumberToken, value, type);
}
// http://www.w3.org/TR/css3-syntax/#consume-a-numeric-token
// Consumes a number and, depending on what follows, turns it into a dimension
// (an identifier follows, used as the unit) or a percentage ('%' follows).
// The identifier check takes precedence over the '%' check.
MediaQueryToken MediaQueryTokenizer::consumeNumericToken()
{
    MediaQueryToken number = consumeNumber();
    if (nextCharsAreIdentifier()) {
        number.convertToDimensionWithUnit(consumeName());
        return number;
    }
    if (consumeIfNext('%'))
        number.convertToPercentage();
    return number;
}
// http://www.w3.org/TR/css3-syntax/#consume-an-ident-like-token
// Consumes a name. If it is immediately followed by '(', the '(' is consumed
// too and a FunctionToken that opens a parenthesis block is returned;
// otherwise an IdentToken is returned.
MediaQueryToken MediaQueryTokenizer::consumeIdentLikeToken()
{
    const String identifier = consumeName();
    if (!consumeIfNext('('))
        return MediaQueryToken(IdentToken, identifier);
    return blockStart(LeftParenthesisToken, FunctionToken, identifier);
}
// Returns true for '\r', '\n' and '\f'.
static bool isNewLine(UChar cc)
{
    // \r and \f are checked as well because there is no preprocessing stage
    switch (cc) {
    case '\r':
    case '\n':
    case '\f':
        return true;
    default:
        return false;
    }
}
// http://dev.w3.org/csswg/css-syntax/#consume-a-string-token
// Consumes the body of a string whose opening quote has already been read.
// Returns a StringToken when |endingCodePoint| or the end of input is
// reached, or a BadStringToken when an unescaped newline is found (the
// newline itself is left unconsumed).
MediaQueryToken MediaQueryTokenizer::consumeStringTokenUntil(UChar endingCodePoint)
{
    StringBuilder output;
    while (true) {
        UChar cc = consume();
        if (cc == endingCodePoint || cc == kEndOfFileMarker) {
            // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF
            if (cc == kEndOfFileMarker)
                reconsume(cc);
            return MediaQueryToken(StringToken, output.toString());
        }
        if (isNewLine(cc)) {
            reconsume(cc);
            return MediaQueryToken(BadStringToken);
        }
        if (cc != '\\') {
            output.append(cc);
            continue;
        }

        const UChar next = m_input.nextInputChar();
        // A backslash right before the end of input is dropped; the next
        // iteration then sees the end of input and finishes the string.
        if (next == kEndOfFileMarker)
            continue;
        if (!isNewLine(next)) {
            output.append(consumeEscape());
            continue;
        }
        // An escaped newline is a line continuation and adds nothing to the
        // string. There is no preprocessing stage, so "\r\n" reaches us as two
        // characters and both must be skipped; otherwise the stray '\n' would
        // turn a valid string into a BadStringToken.
        if (consume() == '\r')
            consumeIfNext('\n');
    }
}
// Consumes input characters for as long as the next one is an HTML space.
void MediaQueryTokenizer::consumeUntilNonWhitespace()
{
    // HTML space is used rather than CSS space because there is no preprocessing
    for (;;) {
        if (!isHTMLSpace<UChar>(m_input.nextInputChar()))
            return;
        consume();
    }
}
bool MediaQueryTokenizer::consumeUntilCommentEndFound()
{
UChar c = consume();
while (true) {
if (c == kEndOfFileMarker)
return false;
if (c != '*') {
c = consume();
continue;
}
c = consume();
if (c == '/')
break;
}
return true;
}
// Consumes the next input character only if it equals |character|.
// Returns whether a character was consumed.
bool MediaQueryTokenizer::consumeIfNext(UChar character)
{
    if (m_input.nextInputChar() != character)
        return false;
    consume();
    return true;
}
// http://www.w3.org/TR/css3-syntax/#consume-a-name
// Accumulates name characters and decoded escapes until a character that is
// neither is met; that character is pushed back and the name is returned.
String MediaQueryTokenizer::consumeName()
{
    // FIXME: Is this as efficient as it can be?
    // The possibility of escape chars mandates a copy AFAICT.
    StringBuilder name;
    for (;;) {
        const UChar cc = consume();
        if (isNameChar(cc)) {
            name.append(cc);
        } else if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) {
            name.append(consumeEscape());
        } else {
            reconsume(cc);
            break;
        }
    }
    return name.toString();
}
// http://dev.w3.org/csswg/css-syntax/#consume-an-escaped-code-point
// Consumes the part of an escape that follows the backslash and returns the
// character it stands for.
// * Hex escape: up to six hex digits plus one optional trailing whitespace
//   are consumed. U+FFFD is returned when the value cannot be parsed, is
//   zero, is a surrogate, or does not fit the 16-bit UChar return type.
// * End-of-file marker: U+FFFD is returned.
// * Anything else: the character itself is returned.
UChar MediaQueryTokenizer::consumeEscape()
{
    UChar cc = consume();
    ASSERT(cc != '\n');
    if (isASCIIHexDigit(cc)) {
        unsigned consumedHexDigits = 1;
        StringBuilder hexChars;
        hexChars.append(cc);
        while (consumedHexDigits < 6 && isASCIIHexDigit(m_input.nextInputChar())) {
            hexChars.append(consume());
            ++consumedHexDigits;
        }
        // One whitespace after the hex digits terminates the escape and is
        // part of it (so "\26 B" is "&B", not "& B"). "\r\n" counts as a
        // single whitespace because there is no preprocessing stage.
        if (isHTMLSpace<UChar>(m_input.nextInputChar())) {
            if (consume() == '\r')
                consumeIfNext('\n');
        }
        bool ok = false;
        const unsigned codePoint = hexChars.toString().toUIntStrict(&ok, 16);
        const bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
        // Values above 0xFFFF cannot be returned as a UChar; report them as
        // U+FFFD instead of silently truncating them to an unrelated character.
        if (!ok || !codePoint || isSurrogate || codePoint > 0xFFFF)
            return WTF::Unicode::replacementCharacter;
        return static_cast<UChar>(codePoint);
    }
    // Replaces NULLs with replacement characters, since we do not perform preprocessing
    if (cc == kEndOfFileMarker)
        return WTF::Unicode::replacementCharacter;
    return cc;
}
// Returns whether the next two unconsumed input characters form a valid
// escape. Always false when leftChars() reports less than one character.
bool MediaQueryTokenizer::nextTwoCharsAreValidEscape()
{
    const bool hasInput = m_input.leftChars() >= 1;
    return hasInput && twoCharsAreValidEscape(m_input.nextInputChar(), m_input.peek(1));
}
// http://www.w3.org/TR/css3-syntax/#starts-with-a-number
// Returns whether the already-consumed |first| followed by the upcoming input
// starts a number: a digit, a sign followed by a digit or by ".digit", or a
// '.' followed by a digit.
bool MediaQueryTokenizer::nextCharsAreNumber(UChar first)
{
    const UChar second = m_input.nextInputChar();
    if (isASCIIDigit(first))
        return true;
    switch (first) {
    case '+':
    case '-':
        if (isASCIIDigit(second))
            return true;
        return second == '.' && isASCIIDigit(m_input.peek(1));
    case '.':
        return isASCIIDigit(second);
    default:
        return false;
    }
}
bool MediaQueryTokenizer::nextCharsAreNumber()
{
UChar first = consume();
bool areNumber = nextCharsAreNumber(first);
reconsume(first);
return areNumber;
}
// http://www.w3.org/TR/css3-syntax/#would-start-an-identifier
// Returns whether the already-consumed |first| followed by the upcoming input
// starts an identifier: a name-start character, a valid escape, or a '-'
// followed by either of those.
bool MediaQueryTokenizer::nextCharsAreIdentifier(UChar first)
{
    const UChar second = m_input.nextInputChar();
    if (isNameStart(first) || twoCharsAreValidEscape(first, second))
        return true;
    if (first != '-')
        return false;
    return isNameStart(second) || nextTwoCharsAreValidEscape();
}
bool MediaQueryTokenizer::nextCharsAreIdentifier()
{
UChar first = consume();
bool areIdentifier = nextCharsAreIdentifier(first);
reconsume(first);
return areIdentifier;
}
} // namespace blink