mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
This CL is a rough pass over the HTMLTokenizer to align it with parsing.md. We'll need to do another pass more carefully in the future, but this CL gets us roughly in the right ballpark. We're not handling EOF properly. The parsing.md spec doesn't push the EOF through the parser, which breaks our current way of handling EOF. We do OK if we get EOF in the DataState, and that's enough to pass the tests for now. Also, update camel-case.sky to reflect the fact that the parser doesn't lower-case tag names anymore. R=eseidel@chromium.org Review URL: https://codereview.chromium.org/678263002
107 lines
4.5 KiB
C++
107 lines
4.5 KiB
C++
/*
|
|
* Copyright (C) 2008 Apple Inc. All Rights Reserved.
|
|
* Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
|
|
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#ifndef MarkupTokenizerInlines_h
|
|
#define MarkupTokenizerInlines_h
|
|
|
|
#include "platform/text/SegmentedString.h"
|
|
|
|
namespace blink {
|
|
|
|
// Returns true when |cc| is a space (' ') or a line feed ('\x0A'). No other
// character -- tab, form feed and carriage return included -- is treated as
// whitespace by this predicate.
inline bool isTokenizerWhitespace(UChar cc)
{
    switch (cc) {
    case ' ':
    case '\x0A':
        return true;
    default:
        return false;
    }
}
|
|
|
|
// Returns true when |cc| is an ASCII letter (either case), an ASCII digit, or
// one of the three punctuation characters '-', '_' and '.'.
inline bool isTokenizerTagName(UChar cc)
{
    const bool isLowerAlpha = 'a' <= cc && cc <= 'z';
    const bool isUpperAlpha = 'A' <= cc && cc <= 'Z';
    const bool isDigit = '0' <= cc && cc <= '9';
    const bool isAllowedPunctuation = cc == '-' || cc == '_' || cc == '.';
    return isLowerAlpha || isUpperAlpha || isDigit || isAllowedPunctuation;
}
|
|
|
|
// Walks the NUL-terminated string |expectedCharacters| from its first
// character to the terminator and calls source.advanceAndASSERTIgnoringCase()
// once per character, in order. The terminator itself is not passed on.
//
// |expectedCharacters| is dereferenced without a null check. The exact
// advancing/asserting contract is that of
// SegmentedString::advanceAndASSERTIgnoringCase (presumably a case-insensitive
// match assertion, going by its name -- confirm in SegmentedString.h).
inline void advanceStringAndASSERTIgnoringCase(SegmentedString& source, const char* expectedCharacters)
{
    for (const char* cursor = expectedCharacters; *cursor; ++cursor)
        source.advanceAndASSERTIgnoringCase(*cursor);
}
|
|
|
|
// Walks the NUL-terminated string |expectedCharacters| from its first
// character to the terminator and calls source.advanceAndASSERT() once per
// character, in order. The terminator itself is not passed on.
//
// |expectedCharacters| is dereferenced without a null check. The exact
// advancing/asserting contract is that of SegmentedString::advanceAndASSERT
// (presumably an exact-match assertion, going by its name -- confirm in
// SegmentedString.h).
inline void advanceStringAndASSERT(SegmentedString& source, const char* expectedCharacters)
{
    for (const char* cursor = expectedCharacters; *cursor; ++cursor)
        source.advanceAndASSERT(*cursor);
}
|
|
|
|
#if COMPILER(MSVC)
// The tokenizer state machine asserts that certain code points are never
// reached (see ASSERT_NOT_REACHED() in END_STATE), so MSVC's "unreachable
// code" warning, C4702, is switched off. There is no matching re-enable in
// this header.
#pragma warning(disable: 4702)
#endif
|
|
|
|
// Opens a tokenizer state. Expands to the switch label
// `case prefix::stateName:` immediately followed by a goto label named
// `stateName`, so the same code can be entered through the enclosing switch or
// by a `goto stateName` (as RECONSUME_IN, ADVANCE_TO and SWITCH_TO do).
#define BEGIN_STATE(prefix, stateName) \
    case prefix::stateName:            \
    stateName:
|
|
// Closes a tokenizer state. Expands to two statements: an
// ASSERT_NOT_REACHED() followed by `break;`. The assertion marks the end of a
// state body as a point execution is not expected to reach. The expansion is
// not wrapped in do { } while (false), because the `break` would then leave
// that loop instead of the construct enclosing the state.
#define END_STATE()       \
    ASSERT_NOT_REACHED(); \
    break;
|
|
|
|
// For the places where the HTML5 spec says "reconsume the current input
// character in the <mumble> state."
//
// Records prefix::stateName in m_state and jumps straight to the stateName
// label. Neither cc nor the input source is touched, so the target state sees
// the same current character. The expansion site must have m_state in scope
// and a stateName label (see BEGIN_STATE) in the same function.
#define RECONSUME_IN(prefix, stateName) \
    do {                                \
        m_state = prefix::stateName;    \
        goto stateName;                 \
    } while (false)
|
|
|
|
// For the places where the HTML5 spec says "consume the next input
// character ... and switch to the <mumble> state."
//
// Records prefix::stateName in m_state, then asks m_inputStreamPreprocessor to
// advance |source|. When advance() succeeds, cc is reloaded from
// nextInputCharacter() and control jumps to the stateName label. When it does
// not, the enclosing function returns haveBufferedCharacterToken(); m_state
// has already been updated by then.
//
// The expansion site must have m_state, m_inputStreamPreprocessor, source, cc
// and haveBufferedCharacterToken() in scope.
#define ADVANCE_TO(prefix, stateName)                            \
    do {                                                         \
        m_state = prefix::stateName;                             \
        if (m_inputStreamPreprocessor.advance(source)) {         \
            cc = m_inputStreamPreprocessor.nextInputCharacter(); \
            goto stateName;                                      \
        }                                                        \
        return haveBufferedCharacterToken();                     \
    } while (false)
|
|
|
|
// In some places the spec separates consuming the next input character from
// switching state. Those call sites advance the source themselves and then use
// this macro purely to enter the indicated state.
//
// Records prefix::stateName in m_state. If |source| is non-empty and
// m_inputStreamPreprocessor.peek(source) succeeds, cc is reloaded from
// nextInputCharacter() and control jumps to the stateName label. Otherwise the
// enclosing function returns haveBufferedCharacterToken(). peek() is not
// called at all when |source| is empty.
//
// The expansion site must have m_state, m_inputStreamPreprocessor, source, cc
// and haveBufferedCharacterToken() in scope.
#define SWITCH_TO(prefix, stateName)                                         \
    do {                                                                     \
        m_state = prefix::stateName;                                         \
        if (!source.isEmpty() && m_inputStreamPreprocessor.peek(source)) {   \
            cc = m_inputStreamPreprocessor.nextInputCharacter();             \
            goto stateName;                                                  \
        }                                                                    \
        return haveBufferedCharacterToken();                                 \
    } while (false)
|
|
|
|
}
|
|
|
|
#endif // MarkupTokenizerInlines_h
|