/* * Copyright (C) 2013 Google, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "core/html/parser/BackgroundHTMLParser.h" #include "core/html/parser/HTMLDocumentParser.h" #include "core/html/parser/HTMLParserIdioms.h" #include "core/html/parser/TextResourceDecoder.h" #include "wtf/MainThread.h" #include "wtf/text/TextPosition.h" namespace blink { // We limit our chunks to 1000 tokens, to make sure the main // thread is never waiting on the parser thread for tokens. // This was tuned in https://bugs.webkit.org/show_bug.cgi?id=110408. 
static const size_t pendingTokenLimit = 1000; #if ENABLE(ASSERT) static void checkThatTokensAreSafeToSendToAnotherThread(const CompactHTMLTokenStream* tokens) { for (size_t i = 0; i < tokens->size(); ++i) ASSERT(tokens->at(i).isSafeToSendToAnotherThread()); } #endif base::WeakPtr BackgroundHTMLParser::create(PassOwnPtr config) { // Caller must free by calling stop(). BackgroundHTMLParser* parser = new BackgroundHTMLParser(config); return parser->m_weakFactory.GetWeakPtr(); } BackgroundHTMLParser::BackgroundHTMLParser(PassOwnPtr config) : m_token(adoptPtr(new HTMLToken)) , m_tokenizer(HTMLTokenizer::create(config->options)) , m_parser(config->parser) , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream)) , m_decoder(TextResourceDecoder::create()) , m_weakFactory(this) { } BackgroundHTMLParser::~BackgroundHTMLParser() { } void BackgroundHTMLParser::appendRawBytesFromMainThread(PassOwnPtr > buffer) { updateDocument(m_decoder->decode(buffer->data(), buffer->size())); } void BackgroundHTMLParser::appendDecodedBytes(const String& input) { ASSERT(!m_input.isClosed()); m_input.append(SegmentedString(input)); pumpTokenizer(); } void BackgroundHTMLParser::flush() { updateDocument(m_decoder->flush()); } void BackgroundHTMLParser::updateDocument(const String& decodedData) { if (decodedData.isEmpty()) return; appendDecodedBytes(decodedData); } void BackgroundHTMLParser::finish() { markEndOfFile(); pumpTokenizer(); } void BackgroundHTMLParser::stop() { delete this; } void BackgroundHTMLParser::markEndOfFile() { ASSERT(!m_input.isClosed()); m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); m_input.close(); } bool BackgroundHTMLParser::updateTokenizerState(const CompactHTMLToken& token) { if (token.type() == HTMLToken::StartTag) { const String& tagName = token.data(); // FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches. 
if (threadSafeMatch(tagName, HTMLNames::scriptTag)) m_tokenizer->setState(HTMLTokenizer::ScriptDataState); else if (threadSafeMatch(tagName, HTMLNames::styleTag)) m_tokenizer->setState(HTMLTokenizer::RAWTEXTState); } if (token.type() == HTMLToken::EndTag) { const String& tagName = token.data(); if (threadSafeMatch(tagName, HTMLNames::scriptTag)) { m_tokenizer->setState(HTMLTokenizer::DataState); return false; } } return true; } void BackgroundHTMLParser::pumpTokenizer() { while (true) { if (!m_tokenizer->nextToken(m_input, *m_token)) { // We've reached the end of our current input. sendTokensToMainThread(); break; } { CompactHTMLToken token(m_token.get(), TextPosition(m_input.currentLine(), m_input.currentColumn())); m_pendingTokens->append(token); } m_token->clear(); if (!updateTokenizerState(m_pendingTokens->last()) || m_pendingTokens->size() >= pendingTokenLimit) sendTokensToMainThread(); } } void BackgroundHTMLParser::sendTokensToMainThread() { if (m_pendingTokens->isEmpty()) return; #if ENABLE(ASSERT) checkThatTokensAreSafeToSendToAnotherThread(m_pendingTokens.get()); #endif OwnPtr chunk = adoptPtr(new HTMLDocumentParser::ParsedChunk); chunk->tokens = m_pendingTokens.release(); callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser, m_parser, chunk.release())); m_pendingTokens = adoptPtr(new CompactHTMLTokenStream); } }