mirror of
https://github.com/flutter/flutter.git
synced 2026-02-20 02:29:02 +08:00
This CL is progress towards deleting the concept of an HTMLElement entirely. We won't actually get all the way there in this CL series, but we're getting closer. This CL also will let us make custom elements just be Elements instead of HTMLElements. R=eseidel@chromium.org Review URL: https://codereview.chromium.org/942933003
335 lines
13 KiB
C++
335 lines
13 KiB
C++
/*
|
|
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
|
|
* Copyright (C) 2011 Apple Inc. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "sky/engine/config.h"
|
|
#include "sky/engine/core/html/parser/HTMLConstructionSite.h"
|
|
|
|
#include <limits>
|
|
#include "gen/sky/core/HTMLElementFactory.h"
|
|
#include "sky/engine/core/dom/DocumentFragment.h"
|
|
#include "sky/engine/core/dom/Element.h"
|
|
#include "sky/engine/core/dom/Text.h"
|
|
#include "sky/engine/core/frame/LocalFrame.h"
|
|
#include "sky/engine/core/html/HTMLScriptElement.h"
|
|
#include "sky/engine/core/html/HTMLTemplateElement.h"
|
|
#include "sky/engine/core/html/parser/AtomicHTMLToken.h"
|
|
#include "sky/engine/core/html/parser/HTMLParserIdioms.h"
|
|
#include "sky/engine/core/html/parser/HTMLToken.h"
|
|
#include "sky/engine/core/loader/FrameLoaderClient.h"
|
|
#include "sky/engine/platform/NotImplemented.h"
|
|
#include "sky/engine/platform/text/TextBreakIterator.h"
|
|
|
|
namespace blink {
|
|
|
|
// Open-element stack depth above which attachLater() stops nesting: new nodes
// are attached to the intended parent's parent instead (when it has one).
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
|
|
|
|
// Hands the attribute list carried by |token| to |element| through the
// element's parserSetAttributes() entry point.
static inline void setAttributes(Element* element, AtomicHTMLToken* token)
{
    const auto& parsedAttributes = token->attributes();
    element->parserSetAttributes(parsedAttributes);
}
|
|
|
|
static bool shouldUseLengthLimit(const ContainerNode& node)
|
|
{
|
|
return !isHTMLScriptElement(node)
|
|
&& !isHTMLStyleElement(node);
|
|
}
|
|
|
|
static unsigned textLengthLimitForContainer(const ContainerNode& node)
|
|
{
|
|
return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
|
|
}
|
|
|
|
static inline bool isAllWhitespace(const String& string)
|
|
{
|
|
return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
|
|
}
|
|
|
|
// Appends task.child to task.parent using parserAppendChild().
// When the parent is a template element the task is first retargeted at the
// template's content(), so the child ends up there instead.
static inline void insert(HTMLConstructionSiteTask& task)
{
    if (isHTMLTemplateElement(*task.parent)) {
        HTMLTemplateElement* templateElement = toHTMLTemplateElement(task.parent.get());
        task.parent = templateElement->content();
    }

    Node* child = task.child.get();
    task.parent->parserAppendChild(child);
}
|
|
|
|
// Runs a task whose operation is Insert by delegating to insert().
static inline void executeInsertTask(HTMLConstructionSiteTask& task)
{
    ASSERT(HTMLConstructionSiteTask::Insert == task.operation);

    insert(task);
}
|
|
|
|
// Runs a task whose operation is InsertText.
//
// Text is merged into the parent's last child when that child is itself a text
// node and the combined length stays below the parent's text length limit:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
// Otherwise the new text node is inserted as a separate child.
static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
    ASSERT(task.child->isTextNode());

    Node* lastChild = task.parent->lastChild();
    if (!lastChild || !lastChild->isTextNode()) {
        // Nothing to merge with.
        insert(task);
        return;
    }

    Text* incomingText = toText(task.child.get());
    Text* existingText = toText(lastChild);
    const unsigned limit = textLengthLimitForContainer(*task.parent);
    if (existingText->length() + incomingText->length() >= limit) {
        // Merging would reach the limit, so keep the nodes separate.
        insert(task);
        return;
    }

    existingText->parserAppendData(incomingText->data());
}
|
|
|
|
// Dispatches |task| to the handler matching its operation. Only Insert and
// InsertText are expected here.
static inline void executeTask(HTMLConstructionSiteTask& task)
{
    if (task.operation != HTMLConstructionSiteTask::Insert) {
        ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
        executeInsertTextTask(task);
        return;
    }

    executeInsertTask(task);
}
|
|
|
|
// This is only needed for TextDocuments where we might have text nodes
|
|
// approaching the default length limit (~64k) and we don't want to
|
|
// break a text node in the middle of a combining character.
|
|
static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
|
|
{
|
|
ASSERT(currentPosition < proposedBreakIndex);
|
|
ASSERT(proposedBreakIndex <= string.length());
|
|
// The end of the string is always a valid break.
|
|
if (proposedBreakIndex == string.length())
|
|
return proposedBreakIndex;
|
|
|
|
// Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
|
|
if (string.is8Bit())
|
|
return proposedBreakIndex;
|
|
|
|
const UChar* breakSearchCharacters = string.characters16() + currentPosition;
|
|
// We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
|
|
unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
|
|
NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
|
|
|
|
if (it.isBreak(proposedBreakIndex - currentPosition))
|
|
return proposedBreakIndex;
|
|
|
|
int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
|
|
if (adjustedBreakIndexInSubstring > 0)
|
|
return currentPosition + adjustedBreakIndexInSubstring;
|
|
// We failed to find a breakable point, let the caller figure out what to do.
|
|
return 0;
|
|
}
|
|
|
|
void HTMLConstructionSite::flushPendingText()
|
|
{
|
|
if (m_pendingText.isEmpty())
|
|
return;
|
|
|
|
PendingText pendingText;
|
|
// Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
|
|
m_pendingText.swap(pendingText);
|
|
ASSERT(m_pendingText.isEmpty());
|
|
|
|
// Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
|
|
// for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
|
|
unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
|
|
|
|
unsigned currentPosition = 0;
|
|
const StringBuilder& string = pendingText.stringBuilder;
|
|
while (currentPosition < string.length()) {
|
|
unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
|
|
unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
|
|
ASSERT(breakIndex <= string.length());
|
|
String substring = string.substring(currentPosition, breakIndex - currentPosition);
|
|
|
|
ASSERT(breakIndex > currentPosition);
|
|
ASSERT(breakIndex - currentPosition == substring.length());
|
|
currentPosition = breakIndex;
|
|
|
|
if (isAllWhitespace(substring)) {
|
|
// Ignore whitespace nodes not inside inside a <t>. If we're splitting
|
|
// a text node this isn't really a whitespace node and we can't ignore
|
|
// it either.
|
|
if (!m_openElements.preserveWhiteSpace() && string.length() == substring.length())
|
|
continue;
|
|
|
|
// Strings composed entirely of whitespace are likely to be repeated.
|
|
// Turn them into AtomicString so we share a single string for each.
|
|
substring = AtomicString(substring).string();
|
|
}
|
|
|
|
HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
|
|
task.parent = pendingText.parent;
|
|
task.child = Text::create(task.parent->document(), substring);
|
|
queueTask(task);
|
|
ASSERT(toText(task.child.get())->length() == substring.length());
|
|
}
|
|
}
|
|
|
|
// Appends |task| to the task queue.
// Any buffered text is flushed first so that its InsertText tasks are queued
// ahead of |task|. flushPendingText() itself calls back into this function,
// which is why it empties m_pendingText before queueing anything.
void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
{
    flushPendingText();
    ASSERT(m_pendingText.isEmpty());
    m_taskQueue.append(task);
}
|
|
|
|
// Queues an Insert task that will attach |prpChild| under |parent| when the
// queued tasks are executed. |selfClosing| is recorded on the task.
//
// Once the open-element stack is deeper than maximumHTMLParserDOMTreeDepth the
// child is attached to the parent's parent instead (when there is one), i.e.
// as a sibling of |parent|.
void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> prpChild, bool selfClosing)
{
    HTMLConstructionSiteTask insertion(HTMLConstructionSiteTask::Insert);
    insertion.parent = parent;
    insertion.child = prpChild;
    insertion.selfClosing = selfClosing;

    const bool exceedsMaximumDepth = m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth;
    if (exceedsMaximumDepth && insertion.parent->parentNode())
        insertion.parent = insertion.parent->parentNode();

    ASSERT(insertion.parent);
    queueTask(insertion);
}
|
|
|
|
void HTMLConstructionSite::executeQueuedTasks()
|
|
{
|
|
// This has no affect on pendingText, and we may have pendingText
|
|
// remaining after executing all other queued tasks.
|
|
const size_t size = m_taskQueue.size();
|
|
if (!size)
|
|
return;
|
|
|
|
// Copy the task queue into a local variable in case executeTask
|
|
// re-enters the parser.
|
|
TaskQueue queue;
|
|
queue.swap(m_taskQueue);
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
executeTask(queue[i]);
|
|
|
|
// We might be detached now.
|
|
}
|
|
|
|
// Creates a construction site for |document|: the document is stored both as
// m_document and as the attachment root.
HTMLConstructionSite::HTMLConstructionSite(Document* document)
    : m_document(document)
    , m_attachmentRoot(document)
{
}
|
|
|
|
// Creates a construction site for |fragment|: the fragment is the attachment
// root, and m_document is set to the document the fragment reports.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment)
    : m_document(&fragment->document())
    , m_attachmentRoot(fragment)
{
}
|
|
|
|
// Checks (via ASSERT) that neither queued tasks nor buffered text are left
// behind when the construction site is destroyed.
HTMLConstructionSite::~HTMLConstructionSite()
{
    // Depending on why we're being destroyed it might be OK
    // to forget queued tasks, but currently we don't expect to.
    ASSERT(m_taskQueue.isEmpty());
    // Currently we assume that text will never be the last token in the
    // document and that we'll always queue some additional task to cause it to flush.
    ASSERT(m_pendingText.isEmpty());
}
|
|
|
|
// Drops any buffered text via m_pendingText.discard() and clears the document
// and attachment-root pointers.
void HTMLConstructionSite::detach()
{
    // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
    // text that really should have made it into the DOM earlier, but there
    // doesn't seem to be a nice way to do that.
    m_pendingText.discard();
    m_document = nullptr;
    m_attachmentRoot = nullptr;
}
|
|
|
|
// End-of-file handling: calls flush() and then popAll() on the open-elements
// stack.
// NOTE(review): flush() is defined outside this file; presumably it queues
// pending text and executes the queued tasks -- confirm in the header.
void HTMLConstructionSite::processEndOfFile()
{
    flush();
    openElements()->popAll();
}
|
|
|
|
// Called when parsing is complete: flushes whatever is still pending and then
// notifies the document through Document::finishedParsing().
void HTMLConstructionSite::finishedParsing()
{
    // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
    ASSERT(m_taskQueue.isEmpty());
    flush();
    m_document->finishedParsing();
}
|
|
|
|
// Creates the element described by |token|, queues its attachment under the
// current node, and pushes it onto the open-elements stack.
void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
{
    RefPtr<Element> newElement = createElement(token);
    ContainerNode* parent = currentNode();
    attachLater(parent, newElement);
    m_openElements.push(newElement.release());
}
|
|
|
|
// Creates the element described by the start tag |token| and queues its
// attachment under the current node with the self-closing flag set. The element
// is not pushed onto the open-elements stack.
void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);

    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
    // but self-closing elements are never in the element stack so the stack
    // doesn't get a chance to tell them that we're done parsing their children.
    RefPtr<Element> selfClosingElement = createElement(token);
    attachLater(currentNode(), selfClosingElement.release(), true);

    // FIXME: Do we want to acknowledge the token's self-closing flag?
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
}
|
|
|
|
// Creates an HTMLScriptElement in the owner document of the current node,
// applies the attributes from |token|, queues its attachment under the current
// node, and pushes it onto the open-elements stack.
void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
{
    Document& document = ownerDocumentForCurrentNode();
    RefPtr<HTMLScriptElement> scriptElement = HTMLScriptElement::create(document);
    setAttributes(scriptElement.get(), token);

    attachLater(currentNode(), scriptElement);
    m_openElements.push(scriptElement.release());
}
|
|
|
|
void HTMLConstructionSite::insertTextNode(const String& string)
|
|
{
|
|
HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
|
|
dummyTask.parent = currentNode();
|
|
|
|
// FIXME: This probably doesn't need to be done both here and in insert(Task).
|
|
if (isHTMLTemplateElement(*dummyTask.parent))
|
|
dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
|
|
|
|
// Unclear when parent != case occurs. Somehow we insert text into two separate
|
|
// nodes while processing the same Token. When it happens we have to flush the
|
|
// pending text into the task queue before making more.
|
|
if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent))
|
|
flushPendingText();
|
|
m_pendingText.append(dummyTask.parent, string);
|
|
}
|
|
|
|
// Returns the document new children of the current node should be created in:
// the document of the template's content() when the current node is a template
// element, otherwise the current node's own document.
inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
{
    if (!isHTMLTemplateElement(*currentNode()))
        return currentNode()->document();

    return toHTMLTemplateElement(currentElement())->content()->document();
}
|
|
|
|
// Asks HTMLElementFactory for an element named after |token| in the owner
// document of the current node, applies the token's attributes to it, and
// returns it to the caller.
PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token)
{
    RefPtr<Element> created = HTMLElementFactory::createElement(token->name(), ownerDocumentForCurrentNode(), true);
    setAttributes(created.get(), token);
    return created.release();
}
|
|
|
|
}
|