Commit 133bc5c2, authored by Kent Tamura, committed by Commit Bot

HTML parser: Fix "HTML integration point" implementation in HTMLTreeBuilderSimulator.

HTMLTreeBuilderSimulator treated only SVG <foreignObject> as an HTML
integration point. This CL also recognizes MathML <annotation-xml> (with
an HTML or XHTML encoding), SVG <desc>, and SVG <title>.

Bug: 805924
Change-Id: I6793d9163d4c6bc8bf0790415baedddaac7a1fc2
Reviewed-on: https://chromium-review.googlesource.com/964038
Commit-Queue: Kent Tamura <tkent@chromium.org>
Reviewed-by: Kouhei Ueno <kouhei@chromium.org>
Cr-Commit-Position: refs/heads/master@{#543634}
parent 1e8327c8
<!DOCTYPE html>
<link rel="help" href="https://html.spec.whatwg.org/multipage/parsing.html#tree-construction:html-integration-point">
<body>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<math><annotation-xml id="point-1" encoding="text/html"><xmp>&lt;/xmp&gt;&lt;img></xmp></annotation-xml></math>
<math><annotation-xml id="point-2" encoding="application/xhtml+xml"><style>&lt;/style&gt;&lt;img></style></annotation-xml></math>
<svg><foreignObject id="point-3"><iframe>&lt;/iframe&gt;&lt;img></iframe></foreignObject></svg>
<svg><desc id="point-4"><noembed>&lt;/noembed&gt;&lt;img></noembed></desc></svg>
<svg><title id="point-5"><noframes>&lt;/noframes&gt;&lt;img></noframes></title></svg>
<script>
// Builds the testharness callback for the fixture element with the given id.
// The callback checks that the element itself is not in the XHTML namespace,
// that its first child is, and that the child's text still starts with the
// literal characters "&lt;" (i.e. the content was not entity-decoded).
function generate_test(id) {
  const XHTML_NS = 'http://www.w3.org/1999/xhtml';
  return function () {
    const integrationPoint = document.querySelector('#' + id);
    assert_not_equals(integrationPoint.namespaceURI, XHTML_NS);

    const child = integrationPoint.firstChild;
    assert_equals(child.namespaceURI, XHTML_NS);

    const leadingText = child.textContent.slice(0, 4);
    assert_equals(leadingText, '&lt;',
                  'Entity references should not be decoded.');
  };
}
// Register one testharness test per fixture element, in document order.
// Each entry pairs a fixture id with the test's description.
[
  ['point-1', 'MathML annotation-xml with encoding=text/html should be an HTML integration point'],
  ['point-2', 'MathML annotation-xml with encoding=application/xhtml+xml should be an HTML integration point'],
  ['point-3', 'SVG foreignObject should be an HTML integration point'],
  ['point-4', 'SVG desc should be an HTML integration point'],
  ['point-5', 'SVG title should be an HTML integration point'],
].forEach(([fixtureId, description]) => {
  test(generate_test(fixtureId), description);
});
</script>
</body>
...@@ -82,13 +82,6 @@ static bool TokenExitsForeignContent(const CompactHTMLToken& token) { ...@@ -82,13 +82,6 @@ static bool TokenExitsForeignContent(const CompactHTMLToken& token) {
token.GetAttributeItem(sizeAttr))); token.GetAttributeItem(sizeAttr)));
} }
static bool TokenExitsSVG(const CompactHTMLToken& token) {
// FIXME: It's very fragile that we special case foreignObject here to be
// case-insensitive.
return DeprecatedEqualIgnoringCase(token.Data(),
SVGNames::foreignObjectTag.LocalName());
}
static bool TokenExitsMath(const CompactHTMLToken& token) { static bool TokenExitsMath(const CompactHTMLToken& token) {
// FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint // FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint
// and changed to use threadSafeMatch. // and changed to use threadSafeMatch.
...@@ -148,10 +141,10 @@ HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::Simulate( ...@@ -148,10 +141,10 @@ HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::Simulate(
namespace_stack_.push_back(kMathML); namespace_stack_.push_back(kMathML);
if (InForeignContent() && TokenExitsForeignContent(token)) if (InForeignContent() && TokenExitsForeignContent(token))
namespace_stack_.pop_back(); namespace_stack_.pop_back();
if ((namespace_stack_.back() == SVG && TokenExitsSVG(token)) || if (IsHTMLIntegrationPointForStartTag(token) ||
(namespace_stack_.back() == kMathML && TokenExitsMath(token))) (namespace_stack_.back() == kMathML && TokenExitsMath(token))) {
namespace_stack_.push_back(HTML); namespace_stack_.push_back(HTML);
if (!InForeignContent()) { } else if (!InForeignContent()) {
// FIXME: This is just a copy of Tokenizer::updateStateFor which uses // FIXME: This is just a copy of Tokenizer::updateStateFor which uses
// threadSafeMatches. // threadSafeMatches.
if (ThreadSafeMatch(tag_name, textareaTag) || if (ThreadSafeMatch(tag_name, textareaTag) ||
...@@ -203,8 +196,7 @@ HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::Simulate( ...@@ -203,8 +196,7 @@ HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::Simulate(
ThreadSafeMatch(tag_name, SVGNames::svgTag)) || ThreadSafeMatch(tag_name, SVGNames::svgTag)) ||
(namespace_stack_.back() == kMathML && (namespace_stack_.back() == kMathML &&
ThreadSafeMatch(tag_name, MathMLNames::mathTag)) || ThreadSafeMatch(tag_name, MathMLNames::mathTag)) ||
(namespace_stack_.Contains(SVG) && namespace_stack_.back() == HTML && IsHTMLIntegrationPointForEndTag(token) ||
TokenExitsSVG(token)) ||
(namespace_stack_.Contains(kMathML) && (namespace_stack_.Contains(kMathML) &&
namespace_stack_.back() == HTML && TokenExitsMath(token))) { namespace_stack_.back() == HTML && TokenExitsMath(token))) {
namespace_stack_.pop_back(); namespace_stack_.pop_back();
...@@ -226,4 +218,59 @@ HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::Simulate( ...@@ -226,4 +218,59 @@ HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::Simulate(
return simulated_token; return simulated_token;
} }
// https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point
bool HTMLTreeBuilderSimulator::IsHTMLIntegrationPointForStartTag(
const CompactHTMLToken& token) const {
DCHECK(token.GetType() == HTMLToken::kStartTag) << token.GetType();
Namespace tokens_ns = namespace_stack_.back();
const String& tag_name = token.Data();
if (tokens_ns == kMathML) {
if (!ThreadSafeMatch(tag_name, MathMLNames::annotation_xmlTag))
return false;
if (auto* encoding = token.GetAttributeItem(MathMLNames::encodingAttr)) {
return EqualIgnoringASCIICase(encoding->Value(), "text/html") ||
EqualIgnoringASCIICase(encoding->Value(), "application/xhtml+xml");
}
} else if (tokens_ns == SVG) {
// FIXME: It's very fragile that we special case foreignObject here to be
// case-insensitive.
if (DeprecatedEqualIgnoringCase(tag_name,
SVGNames::foreignObjectTag.LocalName()))
return true;
return ThreadSafeMatch(tag_name, SVGNames::descTag) ||
ThreadSafeMatch(tag_name, SVGNames::titleTag);
}
return false;
}
// https://html.spec.whatwg.org/multipage/parsing.html#html-integration-point
bool HTMLTreeBuilderSimulator::IsHTMLIntegrationPointForEndTag(
const CompactHTMLToken& token) const {
if (token.GetType() != HTMLToken::kEndTag)
return false;
// If it's inside an HTML integration point, the top namespace is
// HTML, and its next namespace is not HTML.
if (namespace_stack_.back() != HTML)
return false;
if (namespace_stack_.size() < 2)
return false;
Namespace tokens_ns = namespace_stack_[namespace_stack_.size() - 2];
const String& tag_name = token.Data();
if (tokens_ns == kMathML)
return ThreadSafeMatch(tag_name, MathMLNames::annotation_xmlTag);
if (tokens_ns == SVG) {
// FIXME: It's very fragile that we special case foreignObject here to be
// case-insensitive.
if (DeprecatedEqualIgnoringCase(tag_name,
SVGNames::foreignObjectTag.LocalName()))
return true;
return ThreadSafeMatch(tag_name, SVGNames::descTag) ||
ThreadSafeMatch(tag_name, SVGNames::titleTag);
}
return false;
}
} // namespace blink } // namespace blink
...@@ -64,6 +64,8 @@ class CORE_EXPORT HTMLTreeBuilderSimulator { ...@@ -64,6 +64,8 @@ class CORE_EXPORT HTMLTreeBuilderSimulator {
private: private:
bool InForeignContent() const { return namespace_stack_.back() != HTML; } bool InForeignContent() const { return namespace_stack_.back() != HTML; }
bool IsHTMLIntegrationPointForStartTag(const CompactHTMLToken&) const;
bool IsHTMLIntegrationPointForEndTag(const CompactHTMLToken&) const;
HTMLParserOptions options_; HTMLParserOptions options_;
State namespace_stack_; State namespace_stack_;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment