Commit acc25ce9 authored by Jay Civelli's avatar Jay Civelli Committed by Commit Bot

Fixes for SafeXmlParser choking on DTD and DOCTYPE.

Changing the SafeXmlParser so it ignores nodes like DTDs and processing
instructions.

Bug: 789838
Change-Id: I0bf4a07509309f9e7720fdcea6283542a0b489ff
Reviewed-on: https://chromium-review.googlesource.com/804946Reviewed-by: default avatarScott Graham <scottmg@chromium.org>
Reviewed-by: default avatarRobert Sesek <rsesek@chromium.org>
Commit-Queue: Jay Civelli <jcivelli@chromium.org>
Cr-Commit-Position: refs/heads/master@{#521392}
parent a1b4f009
......@@ -101,9 +101,6 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) {
base::Value root_element;
std::vector<base::Value*> element_stack;
while (xml_reader.Read()) {
if (xml_reader.IsWhiteSpace() || xml_reader.IsComment())
continue;
if (xml_reader.IsClosingElement()) {
if (element_stack.empty()) {
ReportError(std::move(callback), "Invalid XML: unbalanced elements");
......@@ -123,14 +120,17 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) {
new_element = CreateTextNode(text, TextNodeType::kText);
} else if (xml_reader.GetTextIfCDataElement(&text)) {
new_element = CreateTextNode(text, TextNodeType::kCData);
} else {
// Element node.
} else if (xml_reader.IsElement()) {
new_element = CreateNewElement(xml_reader.NodeFullName());
PopulateNamespaces(&new_element, &xml_reader);
PopulateAttributes(&new_element, &xml_reader);
// Self-closing (empty) element have no close tag (or children); don't
// push them on the element stack.
push_new_node_to_stack = !xml_reader.IsEmptyElement();
} else {
// Ignore all other node types (spaces, comments, processing instructions,
// DTDs...).
continue;
}
base::Value* new_element_ptr;
......@@ -139,6 +139,7 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) {
AddChildToElement(current_element, std::move(new_element));
} else {
// First element we are parsing, it becomes the root element.
DCHECK(xml_reader.IsElement());
DCHECK(root_element.is_none());
root_element = std::move(new_element);
new_element_ptr = &root_element;
......
<!-- This is a comment. -->
<html>Some HTML</html>
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
Some fine HTML
</html>
\ No newline at end of file
<?xml-stylesheet href="mystyle.css" type="text/css"?>
<html>Some HTML</html>
......@@ -63,6 +63,33 @@ TEST_F(XmlParserTest, ParseBadXml) {
TestParseXml(xml, "");
}
TEST_F(XmlParserTest, IgnoreComments) {
TestParseXml("<!-- This is the best XML document IN THE WORLD! --><a></a>",
R"( {"type": "element", "tag": "a"} )");
}
TEST_F(XmlParserTest, IgnoreProcessingCommands) {
TestParseXml(R"(<?xml-stylesheet href="mystyle.css" type="text/css"?>
<a></a>)",
R"( {"type": "element", "tag": "a"} )");
TestParseXml("<a/><?hello?>", R"( {"type": "element", "tag": "a"} )");
}
TEST_F(XmlParserTest, IgnoreDocumentTypes) {
TestParseXml(
R"(<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">Some HTML</html>
)",
R"( {"type": "element",
"namespaces": {"": "http://www.w3.org/1999/xhtml"},
"tag": "html",
"children":[{"type": "text", "text": "Some HTML"}]
}
)");
}
TEST_F(XmlParserTest, ParseSelfClosingTag) {
TestParseXml("<a/>", R"( {"type": "element", "tag": "a"} )");
TestParseXml("<a><b/></a>",
......
......@@ -158,6 +158,10 @@ bool XmlReader::GetTextIfCDataElement(std::string* content) {
return true;
}
bool XmlReader::IsElement() {
return NodeType() == XML_READER_TYPE_ELEMENT;
}
bool XmlReader::IsClosingElement() {
return NodeType() == XML_READER_TYPE_END_ELEMENT;
}
......@@ -166,15 +170,6 @@ bool XmlReader::IsEmptyElement() {
return xmlTextReaderIsEmptyElement(reader_);
}
bool XmlReader::IsWhiteSpace() {
return NodeType() == XML_READER_TYPE_WHITESPACE ||
NodeType() == XML_READER_TYPE_SIGNIFICANT_WHITESPACE;
}
bool XmlReader::IsComment() {
return NodeType() == XML_READER_TYPE_COMMENT;
}
bool XmlReader::ReadElementContent(std::string* content) {
const int start_depth = Depth();
......
......@@ -89,6 +89,11 @@ class XmlReader {
bool GetTextIfTextElement(std::string* content);
bool GetTextIfCDataElement(std::string* content);
// Returns true if the node is an element (e.g. <foo>). Note this returns
// false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
// check for those.
bool IsElement();
// Returns true if the node is a closing element (e.g. </foo>).
bool IsClosingElement();
......@@ -96,12 +101,6 @@ class XmlReader {
// <foo/>).
bool IsEmptyElement();
// Returns true if the current node is a white-space node.
bool IsWhiteSpace();
// Returns true if the current node is a comment (e.g. <!-- comment -->).
bool IsComment();
// Helper functions not provided by libxml ----------------------------------
// Return the string content within an element.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment