Commit acc25ce9 authored by Jay Civelli's avatar Jay Civelli Committed by Commit Bot

Fixes for SafeXmlParser choking on DTD and DOCTYPE.

Changing the SafeXmlParser so it ignores nodes like DTDs and processing
instructions.

Bug: 789838
Change-Id: I0bf4a07509309f9e7720fdcea6283542a0b489ff
Reviewed-on: https://chromium-review.googlesource.com/804946Reviewed-by: default avatarScott Graham <scottmg@chromium.org>
Reviewed-by: default avatarRobert Sesek <rsesek@chromium.org>
Commit-Queue: Jay Civelli <jcivelli@chromium.org>
Cr-Commit-Position: refs/heads/master@{#521392}
parent a1b4f009
...@@ -101,9 +101,6 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) { ...@@ -101,9 +101,6 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) {
base::Value root_element; base::Value root_element;
std::vector<base::Value*> element_stack; std::vector<base::Value*> element_stack;
while (xml_reader.Read()) { while (xml_reader.Read()) {
if (xml_reader.IsWhiteSpace() || xml_reader.IsComment())
continue;
if (xml_reader.IsClosingElement()) { if (xml_reader.IsClosingElement()) {
if (element_stack.empty()) { if (element_stack.empty()) {
ReportError(std::move(callback), "Invalid XML: unbalanced elements"); ReportError(std::move(callback), "Invalid XML: unbalanced elements");
...@@ -123,14 +120,17 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) { ...@@ -123,14 +120,17 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) {
new_element = CreateTextNode(text, TextNodeType::kText); new_element = CreateTextNode(text, TextNodeType::kText);
} else if (xml_reader.GetTextIfCDataElement(&text)) { } else if (xml_reader.GetTextIfCDataElement(&text)) {
new_element = CreateTextNode(text, TextNodeType::kCData); new_element = CreateTextNode(text, TextNodeType::kCData);
} else { } else if (xml_reader.IsElement()) {
// Element node.
new_element = CreateNewElement(xml_reader.NodeFullName()); new_element = CreateNewElement(xml_reader.NodeFullName());
PopulateNamespaces(&new_element, &xml_reader); PopulateNamespaces(&new_element, &xml_reader);
PopulateAttributes(&new_element, &xml_reader); PopulateAttributes(&new_element, &xml_reader);
// Self-closing (empty) element have no close tag (or children); don't // Self-closing (empty) element have no close tag (or children); don't
// push them on the element stack. // push them on the element stack.
push_new_node_to_stack = !xml_reader.IsEmptyElement(); push_new_node_to_stack = !xml_reader.IsEmptyElement();
} else {
// Ignore all other node types (spaces, comments, processing instructions,
// DTDs...).
continue;
} }
base::Value* new_element_ptr; base::Value* new_element_ptr;
...@@ -139,6 +139,7 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) { ...@@ -139,6 +139,7 @@ void XmlParser::Parse(const std::string& xml, ParseCallback callback) {
AddChildToElement(current_element, std::move(new_element)); AddChildToElement(current_element, std::move(new_element));
} else { } else {
// First element we are parsing, it becomes the root element. // First element we are parsing, it becomes the root element.
DCHECK(xml_reader.IsElement());
DCHECK(root_element.is_none()); DCHECK(root_element.is_none());
root_element = std::move(new_element); root_element = std::move(new_element);
new_element_ptr = &root_element; new_element_ptr = &root_element;
......
<!-- This is a comment. -->
<html>Some HTML</html>
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
Some fine HTML
</html>
\ No newline at end of file
<?xml-stylesheet href="mystyle.css" type="text/css"?>
<html>Some HTML</html>
...@@ -63,6 +63,33 @@ TEST_F(XmlParserTest, ParseBadXml) { ...@@ -63,6 +63,33 @@ TEST_F(XmlParserTest, ParseBadXml) {
TestParseXml(xml, ""); TestParseXml(xml, "");
} }
TEST_F(XmlParserTest, IgnoreComments) {
TestParseXml("<!-- This is the best XML document IN THE WORLD! --><a></a>",
R"( {"type": "element", "tag": "a"} )");
}
TEST_F(XmlParserTest, IgnoreProcessingCommands) {
TestParseXml(R"(<?xml-stylesheet href="mystyle.css" type="text/css"?>
<a></a>)",
R"( {"type": "element", "tag": "a"} )");
TestParseXml("<a/><?hello?>", R"( {"type": "element", "tag": "a"} )");
}
TEST_F(XmlParserTest, IgnoreDocumentTypes) {
TestParseXml(
R"(<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">Some HTML</html>
)",
R"( {"type": "element",
"namespaces": {"": "http://www.w3.org/1999/xhtml"},
"tag": "html",
"children":[{"type": "text", "text": "Some HTML"}]
}
)");
}
TEST_F(XmlParserTest, ParseSelfClosingTag) { TEST_F(XmlParserTest, ParseSelfClosingTag) {
TestParseXml("<a/>", R"( {"type": "element", "tag": "a"} )"); TestParseXml("<a/>", R"( {"type": "element", "tag": "a"} )");
TestParseXml("<a><b/></a>", TestParseXml("<a><b/></a>",
......
...@@ -158,6 +158,10 @@ bool XmlReader::GetTextIfCDataElement(std::string* content) { ...@@ -158,6 +158,10 @@ bool XmlReader::GetTextIfCDataElement(std::string* content) {
return true; return true;
} }
bool XmlReader::IsElement() {
return NodeType() == XML_READER_TYPE_ELEMENT;
}
bool XmlReader::IsClosingElement() { bool XmlReader::IsClosingElement() {
return NodeType() == XML_READER_TYPE_END_ELEMENT; return NodeType() == XML_READER_TYPE_END_ELEMENT;
} }
...@@ -166,15 +170,6 @@ bool XmlReader::IsEmptyElement() { ...@@ -166,15 +170,6 @@ bool XmlReader::IsEmptyElement() {
return xmlTextReaderIsEmptyElement(reader_); return xmlTextReaderIsEmptyElement(reader_);
} }
bool XmlReader::IsWhiteSpace() {
return NodeType() == XML_READER_TYPE_WHITESPACE ||
NodeType() == XML_READER_TYPE_SIGNIFICANT_WHITESPACE;
}
bool XmlReader::IsComment() {
return NodeType() == XML_READER_TYPE_COMMENT;
}
bool XmlReader::ReadElementContent(std::string* content) { bool XmlReader::ReadElementContent(std::string* content) {
const int start_depth = Depth(); const int start_depth = Depth();
......
...@@ -89,6 +89,11 @@ class XmlReader { ...@@ -89,6 +89,11 @@ class XmlReader {
bool GetTextIfTextElement(std::string* content); bool GetTextIfTextElement(std::string* content);
bool GetTextIfCDataElement(std::string* content); bool GetTextIfCDataElement(std::string* content);
// Returns true if the node is an element (e.g. <foo>). Note this returns
// false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
// check for those.
bool IsElement();
// Returns true if the node is a closing element (e.g. </foo>). // Returns true if the node is a closing element (e.g. </foo>).
bool IsClosingElement(); bool IsClosingElement();
...@@ -96,12 +101,6 @@ class XmlReader { ...@@ -96,12 +101,6 @@ class XmlReader {
// <foo/>). // <foo/>).
bool IsEmptyElement(); bool IsEmptyElement();
// Returns true if the current node is a white-space node.
bool IsWhiteSpace();
// Returns true if the current node is a comment (e.g. <!-- comment -->).
bool IsComment();
// Helper functions not provided by libxml ---------------------------------- // Helper functions not provided by libxml ----------------------------------
// Return the string content within an element. // Return the string content within an element.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment