Commit 90a4cb1c authored by Jay Civelli, committed by Commit Bot

Adding a SafeXMLParser to the data-decoder service.

Introducing a new XML parser service that parses XML into a
base::DictionaryValue. It does so by JSONifying the XML content.

Also factoring out TestServiceManagerListener to its own file, now that
it is used in 2 places (safe_json_parser_browsertest.cc and
safe_xml_parser_browsertest.cc).

Bug: 784667
Change-Id: I1f41f44d49910da43837aac8bca52e24533460ef
Reviewed-on: https://chromium-review.googlesource.com/767890
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Scott Graham <scottmg@chromium.org>
Reviewed-by: Robert Sesek <rsesek@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Ken Rockot <rockot@chromium.org>
Commit-Queue: Jay Civelli <jcivelli@chromium.org>
Cr-Commit-Position: refs/heads/master@{#519832}
parent 270b4e2b
......@@ -9,12 +9,11 @@
#include "base/json/json_writer.h"
#include "base/run_loop.h"
#include "base/values.h"
#include "build/build_config.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "chrome/test/base/test_service_manager_listener.h"
#include "content/public/common/service_manager_connection.h"
#include "content/public/test/test_browser_thread_bundle.h"
#include "content/public/test/test_utils.h"
#include "mojo/public/cpp/bindings/binding.h"
#include "services/data_decoder/public/cpp/safe_json_parser.h"
#include "services/data_decoder/public/interfaces/constants.mojom.h"
#include "services/data_decoder/public/interfaces/json_parser.mojom.h"
......@@ -36,64 +35,6 @@ std::string MaybeToJson(const base::Value* value) {
return json;
}
// This class lets us wait for services to be started and tracks how many times
// a service was started.
class TestServiceManagerListener
: public service_manager::mojom::ServiceManagerListener {
public:
TestServiceManagerListener() = default;
void WaitUntilServiceStarted(const std::string& service_name) {
DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
DCHECK(!on_service_event_loop_closure_);
DCHECK(service_name_.empty());
service_name_ = service_name;
base::RunLoop run_loop;
on_service_event_loop_closure_ = run_loop.QuitClosure();
run_loop.Run();
on_service_event_loop_closure_.Reset();
}
uint32_t GetServiceStartCount(const std::string& service_name) const {
auto iter = service_start_counters_.find(service_name);
return iter == service_start_counters_.end() ? 0 : iter->second;
}
private:
// service_manager::mojom::ServiceManagerListener implementation:
void OnInit(std::vector<service_manager::mojom::RunningServiceInfoPtr>
running_services) override {}
void OnServiceCreated(
service_manager::mojom::RunningServiceInfoPtr service) override {}
void OnServiceStarted(const service_manager::Identity& identity,
uint32_t pid) override {
DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
service_start_counters_[identity.name()]++;
if (identity.name() != service_name_)
return;
service_name_.clear();
std::move(on_service_event_loop_closure_).Run();
}
void OnServicePIDReceived(const service_manager::Identity& identity,
uint32_t pid) override {}
void OnServiceFailedToStart(
const service_manager::Identity& identity) override {}
void OnServiceStopped(const service_manager::Identity& identity) override {}
base::Closure on_service_event_loop_closure_;
std::string service_name_;
std::map<std::string, uint32_t> service_start_counters_;
DISALLOW_COPY_AND_ASSIGN(TestServiceManagerListener);
};
class ParseCallback {
public:
explicit ParseCallback(base::Closure callback) : callback_(callback) {}
......@@ -117,8 +58,6 @@ class ParseCallback {
DISALLOW_COPY_AND_ASSIGN(ParseCallback);
};
} // namespace
class SafeJsonParserTest : public InProcessBrowserTest {
protected:
void TestParse(const std::string& json) {
......@@ -189,31 +128,23 @@ class SafeJsonParserImplTest : public InProcessBrowserTest {
void SetUpOnMainThread() override {
InProcessBrowserTest::SetUpOnMainThread();
// Register a listener on the ServiceManager to track when services are
// started.
mojo::InterfacePtr<service_manager::mojom::ServiceManager> service_manager;
connector_ =
content::ServiceManagerConnection::GetForProcess()->GetConnector();
connector_->BindInterface(service_manager::mojom::kServiceName,
&service_manager);
service_manager::mojom::ServiceManagerListenerPtr listener_ptr;
listener_binding_ = std::make_unique<
mojo::Binding<service_manager::mojom::ServiceManagerListener>>(
&listener_, mojo::MakeRequest(&listener_ptr));
service_manager->AddListener(std::move(listener_ptr));
// Initialize the TestServiceManagerListener so it starts listening for
// service activity.
listener_.Init();
// The data_decoder service will stop if no connection is bound to it after
// 5 seconds. We bind a connection to it for the duration of the test so it
// is guaranteed the service is always running.
connector_->BindInterface(data_decoder::mojom::kServiceName,
&json_parser_ptr_);
connector()->BindInterface(data_decoder::mojom::kServiceName,
&json_parser_ptr_);
listener_.WaitUntilServiceStarted(data_decoder::mojom::kServiceName);
EXPECT_EQ(
1U, listener_.GetServiceStartCount(data_decoder::mojom::kServiceName));
}
service_manager::Connector* connector() const { return connector_; }
service_manager::Connector* connector() const {
return content::ServiceManagerConnection::GetForProcess()->GetConnector();
}
uint32_t GetServiceStartCount(const std::string& service_name) const {
return listener_.GetServiceStartCount(service_name);
......@@ -221,14 +152,13 @@ class SafeJsonParserImplTest : public InProcessBrowserTest {
private:
data_decoder::mojom::JsonParserPtr json_parser_ptr_;
std::unique_ptr<mojo::Binding<service_manager::mojom::ServiceManagerListener>>
listener_binding_;
TestServiceManagerListener listener_;
service_manager::Connector* connector_;
DISALLOW_COPY_AND_ASSIGN(SafeJsonParserImplTest);
};
} // namespace
IN_PROC_BROWSER_TEST_F(SafeJsonParserTest, Parse) {
TestParse("{}");
TestParse("choke");
......
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include "base/bind.h"
#include "base/callback_helpers.h"
#include "base/json/json_reader.h"
#include "base/macros.h"
#include "base/values.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "chrome/test/base/test_service_manager_listener.h"
#include "content/public/common/service_manager_connection.h"
#include "content/public/test/test_utils.h"
#include "services/data_decoder/public/cpp/safe_xml_parser.h"
#include "services/data_decoder/public/interfaces/constants.mojom.h"
#include "services/data_decoder/public/interfaces/xml_parser.mojom.h"
#include "services/service_manager/public/cpp/connector.h"
namespace {
// Browser-test fixture for data_decoder::ParseXml(). It keeps one connection
// to the data_decoder service open for the whole test and tracks service
// starts through a TestServiceManagerListener.
class SafeXmlParserTest : public InProcessBrowserTest {
 public:
  SafeXmlParserTest() = default;
  ~SafeXmlParserTest() override = default;

 protected:
  void SetUpOnMainThread() override {
    InProcessBrowserTest::SetUpOnMainThread();

    listener_.Init();

    // The data_decoder service will stop if no connection is bound to it after
    // 5 seconds. We bind a connection to it for the duration of the test so it
    // is guaranteed the service is always running.
    content::ServiceManagerConnection::GetForProcess()
        ->GetConnector()
        ->BindInterface(data_decoder::mojom::kServiceName, &xml_parser_ptr_);
    listener_.WaitUntilServiceStarted(data_decoder::mojom::kServiceName);
    EXPECT_EQ(
        1U, listener_.GetServiceStartCount(data_decoder::mojom::kServiceName));
  }

  // Returns how many times |service_name| has been reported as started.
  uint32_t GetServiceStartCount(const std::string& service_name) const {
    return listener_.GetServiceStartCount(service_name);
  }

  // Parses |xml| and compares its parsed representation with |expected_json|.
  // If |expected_json| is empty, the XML parsing is expected to fail.
  // Blocks on a RunLoop until the parse callback has been invoked.
  void TestParse(base::StringPiece xml, base::StringPiece expected_json) {
    SCOPED_TRACE(xml);

    base::RunLoop run_loop;
    std::unique_ptr<base::Value> expected_value;
    if (!expected_json.empty()) {
      expected_value = base::JSONReader::Read(expected_json);
      DCHECK(expected_value) << "Bad test, incorrect JSON: " << expected_json;
    }

    // Unretained(this) is fine here: the fixture outlives |run_loop|, which
    // does not return until XmlParsingDone() has quit it.
    data_decoder::ParseXml(
        content::ServiceManagerConnection::GetForProcess()->GetConnector(),
        xml.as_string(),
        base::BindOnce(&SafeXmlParserTest::XmlParsingDone,
                       base::Unretained(this), run_loop.QuitClosure(),
                       std::move(expected_value)));
    run_loop.Run();
  }

 private:
  // Parse callback: checks |actual_value| / |error| against |expected_value|
  // (null means a failure is expected) and quits the waiting RunLoop.
  void XmlParsingDone(base::Closure quit_loop_closure,
                      std::unique_ptr<base::Value> expected_value,
                      std::unique_ptr<base::Value> actual_value,
                      const base::Optional<std::string>& error) {
    // The runner must be a named local. An unnamed temporary is destroyed at
    // the end of its own statement, so the closure would run immediately
    // instead of when this function exits (including the early returns and a
    // failing ASSERT below).
    base::ScopedClosureRunner quit_runner(std::move(quit_loop_closure));

    if (!expected_value) {
      EXPECT_FALSE(actual_value);
      EXPECT_TRUE(error);
      return;
    }
    EXPECT_FALSE(error);
    ASSERT_TRUE(actual_value);
    EXPECT_EQ(*expected_value, *actual_value);
  }

  // Held open so the data_decoder service stays alive during the test.
  data_decoder::mojom::XmlParserPtr xml_parser_ptr_;
  TestServiceManagerListener listener_;

  DISALLOW_COPY_AND_ASSIGN(SafeXmlParserTest);
};
} // namespace
// End-to-end check that SafeXmlParser parses. Only a rejection case and one
// trivial document are covered here; the XML parsing itself is exercised by
// the service unit-tests.
IN_PROC_BROWSER_TEST_F(SafeXmlParserTest, Parse) {
  // Input that is not XML; the empty expectation means parsing must fail.
  constexpr char kNotXml[] = "[\"this is JSON not XML\"]";
  TestParse(kNotXml, "");

  // A single element with one text child and its expected representation.
  constexpr char kHelloXml[] = "<hello>bonjour</hello>";
  constexpr char kHelloAsJson[] = R"(
{"type": "element",
"tag": "hello",
"children": [{"type": "text", "text": "bonjour"}]
} )";
  TestParse(kHelloXml, kHelloAsJson);
}
// Checks the isolation guarantee: each ParseXml() call must cause one more
// data_decoder service start to be reported.
IN_PROC_BROWSER_TEST_F(SafeXmlParserTest, Isolation) {
  for (int i = 0; i < 5; i++) {
    base::RunLoop run_loop;
    bool parsing_success = false;

    // Records whether the parse succeeded, then unblocks |run_loop|.
    auto on_parsed = base::BindOnce(
        [](bool* success, base::Closure quit_loop_closure,
           std::unique_ptr<base::Value> actual_value,
           const base::Optional<std::string>& error) {
          *success = !error;
          std::move(quit_loop_closure).Run();
        },
        &parsing_success, run_loop.QuitClosure());

    service_manager::Connector* connector =
        content::ServiceManagerConnection::GetForProcess()->GetConnector();
    data_decoder::ParseXml(connector, "<hello>bonjour</hello>",
                           std::move(on_parsed));
    run_loop.Run();

    EXPECT_TRUE(parsing_success);
    // 2 + i below because the data_decoder is already running and the index
    // starts at 0.
    EXPECT_EQ(2U + i, GetServiceStartCount(data_decoder::mojom::kServiceName));
  }
}
......@@ -73,6 +73,8 @@ static_library("test_support") {
"base/test_browser_window.h",
"base/test_launcher_utils.cc",
"base/test_launcher_utils.h",
"base/test_service_manager_listener.cc",
"base/test_service_manager_listener.h",
"base/test_switches.cc",
"base/test_switches.h",
"base/testing_browser_process.cc",
......@@ -649,6 +651,7 @@ test("browser_tests") {
"../browser/safe_browsing/test_safe_browsing_database_helper.cc",
"../browser/safe_browsing/test_safe_browsing_database_helper.h",
"../browser/safe_json_parser_browsertest.cc",
"../browser/safe_xml_parser_browsertest.cc",
"../browser/search/hotword_installer_browsertest.cc",
"../browser/search/suggestions/image_fetcher_impl_browsertest.cc",
"../browser/search_engines/template_url_scraper_browsertest.cc",
......
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/test/base/test_service_manager_listener.h"
#include "base/run_loop.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/common/service_manager_connection.h"
#include "services/service_manager/public/cpp/connector.h"
// |binding_| is created pointing at this object but stays unbound until
// Init() attaches it to a listener request.
TestServiceManagerListener::TestServiceManagerListener() : binding_(this) {}

TestServiceManagerListener::~TestServiceManagerListener() = default;
void TestServiceManagerListener::Init() {
DCHECK(!binding_.is_bound());
// Register a listener on the ServiceManager to track when services are
// started.
mojo::InterfacePtr<service_manager::mojom::ServiceManager> service_manager;
service_manager::Connector* connector =
content::ServiceManagerConnection::GetForProcess()->GetConnector();
connector->BindInterface(service_manager::mojom::kServiceName,
&service_manager);
service_manager::mojom::ServiceManagerListenerPtr listener_ptr;
binding_.Bind(mojo::MakeRequest(&listener_ptr));
service_manager->AddListener(std::move(listener_ptr));
}
void TestServiceManagerListener::WaitUntilServiceStarted(
const std::string& service_name) {
DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
DCHECK(!on_service_event_loop_closure_);
DCHECK(service_name_.empty());
service_name_ = service_name;
base::RunLoop run_loop;
on_service_event_loop_closure_ = run_loop.QuitClosure();
run_loop.Run();
on_service_event_loop_closure_.Reset();
}
// Returns the number of start notifications recorded for |service_name|, or 0
// when none has been seen.
uint32_t TestServiceManagerListener::GetServiceStartCount(
    const std::string& service_name) const {
  const auto entry = service_start_counters_.find(service_name);
  if (entry == service_start_counters_.end())
    return 0;
  return entry->second;
}
// Intentionally a no-op: this listener only acts on OnServiceStarted().
void TestServiceManagerListener::OnInit(
std::vector<service_manager::mojom::RunningServiceInfoPtr>
running_services) {}
// Intentionally a no-op: this listener only acts on OnServiceStarted().
void TestServiceManagerListener::OnServiceCreated(
service_manager::mojom::RunningServiceInfoPtr service) {}
// Counts the start of |identity|'s service and, if it is the one a pending
// WaitUntilServiceStarted() call is waiting for, releases that wait.
void TestServiceManagerListener::OnServiceStarted(
    const service_manager::Identity& identity,
    uint32_t pid) {
  DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

  // Every start is counted, whether or not somebody is waiting for it.
  ++service_start_counters_[identity.name()];

  if (identity.name() == service_name_) {
    service_name_.clear();
    std::move(on_service_event_loop_closure_).Run();
  }
}
// The remaining ServiceManagerListener notifications are intentionally
// ignored: only service starts are tracked by this class.
void TestServiceManagerListener::OnServicePIDReceived(
const service_manager::Identity& identity,
uint32_t pid) {}
void TestServiceManagerListener::OnServiceFailedToStart(
const service_manager::Identity& identity) {}
void TestServiceManagerListener::OnServiceStopped(
const service_manager::Identity& identity) {}
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_TEST_BASE_TEST_SERVICE_MANAGER_LISTENER_H_
#define CHROME_TEST_BASE_TEST_SERVICE_MANAGER_LISTENER_H_
#include <cstdint>
#include <map>
#include <string>
#include <vector>
#include "base/callback_forward.h"
#include "base/macros.h"
#include "mojo/public/cpp/bindings/binding.h"
#include "services/service_manager/public/interfaces/service_manager.mojom.h"
namespace service_manager {
class Identity;
}
// This class lets us wait for services to be started and tracks how many times
// a service was started.
//
// Typical use: call Init() once, trigger the service, then call
// WaitUntilServiceStarted() and/or GetServiceStartCount().
class TestServiceManagerListener
: public service_manager::mojom::ServiceManagerListener {
public:
TestServiceManagerListener();
~TestServiceManagerListener() override;
// Must be called once before the other public methods can be used.
// Registers this object as a listener on the ServiceManager.
void Init();
// Blocks (running a RunLoop) until a start notification is received for
// |service_name|. Only one wait may be pending at a time.
void WaitUntilServiceStarted(const std::string& service_name);
// Returns the number of start notifications received for |service_name|, or 0
// if there were none.
uint32_t GetServiceStartCount(const std::string& service_name) const;
private:
// service_manager::mojom::ServiceManagerListener implementation:
// Only OnServiceStarted() does any work; the other notifications are ignored.
void OnInit(std::vector<service_manager::mojom::RunningServiceInfoPtr>
running_services) override;
void OnServiceCreated(
service_manager::mojom::RunningServiceInfoPtr service) override;
void OnServiceStarted(const service_manager::Identity& identity,
uint32_t pid) override;
void OnServicePIDReceived(const service_manager::Identity& identity,
uint32_t pid) override;
void OnServiceFailedToStart(
const service_manager::Identity& identity) override;
void OnServiceStopped(const service_manager::Identity& identity) override;
// Quits the RunLoop of the pending WaitUntilServiceStarted() call; null when
// no wait is in progress.
base::Closure on_service_event_loop_closure_;
// Name of the service currently being waited for; empty when not waiting.
std::string service_name_;
// Number of start notifications received so far, keyed by service name.
std::map<std::string, uint32_t> service_start_counters_;
// Connects this object to the listener pipe handed to the ServiceManager in
// Init().
mojo::Binding<service_manager::mojom::ServiceManagerListener> binding_;
DISALLOW_COPY_AND_ASSIGN(TestServiceManagerListener);
};
#endif // CHROME_TEST_BASE_TEST_SERVICE_MANAGER_LISTENER_H_
......@@ -71,7 +71,7 @@
"content_plugin": [ "browser" ],
"content_renderer": [ "browser" ],
"content_utility": [ "browser" ],
"data_decoder": [ "image_decoder", "json_parser" ],
"data_decoder": [ "image_decoder", "json_parser", "xml_parser" ],
"device": [
"device:battery_monitor",
"device:generic_sensor",
......
......@@ -4,6 +4,7 @@
import("//services/service_manager/public/cpp/service.gni")
import("//services/service_manager/public/service_manifest.gni")
import("//testing/libfuzzer/fuzzer_test.gni")
source_set("lib") {
sources = [
......@@ -13,6 +14,8 @@ source_set("lib") {
"image_decoder_impl.h",
"json_parser_impl.cc",
"json_parser_impl.h",
"xml_parser.cc",
"xml_parser.h",
]
deps = [
......@@ -20,6 +23,7 @@ source_set("lib") {
"//mojo/public/cpp/bindings",
"//skia",
"//third_party/WebKit/public:blink",
"//third_party/libxml",
"//ui/gfx",
"//ui/gfx/geometry",
]
......@@ -36,7 +40,9 @@ source_set("tests") {
sources = [
"image_decoder_impl_unittest.cc",
"public/cpp/json_sanitizer_unittest.cc",
"public/cpp/safe_xml_parser_unittest.cc",
"public/cpp/testing_json_parser_unittest.cc",
"xml_parser_unittest.cc",
]
deps = [
......@@ -59,3 +65,15 @@ service_manifest("manifest") {
name = "data_decoder"
source = "manifest.json"
}
# Fuzz target for the XML parser, built from xml_parser_fuzzer.cc and linked
# against the ":lib" source set.
fuzzer_test("xml_parser_fuzzer") {
sources = [
"xml_parser_fuzzer.cc",
]
deps = [
":lib",
"//base",
]
# Presumably a dictionary of XML tokens used to guide the fuzzer.
dict = "//testing/libfuzzer/fuzzers/dicts/xml.dict"
# Presumably a directory of initial inputs, relative to this file.
seed_corpus = "xml_parser_fuzzer_corpus"
}
......@@ -2,6 +2,7 @@ include_rules = [
"+gin",
"+jni",
"+skia",
"+third_party/libxml/chromium",
"+third_party/WebKit/public",
"+third_party/skia",
"+ui/gfx",
......
......@@ -12,6 +12,7 @@
#include "services/data_decoder/image_decoder_impl.h"
#include "services/data_decoder/json_parser_impl.h"
#include "services/data_decoder/public/interfaces/image_decoder.mojom.h"
#include "services/data_decoder/xml_parser.h"
#include "services/service_manager/public/cpp/service_context.h"
namespace data_decoder {
......@@ -33,6 +34,12 @@ void OnJsonParserRequest(service_manager::ServiceContextRefFactory* ref_factory,
std::move(request));
}
// Serves an incoming XmlParser interface request: creates an XmlParser holding
// a service context ref from |ref_factory| and strongly binds it to |request|.
void OnXmlParserRequest(service_manager::ServiceContextRefFactory* ref_factory,
                        mojom::XmlParserRequest request) {
  auto xml_parser = base::MakeUnique<XmlParser>(ref_factory->CreateRef());
  mojo::MakeStrongBinding(std::move(xml_parser), std::move(request));
}
} // namespace
DataDecoderService::DataDecoderService() : weak_factory_(this) {}
......@@ -50,6 +57,7 @@ void DataDecoderService::OnStart() {
registry_.AddInterface(
base::Bind(&OnImageDecoderRequest, ref_factory_.get()));
registry_.AddInterface(base::Bind(&OnJsonParserRequest, ref_factory_.get()));
registry_.AddInterface(base::Bind(&OnXmlParserRequest, ref_factory_.get()));
}
void DataDecoderService::OnBindInterface(
......
......@@ -5,7 +5,8 @@
"service_manager:connector": {
"provides": {
"image_decoder": [ "data_decoder::mojom::ImageDecoder" ],
"json_parser": [ "data_decoder::mojom::JsonParser" ]
"json_parser": [ "data_decoder::mojom::JsonParser" ],
"xml_parser": [ "data_decoder::mojom::XmlParser" ]
},
"requires": {
"service_manager": [ "service_manager:all_users" ]
......
......@@ -15,6 +15,8 @@ source_set("cpp") {
"safe_json_parser_impl.cc",
"safe_json_parser_impl.h",
"safe_json_parser_impl.h",
"safe_xml_parser.cc",
"safe_xml_parser.h",
]
public_deps = [
......
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "services/data_decoder/public/cpp/safe_xml_parser.h"
#include "base/callback.h"
#include "base/macros.h"
#include "base/threading/thread_checker.h"
#include "base/unguessable_token.h"
#include "base/values.h"
#include "services/data_decoder/public/interfaces/constants.mojom.h"
#include "services/data_decoder/public/interfaces/xml_parser.mojom.h"
#include "services/service_manager/public/cpp/connector.h"
#include "services/service_manager/public/interfaces/constants.mojom.h"
namespace data_decoder {
namespace {
// Class that does the actual parsing. Deletes itself when parsing is done.
//
// Instances are created with |new| by ParseXml() and are never owned by the
// caller: ReportResults() runs the callback and then deletes the instance.
class SafeXmlParser {
 public:
  // Connects to the XML parser service through |connector| and immediately
  // sends |unsafe_xml| for parsing. |callback| must be non-null.
  SafeXmlParser(service_manager::Connector* connector,
                const std::string& unsafe_xml,
                XmlParserCallback callback);
  ~SafeXmlParser();

 private:
  // Forwards the outcome to |callback_| and deletes this object. Called either
  // with the parser's reply or, on connection error, with a null |parsed_xml|
  // and an error message.
  void ReportResults(std::unique_ptr<base::Value> parsed_xml,
                     const base::Optional<std::string>& error);

  // Callback supplied by the caller; run exactly once by ReportResults().
  XmlParserCallback callback_;
  // Connection to the XML parser service used for this single request.
  mojom::XmlParserPtr xml_parser_ptr_;

  SEQUENCE_CHECKER(sequence_checker_);

  DISALLOW_COPY_AND_ASSIGN(SafeXmlParser);
};
// Binds a connection to a dedicated XML parser service instance and sends
// |unsafe_xml| to it. The outcome, or a connection error, is delivered to
// ReportResults().
SafeXmlParser::SafeXmlParser(service_manager::Connector* connector,
                             const std::string& unsafe_xml,
                             XmlParserCallback callback)
    : callback_(std::move(callback)) {
  DCHECK(callback_);  // Parsing without a callback is useless.

  // A random instance ID guarantees that this request is not routed to an
  // already running instance: the connection is to a new service running in
  // its own process.
  const std::string instance_id = base::UnguessableToken::Create().ToString();
  service_manager::Identity identity(
      mojom::kServiceName, service_manager::mojom::kInheritUserID, instance_id);
  connector->BindInterface(identity, &xml_parser_ptr_);

  // Unretained(this) is safe in both callbacks below because |xml_parser_ptr_|
  // is owned by this object and therefore cannot call back after destruction.
  auto on_connection_error = base::BindOnce(
      &SafeXmlParser::ReportResults, base::Unretained(this),
      /*parsed_xml=*/nullptr,
      base::make_optional(
          std::string("Connection error with the XML parser process.")));
  xml_parser_ptr_.set_connection_error_handler(std::move(on_connection_error));

  xml_parser_ptr_->Parse(
      unsafe_xml,
      base::BindOnce(&SafeXmlParser::ReportResults, base::Unretained(this)));
}

SafeXmlParser::~SafeXmlParser() = default;
// Delivers |parsed_xml| and |error| to the caller's callback, then destroys
// this object. Nothing may touch members after this function returns.
void SafeXmlParser::ReportResults(std::unique_ptr<base::Value> parsed_xml,
                                  const base::Optional<std::string>& error) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  XmlParserCallback done_callback = std::move(callback_);
  std::move(done_callback).Run(std::move(parsed_xml), error);

  // This should be the last interaction with this instance, safely delete.
  delete this;
}
// Returns the list stored under the children key of |element|, or null when
// |element| is not a dictionary or has no list under that key.
const base::Value* GetChildren(const base::Value& element) {
  return element.is_dict()
             ? element.FindKeyOfType(mojom::XmlParser::kChildrenKey,
                                     base::Value::Type::LIST)
             : nullptr;
}
} // namespace
// Starts an asynchronous parse of |unsafe_xml| and reports the outcome through
// |callback|. The SafeXmlParser allocated here is deliberately not owned by
// anyone: it deletes itself in ReportResults() once |callback| has been run.
void ParseXml(service_manager::Connector* connector,
const std::string& unsafe_xml,
XmlParserCallback callback) {
new SafeXmlParser(connector, unsafe_xml, std::move(callback));
}
// Returns true only if |element| is a dictionary whose tag key holds a string
// equal to |name|.
bool IsXmlElementNamed(const base::Value& element, const std::string& name) {
  if (!element.is_dict())
    return false;

  const base::Value* tag = element.FindKeyOfType(mojom::XmlParser::kTagKey,
                                                 base::Value::Type::STRING);
  if (!tag)
    return false;
  return tag->GetString() == name;
}
// Returns true only if |element| is a dictionary whose type key holds a string
// equal to |type|.
bool IsXmlElementOfType(const base::Value& element, const std::string& type) {
  if (!element.is_dict())
    return false;

  const base::Value* node_type = element.FindKeyOfType(
      mojom::XmlParser::kTypeKey, base::Value::Type::STRING);
  if (!node_type)
    return false;
  return node_type->GetString() == type;
}
// Copies the tag name of |element| into |tag_name| and returns true. Returns
// false, leaving |tag_name| untouched, when |element| is not a dictionary or
// has no string under the tag key.
bool GetXmlElementTagName(const base::Value& element, std::string* tag_name) {
  DCHECK(tag_name);

  const base::Value* tag_text =
      element.is_dict() ? element.FindKeyOfType(mojom::XmlParser::kTagKey,
                                                base::Value::Type::STRING)
                        : nullptr;
  if (tag_text) {
    *tag_name = tag_text->GetString();
    return true;
  }
  return false;
}
// Stores in |text| the text of the first child of |element| that is a text or
// CDATA node and returns true. Returns false, leaving |text| untouched, when
// |element| has no children list or none of its children is a text or CDATA
// node. If the matching child carries no string under the text key, |text| is
// set to the empty string and true is still returned.
bool GetXmlElementText(const base::Value& element, std::string* text) {
  DCHECK(text);

  const base::Value* children = GetChildren(element);
  if (!children)
    return false;

  // Only the first text/CDATA child is considered; later ones are ignored.
  const base::Value* text_node = nullptr;
  for (const base::Value& value : children->GetList()) {
    if (IsXmlElementOfType(value, mojom::XmlParser::kTextNodeType) ||
        IsXmlElementOfType(value, mojom::XmlParser::kCDataNodeType)) {
      text_node = &value;
      break;
    }
  }
  if (!text_node)
    return false;

  const base::Value* text_value = text_node->FindKeyOfType(
      mojom::XmlParser::kTextKey, base::Value::Type::STRING);
  *text = text_value ? text_value->GetString() : "";
  return true;
}
// Counts the direct children of |element| whose tag name equals |name|.
// Returns 0 when |element| has no children list.
int GetXmlElementChildrenCount(const base::Value& element,
                               const std::string& name) {
  const base::Value* children = GetChildren(element);
  if (!children)
    return 0;

  int matching_children = 0;
  for (const base::Value& value : children->GetList()) {
    DCHECK(value.is_dict());
    // True exactly when the child has a tag name and it equals |name|.
    if (IsXmlElementNamed(value, name))
      ++matching_children;
  }
  return matching_children;
}
// Returns the first direct child of |element| whose type equals |type|, or
// null when there is no such child (or no children list at all).
const base::Value* GetXmlElementChildWithType(const base::Value& element,
                                              const std::string& type) {
  const base::Value* children = GetChildren(element);
  if (children) {
    for (const base::Value& value : children->GetList()) {
      DCHECK(value.is_dict());
      if (IsXmlElementOfType(value, type))
        return &value;
    }
  }
  return nullptr;
}
// Returns the first direct child of |element| whose tag name equals |tag|, or
// null when there is no such child (or no children list at all).
const base::Value* GetXmlElementChildWithTag(const base::Value& element,
                                             const std::string& tag) {
  const base::Value* children = GetChildren(element);
  if (children) {
    for (const base::Value& value : children->GetList()) {
      DCHECK(value.is_dict());
      if (IsXmlElementNamed(value, tag))
        return &value;
    }
  }
  return nullptr;
}
// Walks |path| starting at |element| and returns the node it leads to, or null
// if any component is missing. The first component must name |element| itself;
// each following component selects the first child with that tag. An empty
// |path| yields null. When |unique_path| is non-null it is set to true up
// front and cleared as soon as a followed component matches more than one
// child.
const base::Value* FindXmlElementPath(
    const base::Value& element,
    std::initializer_list<base::StringPiece> path,
    bool* unique_path) {
  if (unique_path)
    *unique_path = true;

  auto component = path.begin();
  if (component == path.end())
    return nullptr;

  // First element has to match the current node.
  const std::string root_tag(*component);
  if (!IsXmlElementNamed(element, root_tag))
    return nullptr;

  const base::Value* node = &element;
  for (++component; component != path.end(); ++component) {
    const std::string tag(*component);
    const base::Value* child = GetXmlElementChildWithTag(*node, tag);
    if (!child)
      return nullptr;

    // Counting siblings is only needed while the path is still unique.
    if (unique_path && *unique_path &&
        GetXmlElementChildrenCount(*node, tag) > 1) {
      *unique_path = false;
    }
    node = child;
  }
  return node;
}
} // namespace data_decoder
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SERVICES_DATA_DECODER_PUBLIC_CPP_SAFE_XML_PARSER_H_
#define SERVICES_DATA_DECODER_PUBLIC_CPP_SAFE_XML_PARSER_H_
#include <initializer_list>
#include <memory>
#include <string>
#include "base/callback.h"
#include "base/optional.h"
namespace base {
class Value;
}
namespace service_manager {
class Connector;
}
namespace data_decoder {
// Callback invoked when parsing with ParseXml has finished.
// |value| contains the base::Value dictionary structure representing the parsed
// XML. See xml_parser.mojom for an example.
// If the parsing failed, |error| contains an error message and |value| is null.
using XmlParserCallback =
base::OnceCallback<void(std::unique_ptr<base::Value> value,
const base::Optional<std::string>& error)>;
// Parses |unsafe_xml| safely in a utility process and invokes |callback| when
// done. The XML document is transformed into a base::Value dictionary
// structure, with special keys holding the tag name and child nodes.
// |connector| is the connector provided by the service manager and is used to
// retrieve the XML parser service. It's commonly retrieved from a service
// manager connection context object that the embedder provides.
// |callback| must not be null.
void ParseXml(service_manager::Connector* connector,
const std::string& unsafe_xml,
XmlParserCallback callback);
// Below are convenience functions for handling the elements returned by
// ParseXml().
// Returns true if |element| is an XML element with a tag name |name|, false
// otherwise.
bool IsXmlElementNamed(const base::Value& element, const std::string& name);
// Returns true if |element| is an XML element with a type |type|, false
// otherwise. Valid types are data_decoder::mojom::XmlParser::kElementType,
// kTextNodeType or kCDataNodeType.
bool IsXmlElementOfType(const base::Value& element, const std::string& type);
// Sets |name| with the tag name of |element| and returns true.
// Returns false if |element| does not represent a node with a tag or is not a
// valid XML element. |name| must not be null.
bool GetXmlElementTagName(const base::Value& element, std::string* name);
// Sets |text| with the text of the first child of |element| that is a text or
// CData node and returns true.
// Returns false if |element| has no text or CData child. |text| must not be
// null.
bool GetXmlElementText(const base::Value& element, std::string* text);
// Returns the number of direct children of |element| whose tag name is |name|.
int GetXmlElementChildrenCount(const base::Value& element,
const std::string& name);
// Returns the first child of |element| with the type |type|, or null if there
// are no children with that type.
// Valid values for |type| are data_decoder::mojom::XmlParser::kElementType,
// kTextNodeType or kCDataNodeType.
const base::Value* GetXmlElementChildWithType(const base::Value& element,
const std::string& type);
// Returns the first child of |element| with the tag |tag|, or null if there
// are no children with that tag.
const base::Value* GetXmlElementChildWithTag(const base::Value& element,
const std::string& tag);
// Returns the element found by following the element path |path| starting at
// |element|, or null if any element in |path| is missing or |path| is empty.
// The first component of |path| must be the tag name of |element| itself.
// Note that if more than one child matches a component of the path, the first
// one is used and |unique_path| is set to false. It is set to true otherwise.
// |unique_path| can be null if not needed.
const base::Value* FindXmlElementPath(
const base::Value& element,
std::initializer_list<base::StringPiece> path,
bool* unique_path);
} // namespace data_decoder
#endif // SERVICES_DATA_DECODER_PUBLIC_CPP_SAFE_XML_PARSER_H_
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "services/data_decoder/public/cpp/safe_xml_parser.h"
#include <memory>
#include "base/bind.h"
#include "base/values.h"
#include "build/build_config.h"
#include "services/data_decoder/xml_parser.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace data_decoder {
namespace {
std::unique_ptr<base::Value> ParseXml(const std::string& xml) {
XmlParser parser_impl(/*service_ref=*/nullptr);
mojom::XmlParser& parser = parser_impl;
std::unique_ptr<base::Value> root_node;
parser.Parse(xml, base::Bind(
[](std::unique_ptr<base::Value>* node,
std::unique_ptr<base::Value> parsed_root_node,
const base::Optional<std::string>& error) {
*node = std::move(parsed_root_node);
},
&root_node));
return root_node;
}
// Expects GetXmlElementText() on |element| to yield |expected_text|. An empty
// |expected_text| means the lookup itself is expected to fail.
void ExpectElementTextEq(const base::Value& element,
                         const std::string& expected_text) {
  std::string text;
  if (expected_text.empty()) {
    EXPECT_FALSE(GetXmlElementText(element, &text));
    return;
  }

  EXPECT_TRUE(GetXmlElementText(element, &text));
  EXPECT_EQ(expected_text, text);
}
using SafeXmlParserTest = testing::Test;
} // namespace
// Covers IsXmlElementNamed() and GetXmlElementTagName().
TEST_F(SafeXmlParserTest, NameAccessors) {
// Test that the API does not choke on non XML element values.
// First a default-constructed value, then a plain string value.
base::Value not_an_xml_value;
EXPECT_FALSE(IsXmlElementNamed(not_an_xml_value, "hello"));
not_an_xml_value = base::Value("hello");
EXPECT_FALSE(IsXmlElementNamed(not_an_xml_value, "hello"));
// Test IsXmlElementNamed.
std::unique_ptr<base::Value> xml_element = ParseXml("<hello></hello>");
// Fatal assertion: the element is dereferenced below.
ASSERT_TRUE(xml_element);
EXPECT_TRUE(IsXmlElementNamed(*xml_element, "hello"));
EXPECT_FALSE(IsXmlElementNamed(*xml_element, "bonjour"));
// Test GetXmlElementTagName.
std::string tag_name;
EXPECT_TRUE(GetXmlElementTagName(*xml_element, &tag_name));
EXPECT_EQ("hello", tag_name);
}
// Covers GetXmlElementText() for non-XML values, elements without text, and
// elements mixing text, CDATA and child elements.
TEST_F(SafeXmlParserTest, TextAccessor) {
  // Test that the API does not choke on non XML element values.
  ExpectElementTextEq(base::Value(), "");
  ExpectElementTextEq(base::Value("hello"), "");

  // Test retrieving text from elements with no text.
  std::unique_ptr<base::Value> no_text_element = ParseXml("<hello/>");
  // Fail cleanly instead of dereferencing null if parsing did not succeed.
  ASSERT_TRUE(no_text_element);
  ExpectElementTextEq(*no_text_element, "");

  // Test retrieving text from elements with actual text.
  std::unique_ptr<base::Value> text_element =
      ParseXml("<hello>bonjour bonjour</hello>");
  ASSERT_TRUE(text_element);
  ExpectElementTextEq(*text_element, "bonjour bonjour");

  // Retrieving text from elements with multiple text children returns the first
  // one only.
  std::unique_ptr<base::Value> multiple_text_elements =
      ParseXml("<hello>bonjour<space/>bonjour</hello>");
  ASSERT_TRUE(multiple_text_elements);
  ExpectElementTextEq(*multiple_text_elements, "bonjour");

  // CDATA content is returned verbatim, markup included.
  std::unique_ptr<base::Value> cdata_element =
      ParseXml("<hello><![CDATA[This is <b>CData</b>.]]></hello>");
  ASSERT_TRUE(cdata_element);
  ExpectElementTextEq(*cdata_element, "This is <b>CData</b>.");

  // With both text and CDATA children, whichever comes first wins.
  std::unique_ptr<base::Value> text_and_cdata_element =
      ParseXml("<hello>This is text.<![CDATA[This is <b>CData</b>.]]></hello>");
  ASSERT_TRUE(text_and_cdata_element);
  ExpectElementTextEq(*text_and_cdata_element, "This is text.");

  std::unique_ptr<base::Value> cdata_and_text_element =
      ParseXml("<hello><![CDATA[This is <b>CData</b>.]]>This is text.</hello>");
  ASSERT_TRUE(cdata_and_text_element);
  ExpectElementTextEq(*cdata_and_text_element, "This is <b>CData</b>.");
}
// Covers GetXmlElementChildrenCount() and GetXmlElementChildWithTag().
TEST_F(SafeXmlParserTest, ChildAccessor) {
  // Test that the API does not choke on non XML element values.
  base::Value not_an_xml_value;
  // The count is an int, so compare it to 0 rather than treating it as a bool.
  EXPECT_EQ(0, GetXmlElementChildrenCount(not_an_xml_value, "hello"));
  EXPECT_FALSE(GetXmlElementChildWithTag(not_an_xml_value, "hello"));

  // Childless element case.
  std::unique_ptr<base::Value> childless_element = ParseXml("<hello/>");
  ASSERT_TRUE(childless_element);
  EXPECT_EQ(0, GetXmlElementChildrenCount(*childless_element, "fr"));
  EXPECT_FALSE(GetXmlElementChildWithTag(*childless_element, "fr"));

  // An element with only a text child has no child named "fr" either.
  childless_element = ParseXml("<hello>bonjour</hello>");
  // Fail cleanly instead of dereferencing null if parsing did not succeed.
  ASSERT_TRUE(childless_element);
  EXPECT_EQ(0, GetXmlElementChildrenCount(*childless_element, "fr"));
  EXPECT_FALSE(GetXmlElementChildWithTag(*childless_element, "fr"));

  // Element with children case.
  std::unique_ptr<base::Value> element =
      ParseXml("<hello><fr>bonjour</fr><fr>salut</fr><es>hola</es></hello>");
  ASSERT_TRUE(element);
  EXPECT_EQ(2, GetXmlElementChildrenCount(*element, "fr"));
  EXPECT_EQ(1, GetXmlElementChildrenCount(*element, "es"));
  EXPECT_EQ(0, GetXmlElementChildrenCount(*element, "jp"));

  const base::Value* value = GetXmlElementChildWithTag(*element, "fr");
  ASSERT_TRUE(value);
  // The first matching element is returned.
  ExpectElementTextEq(*value, "bonjour");

  value = GetXmlElementChildWithTag(*element, "es");
  // Guard the dereference below.
  ASSERT_TRUE(value);
  ExpectElementTextEq(*value, "hola");

  EXPECT_FALSE(GetXmlElementChildWithTag(*element, "jp"));
}
// Checks FindXmlElementPath() with missing paths, partial paths, paths to leaf
// elements, and the |unique_path| out-parameter.
TEST_F(SafeXmlParserTest, FindByPath) {
  // Test that the API does not choke on non XML element values.
  EXPECT_FALSE(
      FindXmlElementPath(base::Value(), {"hello"}, /*unique_path=*/nullptr));

  std::unique_ptr<base::Value> element = ParseXml(
      "<hello>"
      " <fr>"
      " <formal>bonjour</formal>"
      " <casual>salut</casual>"
      " <casual>ca gaze</casual>"
      " </fr>"
      " <es>"
      " <formal>buenos dias</formal>"
      " <casual>hola</casual>"
      " </es>"
      "</hello>");
  // Every lookup below dereferences |element|; bail out if parsing failed.
  ASSERT_TRUE(element);

  // Nonexistent paths.
  EXPECT_FALSE(FindXmlElementPath(*element, {"bad"}, /*unique_path=*/nullptr));
  EXPECT_FALSE(
      FindXmlElementPath(*element, {"hello", "bad"}, /*unique_path=*/nullptr));

  // Partial paths.
  const base::Value* fr_element =
      FindXmlElementPath(*element, {"hello", "fr"}, /*unique_path=*/nullptr);
  ASSERT_TRUE(fr_element);
  EXPECT_TRUE(IsXmlElementNamed(*fr_element, "fr"));
  EXPECT_EQ(1, GetXmlElementChildrenCount(*fr_element, "formal"));
  EXPECT_EQ(2, GetXmlElementChildrenCount(*fr_element, "casual"));

  // Path to a leaf element.
  const base::Value* es_element = FindXmlElementPath(
      *element, {"hello", "es", "casual"}, /*unique_path=*/nullptr);
  ASSERT_TRUE(es_element);
  ExpectElementTextEq(*es_element, "hola");

  // Test unique path. |unique_path| is pre-set to the opposite of the expected
  // value each time so the check proves the callee actually wrote to it.
  bool unique_path = true;
  fr_element =
      FindXmlElementPath(*element, {"hello", "fr", "casual"}, &unique_path);
  ASSERT_TRUE(fr_element);
  EXPECT_FALSE(unique_path);

  unique_path = false;
  es_element =
      FindXmlElementPath(*element, {"hello", "es", "casual"}, &unique_path);
  ASSERT_TRUE(es_element);
  EXPECT_TRUE(unique_path);
}
} // namespace data_decoder
......@@ -8,6 +8,7 @@ mojom("interfaces") {
sources = [
"image_decoder.mojom",
"json_parser.mojom",
"xml_parser.mojom",
]
public_deps = [
......
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Interface used to parse simple XML files.
//
// Ignores namespaces, node attributes, comments and white-spaces.
// Note also that the text content of elements is returned as base::Value of
// type STRING (UTF-8 encoded): no effort is made by the parser to parse numeric
// values.
module data_decoder.mojom;
import "mojo/common/values.mojom";
interface XmlParser {
// Keys of the dictionaries that make up the returned Value (see the JSON
// example below).
const string kTypeKey = "type";
const string kTagKey = "tag";
const string kTextKey = "text";
const string kChildrenKey = "children";
// Values stored under kTypeKey, identifying the kind of node.
const string kElementType = "element";
const string kTextNodeType = "text";
const string kCDataNodeType = "cdata";
// Parses the input XML and returns a Value with its content. If parsing
// failed that value is empty and an error is set.
//
// For example the following XML:
//
// <library>
//   <book id="k123">
//     <author>Isaac Newton</author>
//     <title><![CDATA[Philosophiae Naturalis Principia Mathematica]]></title>
//     <price>40.95</price>
//   </book>
//   <book id="k456">
//     <author>Dr. Seuss</author>
//     <title>Green Eggs and Ham</title>
//     <genre>Kid</genre>
//     <kids/>
//     <price>4.95</price>
//   </book>
// </library>
//
// becomes (base::Value dictionary represented as JSON):
//
// {"type": "element",
//  "tag": "library",
//  "children": [
//    {"type": "element",
//     "tag": "book",
//     "children": [
//       {"type": "element",
//        "tag": "author",
//        "children": [{"type": "text", "text": "Isaac Newton"}]},
//       {"type": "element",
//        "tag": "title",
//        "children": [
//          {"type": "cdata",
//           "text": "Philosophiae Naturalis Principia Mathematica"}]},
//       {"type": "element",
//        "tag": "price",
//        "children": [{"type": "text", "text": "40.95"}]}
//     ]},
//    {"type": "element",
//     "tag": "book",
//     "children": [
//       {"type": "element",
//        "tag": "author",
//        "children": [{"type": "text", "text": "Dr. Seuss"}]},
//       {"type": "element",
//        "tag": "title",
//        "children": [{"type": "text", "text": "Green Eggs and Ham"}]},
//       {"type": "element",
//        "tag": "genre",
//        "children": [{"type": "text", "text": "Kid"}]},
//       {"type": "element", "tag": "kids"},
//       {"type": "element",
//        "tag": "price",
//        "children": [{"type": "text", "text": "4.95"}]}
//     ]}
//  ]}
//
// Note that the client library provides convenience methods for accessing
// data from the returned base::Value dictionary structure (see
// safe_xml_parser.h).
Parse(string xml) => (mojo.common.mojom.Value? result, string? error);
};
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "services/data_decoder/xml_parser.h"
#include <utility>
#include "base/values.h"
#include "third_party/libxml/chromium/libxml_utils.h"
namespace data_decoder {
namespace {
// Runs |callback| with a null result and |error| as the error message.
void ReportError(XmlParser::ParseCallback callback, const std::string& error) {
  base::Optional<std::string> error_message = base::make_optional(error);
  std::move(callback).Run(/*result=*/nullptr, std::move(error_message));
}
// The two kinds of character-data nodes CreateTextNode() can produce.
enum class TextNodeType { kText, kCData };

// Returns a dictionary node carrying |text|, tagged as a text node or as a
// CDATA node according to |node_type|.
base::Value CreateTextNode(const std::string& text, TextNodeType node_type) {
  base::Value node(base::Value::Type::DICTIONARY);
  if (node_type == TextNodeType::kText) {
    node.SetKey(mojom::XmlParser::kTypeKey,
                base::Value(mojom::XmlParser::kTextNodeType));
  } else {
    node.SetKey(mojom::XmlParser::kTypeKey,
                base::Value(mojom::XmlParser::kCDataNodeType));
  }
  node.SetKey(mojom::XmlParser::kTextKey, base::Value(text));
  return node;
}
// Returns a dictionary node of type "element" whose tag is |name|. The node
// has no children entry yet.
base::Value CreateNewElement(const std::string& name) {
  base::Value node(base::Value::Type::DICTIONARY);
  base::Value type_value(mojom::XmlParser::kElementType);
  base::Value tag_value(name);
  node.SetKey(mojom::XmlParser::kTypeKey, std::move(type_value));
  node.SetKey(mojom::XmlParser::kTagKey, std::move(tag_value));
  return node;
}
// Appends |child| to the children list of |element|, creating that list first
// when |element| does not have one yet. Returns a pointer to the stored child.
base::Value* AddChildToElement(base::Value* element, base::Value child) {
  DCHECK(element->is_dict());
  base::Value* child_list = element->FindKey(mojom::XmlParser::kChildrenKey);
  if (child_list) {
    DCHECK(child_list->is_list());
  } else {
    child_list = element->SetKey(mojom::XmlParser::kChildrenKey,
                                 base::Value(base::Value::Type::LIST));
  }
  auto& storage = child_list->GetList();
  storage.push_back(std::move(child));
  return &storage.back();
}
} // namespace
// Takes ownership of |service_ref| and stores it in |service_ref_|. The unit
// test and the fuzzer in this change pass nullptr.
XmlParser::XmlParser(
std::unique_ptr<service_manager::ServiceContextRef> service_ref)
: service_ref_(std::move(service_ref)) {}
// Nothing to clean up beyond what the members release themselves.
XmlParser::~XmlParser() = default;
// Parses |xml| and reports the outcome through |callback|. On success the
// callback receives the root element as a dictionary Value and an empty error;
// on failure it receives a null result and an error message.
//
// Whitespace and comment nodes are skipped. Text and CDATA nodes become leaf
// nodes. Any other non-closing node is turned into an element node named after
// the reader's current node name.
void XmlParser::Parse(const std::string& xml, ParseCallback callback) {
  XmlReader xml_reader;
  if (!xml_reader.Load(xml)) {
    ReportError(std::move(callback), "Invalid XML: failed to load");
    return;
  }

  base::Value root_element;
  // Elements that have been opened but not closed yet, innermost last.
  std::vector<base::Value*> element_stack;
  while (xml_reader.Read()) {
    if (xml_reader.IsWhiteSpace() || xml_reader.IsComment())
      continue;

    if (xml_reader.IsClosingElement()) {
      if (element_stack.empty()) {
        ReportError(std::move(callback), "Invalid XML: unbalanced elements");
        return;
      }
      element_stack.pop_back();
      continue;
    }

    std::string text;
    base::Value* current_element =
        element_stack.empty() ? nullptr : element_stack.back();
    bool push_new_node_to_stack = false;
    base::Value new_element;
    if (xml_reader.GetTextIfTextElement(&text)) {
      new_element = CreateTextNode(text, TextNodeType::kText);
    } else if (xml_reader.GetTextIfCDataElement(&text)) {
      new_element = CreateTextNode(text, TextNodeType::kCData);
    } else {
      // Element node.
      new_element = CreateNewElement(xml_reader.NodeName());
      // Self-closing (empty) element have no close tag (or children); don't
      // push them on the element stack.
      push_new_node_to_stack = !xml_reader.IsEmptyElement();
    }

    base::Value* new_element_ptr;
    if (current_element) {
      new_element_ptr =
          AddChildToElement(current_element, std::move(new_element));
    } else {
      // No element is open, so this node has to be the document root. If a
      // root was already recorded, another top-level node showed up after the
      // root was closed. The input is untrusted, so report an error instead of
      // relying on a DCHECK and overwriting the root.
      if (!root_element.is_none()) {
        ReportError(std::move(callback),
                    "Invalid XML: unexpected content after root element");
        return;
      }
      root_element = std::move(new_element);
      new_element_ptr = &root_element;
    }
    if (push_new_node_to_stack)
      element_stack.push_back(new_element_ptr);
  }

  // Anything still open once reading stops means a close tag was missing.
  if (!element_stack.empty()) {
    ReportError(std::move(callback), "Invalid XML: unbalanced elements");
    return;
  }

  // Reject input that produced no root dictionary at all.
  base::DictionaryValue* dictionary = nullptr;
  root_element.GetAsDictionary(&dictionary);
  if (!dictionary || dictionary->empty()) {
    ReportError(std::move(callback), "Invalid XML: bad content");
    return;
  }

  std::move(callback).Run(
      base::Value::ToUniquePtrValue(std::move(root_element)),
      base::Optional<std::string>());
}
} // namespace data_decoder
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SERVICES_DATA_DECODER_XML_PARSER_H_
#define SERVICES_DATA_DECODER_XML_PARSER_H_
#include <memory>
#include <string>
#include "base/macros.h"
#include "services/data_decoder/public/interfaces/xml_parser.mojom.h"
#include "services/service_manager/public/cpp/service_context_ref.h"
namespace data_decoder {
class XmlParser : public mojom::XmlParser {
public:
explicit XmlParser(
std::unique_ptr<service_manager::ServiceContextRef> service_ref);
~XmlParser() override;
private:
const std::unique_ptr<service_manager::ServiceContextRef> service_ref_;
// mojom::XmlParser implementation.
void Parse(const std::string& xml, ParseCallback callback) override;
DISALLOW_COPY_AND_ASSIGN(XmlParser);
};
} // namespace data_decoder
#endif // SERVICES_DATA_DECODER_XML_PARSER_H_
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <string>
#include "base/bind.h"
#include "base/message_loop/message_loop.h"
#include "base/run_loop.h"
#include "services/data_decoder/xml_parser.h"
namespace {
void OnParseXml(base::Closure quit_loop,
std::unique_ptr<base::Value> value,
const base::Optional<std::string>& error) {
std::move(quit_loop).Run();
}
} // namespace
// Entry point for LibFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
const char* data_ptr = reinterpret_cast<const char*>(data);
data_decoder::XmlParser xml_parser_impl(/*service_ref=*/nullptr);
data_decoder::mojom::XmlParser& xml_parser = xml_parser_impl;
base::MessageLoop message_loop;
base::RunLoop run_loop;
xml_parser.Parse(std::string(data_ptr, size),
base::Bind(&OnParseXml, run_loop.QuitClosure()));
run_loop.Run();
return 0;
}
<hello>bonjour</hello>
\ No newline at end of file
<a>
<b>
<c>
<d>
<e>
<f>
<g>
<h>
<i>
<j>
<k>
<l>
<m>
<n>
<o>
<p>
<q>
<r>
<s>
<t>
<u>
<v>
<w>
<x>
<y>
<z>
Alphabet
</z>
</y>
</x>
</w>
</v>
</u>
</t>
</s>
</r>
</q>
</p>
</o>
</n>
</m>
</l>
</k>
</j>
</i>
</h>
</g>
</f>
</e>
</d>
</c>
</b>
</a>
\ No newline at end of file
<?xml version='1.0' encoding='UTF-8'?>
<!-- This is an XML sample -->
<library xmlns='http://library' xmlns:foo='http://foo.com'>
<book foo:id="k123">
<author>Isaac Newton</author>
<title>Philosophiae Naturalis Principia Mathematica</title>
<genre>Science</genre>
<price>40.95</price>
<publish_date>1947-9-03</publish_date>
</book>
<book foo:id="k456">
<author>Dr. Seuss</author>
<title>Green Eggs and Ham</title>
<genre>Kid</genre>
<foo:kids/>
<price>4.95</price>
<publish_date>1960-8-12</publish_date>
</book>
</library>
\ No newline at end of file
<test>{"tag": "woop", "boing": 123, 12: ""foodyums"}</test>
\ No newline at end of file
<number>18446744073709551616</number>
<a><b>b1</b><c>c1</c><b>b2</b><c>c2</c><b>b3</b><c>c3</c></a>
\ No newline at end of file
<hello>
<fr>coucou</fr>
<fr><proper>bonjour</proper><slang>salut</slang></fr>
<fr>ca va</fr>
</hello>
<a><b></b><b></b></a>
\ No newline at end of file
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include "base/bind.h"
#include "base/json/json_reader.h"
#include "base/message_loop/message_loop.h"
#include "services/data_decoder/xml_parser.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace data_decoder {
namespace {
// Parse callback that hands the outcome back to the test: copies |error| into
// |*error_out| and moves |value| into |*value_out|.
void TestParseXmlCallback(std::unique_ptr<base::Value>* value_out,
                          base::Optional<std::string>* error_out,
                          std::unique_ptr<base::Value> value,
                          const base::Optional<std::string>& error) {
  *error_out = error;
  *value_out = std::move(value);
}
// Parses the passed in |xml| and compares the result to |json|.
// If |json| is empty, the parsing is expected to fail.
void TestParseXml(const std::string& xml, const std::string& json) {
XmlParser parser_impl(/*service_ref=*/nullptr);
// Use a reference to mojom::XmlParser as XmlParser implements the interface
// privately.
mojom::XmlParser& parser = parser_impl;
std::unique_ptr<base::Value> actual_value;
base::Optional<std::string> error;
parser.Parse(xml, base::Bind(&TestParseXmlCallback, &actual_value, &error));
if (json.empty()) {
EXPECT_TRUE(error);
EXPECT_FALSE(actual_value);
return;
}
EXPECT_FALSE(error) << "Unexpected error: " << *error;
EXPECT_TRUE(actual_value);
std::unique_ptr<base::Value> expected_value = base::JSONReader::Read(json);
DCHECK(expected_value) << "Bad test, incorrect JSON: " << json;
EXPECT_EQ(*expected_value, *actual_value);
}
} // namespace
using XmlParserTest = testing::Test;
// Each of these inputs is expected to make the parser report an error.
TEST_F(XmlParserTest, ParseBadXml) {
  const char* const kInvalidXml[] = {
      "",
      " ",
      "Awesome possum",
      "[\"json\", \"or\", \"xml?\"]",
      "<unbalanced>",
      "<hello>bad tag</goodbye>",
  };
  for (const char* xml : kInvalidXml)
    TestParseXml(xml, "");
}
// Self-closing elements (<a/>) are expected to produce element nodes without
// a "children" entry, whether they are the root or nested.
TEST_F(XmlParserTest, ParseSelfClosingTag) {
TestParseXml("<a/>", R"( {"type": "element", "tag": "a"} )");
TestParseXml("<a><b/></a>",
R"( {"type": "element",
"tag": "a",
"children": [{"type": "element", "tag": "b"}]} )");
TestParseXml("<a><b/><b/><b/></a>",
R"( {"type": "element",
"tag": "a",
"children":[
{"type": "element", "tag": "b"},
{"type": "element", "tag": "b"},
{"type": "element", "tag": "b"}
]}
)");
}
// Elements with an explicit but empty open/close pair (<a></a>) are expected
// to produce the same output as their self-closing form.
TEST_F(XmlParserTest, ParseEmptyTag) {
TestParseXml("<a></a>", R"( {"type": "element", "tag": "a"} )");
TestParseXml("<a><b></b></a>",
R"( {"type": "element",
"tag": "a",
"children": [{"type": "element", "tag": "b"}]} )");
TestParseXml("<a><b></b><b></b></a>",
R"( {"type": "element",
"tag": "a",
"children":[{"type": "element", "tag": "b"},
{"type": "element", "tag": "b"}]} )");
}
// Text inside an element is expected to appear as a single child node of type
// "text".
TEST_F(XmlParserTest, ParseTextElement) {
TestParseXml("<hello>bonjour</hello>",
R"( {"type": "element",
"tag": "hello",
"children": [{"type": "text", "text": "bonjour"}]} )");
}
// A CDATA section is expected to appear as a child node of type "cdata" whose
// text keeps the special characters and markup unchanged.
TEST_F(XmlParserTest, ParseCDataElement) {
TestParseXml(R"(<hello><![CDATA[This is CData.
With weird chars [ ] { } <> ; : ' " and
some <b>formatting</b> <br>]]>
</hello> )",
R"( {"type": "element",
"tag": "hello",
"children": [{"type": "cdata",
"text": "This is CData.\n With weird chars [ ] { } <> ; : ' \" and\n some <b>formatting</b> <br>"
}]} )");
}
// Malformed CDATA delimiters are expected to make parsing fail.
TEST_F(XmlParserTest, ParseBadCDataElement) {
  const char* const kMalformedCData[] = {
      // Missing first bracket.
      "<hello><!CDATA[This is CData.]]></hello>",
      // Space before last bracket.
      "<hello><![CDATA[This is CData.] ]></hello>",
      // Space before closing >.
      "<hello><![CDATA[This is CData.]] ></hello>",
  };
  for (const char* xml : kMalformedCData)
    TestParseXml(xml, "");
}
// Predefined XML entities are expected to be decoded in text nodes and left
// as-is inside CDATA sections.
TEST_F(XmlParserTest, ParseTextWithEntities) {
TestParseXml("<hello>&quot;bonjour&amp; &apos; &lt;hello&gt;</hello>",
R"( {"type": "element",
"tag": "hello",
"children": [{"type": "text",
"text": "\"bonjour& ' <hello>"}]} )");
// Entities in CDATA are not evaluated.
TestParseXml("<hello><![CDATA[&quot;bonjour&amp; &apos;]]></hello>",
R"( {"type": "element",
"tag": "hello",
"children": [{"type": "cdata",
"text": "&quot;bonjour&amp; &apos;"}]} )");
}
// Sibling elements sharing the same tag are expected to be kept as separate
// children, in document order.
TEST_F(XmlParserTest, ParseMultipleSimilarTextElement) {
TestParseXml("<hello><fr>bonjour</fr><fr>salut</fr><fr>coucou</fr></hello>",
R"( {"type": "element",
"tag": "hello",
"children": [
{"type": "element",
"tag": "fr",
"children": [{"type": "text", "text": "bonjour"}]},
{"type": "element",
"tag": "fr",
"children": [{"type": "text", "text": "salut"}]},
{"type": "element",
"tag": "fr",
"children": [{"type": "text", "text": "coucou"}]}
]} )");
}
// Same-tag siblings may hold either text or nested elements; the expected
// output keeps each one's own structure and drops the whitespace between
// elements.
TEST_F(XmlParserTest, ParseMixMatchTextNonTextElement) {
TestParseXml(
R"(
<hello>
<fr>coucou</fr>
<fr><proper>bonjour</proper><slang>salut</slang></fr>
<fr>ca va</fr>
</hello> )",
R"(
{"type": "element",
"tag": "hello",
"children": [
{"type": "element",
"tag": "fr",
"children": [{"type": "text", "text": "coucou"}]},
{"type": "element",
"tag": "fr",
"children": [
{"type": "element",
"tag": "proper",
"children": [{"type": "text", "text": "bonjour" }]},
{"type": "element",
"tag": "slang",
"children": [{"type": "text", "text": "salut" }]}
]},
{"type": "element",
"tag": "fr",
"children": [{"type": "text", "text": "ca va"}]}
]} )");
}
// Mixed content: text, a nested element and a CDATA section under one parent
// are expected to become separate children in document order.
TEST_F(XmlParserTest, ParseElementsInText) {
TestParseXml(
"<p>This is <b>some</b> text.<![CDATA[ this <i>formatting</i> is ignored"
" ]]></p>",
R"(
{"type": "element", "tag": "p", "children": [
{"type": "text", "text": "This is "},
{"type": "element", "tag": "b", "children": [
{"type": "text", "text": "some"}
]},
{"type": "text", "text": " text."},
{"type": "cdata", "text": " this <i>formatting</i> is ignored "}
]} )");
}
// Ten levels of nesting are expected to be reproduced as ten nested
// "children" lists, with the text at the innermost level.
TEST_F(XmlParserTest, ParseNestedXml) {
TestParseXml(
R"( <M><a><t><r><y><o><s><h><k><a>Zdravstvuy</a>
</k></h></s></o></y></r></t></a></M> )",
R"( {"type": "element", "tag": "M", "children": [
{"type": "element", "tag": "a", "children": [
{"type": "element", "tag": "t", "children": [
{"type": "element", "tag": "r", "children": [
{"type": "element", "tag": "y", "children": [
{"type": "element", "tag": "o", "children": [
{"type": "element", "tag": "s", "children": [
{"type": "element", "tag": "h", "children": [
{"type": "element", "tag": "k", "children": [
{"type": "element", "tag": "a", "children": [
{"type": "text", "text": "Zdravstvuy"}
]}
]}
]}
]}
]}
]}
]}
]}
]}
]} )");
}
// Interleaved <b> and <c> siblings are expected to keep their document order
// rather than being grouped by tag.
TEST_F(XmlParserTest, ParseMultipleSimilarElements) {
TestParseXml("<a><b>b1</b><c>c1</c><b>b2</b><c>c2</c><b>b3</b><c>c3</c></a>",
R"( {"type": "element", "tag": "a", "children": [
{"type": "element", "tag": "b", "children":[
{"type": "text", "text": "b1"}]},
{"type": "element", "tag": "c", "children":[
{"type": "text", "text": "c1"}]},
{"type": "element", "tag": "b", "children":[
{"type": "text", "text": "b2"}]},
{"type": "element", "tag": "c", "children":[
{"type": "text", "text": "c2"}]},
{"type": "element", "tag": "b", "children":[
{"type": "text", "text": "b3"}]},
{"type": "element", "tag": "c", "children":[
{"type": "text", "text": "c3"}]}
]} )");
}
// Numeric-looking content is expected to stay a string: this 20-digit number
// must come back as text, digit for digit.
TEST_F(XmlParserTest, LargeNumber) {
TestParseXml("<number>18446744073709551616</number>",
R"( {"type": "element",
"tag": "number",
"children": [
{ "type": "text", "text": "18446744073709551616"}
]} )");
}
// JSON-looking text inside an element is expected to be returned as an opaque
// string and not merged into the output structure.
TEST_F(XmlParserTest, JsonInjection) {
TestParseXml(
R"( <test>{"tag": "woop", "boing": 123, 12: ""foodyums"}</test> )",
R"( {"type": "element",
"tag": "test",
"children": [
{"type": "text",
"text": "{\"tag\": \"woop\", \"boing\": 123, 12: \"\"foodyums\"}"}
]}
)");
}
// End-to-end sample with an XML declaration, a comment, namespace
// declarations, attributes and a namespace-prefixed tag. None of those appear
// in the expected output, and <foo:kids/> is expected under the tag "kids".
TEST_F(XmlParserTest, ParseTypicalXml) {
constexpr char kXml[] = R"(<?xml version='1.0' encoding='UTF-8'?>
<!-- This is an XML sample -->
<library xmlns='http://library' xmlns:foo='http://foo.com'>
<book foo:id="k123">
<author>Isaac Newton</author>
<title>Philosophiae Naturalis Principia Mathematica</title>
<genre>Science</genre>
<price>40.95</price>
<publish_date>1947-9-03</publish_date>
</book>
<book foo:id="k456">
<author>Dr. Seuss</author>
<title>Green Eggs and Ham</title>
<genre>Kid</genre>
<foo:kids/>
<price>4.95</price>
<publish_date>1960-8-12</publish_date>
</book>
</library>
)";
constexpr char kJson[] = R"(
{"type": "element",
"tag": "library",
"children": [
{"type": "element",
"tag": "book",
"children": [
{"type": "element",
"tag": "author",
"children": [{"type": "text", "text": "Isaac Newton"}]
},
{"type": "element",
"tag": "title",
"children": [
{"type": "text",
"text": "Philosophiae Naturalis Principia Mathematica"}
]
},
{"type": "element",
"tag": "genre",
"children": [{"type": "text", "text": "Science"}]
},
{"type": "element",
"tag": "price",
"children": [{"type": "text", "text": "40.95"}]
},
{"type": "element",
"tag": "publish_date",
"children": [{"type": "text", "text": "1947-9-03"}]
}
]
},
{"type": "element",
"tag": "book",
"children": [
{"type": "element",
"tag": "author",
"children": [{"type": "text", "text": "Dr. Seuss"}]
},
{"type": "element",
"tag": "title",
"children": [{"type": "text", "text": "Green Eggs and Ham"}]
},
{"type": "element",
"tag": "genre",
"children": [{"type": "text", "text": "Kid"}]
},
{"type": "element",
"tag": "kids"
},
{"type": "element",
"tag": "price",
"children": [{"type": "text", "text": "4.95"}]
},
{"type": "element",
"tag": "publish_date",
"children": [{"type": "text", "text": "1960-8-12"}]
}
]}
]
}
)";
TestParseXml(kXml, kJson);
}
} // namespace data_decoder
......@@ -48,10 +48,39 @@ bool XmlReader::NodeAttribute(const char* name, std::string* out) {
return true;
}
// If the current node is a text node, copies its value into |content| and
// returns true. Otherwise returns false and leaves |content| alone.
bool XmlReader::GetTextIfTextElement(std::string* content) {
  const bool is_text_node = NodeType() == XML_READER_TYPE_TEXT;
  if (is_text_node)
    *content = XmlStringToStdString(xmlTextReaderConstValue(reader_));
  return is_text_node;
}
// If the current node is a CDATA node, copies its value into |content| and
// returns true. Otherwise returns false and leaves |content| alone.
bool XmlReader::GetTextIfCDataElement(std::string* content) {
  const bool is_cdata_node = NodeType() == XML_READER_TYPE_CDATA;
  if (is_cdata_node)
    *content = XmlStringToStdString(xmlTextReaderConstValue(reader_));
  return is_cdata_node;
}
// Returns true when the current node is an end-of-element node.
bool XmlReader::IsClosingElement() {
  const bool is_end_element = NodeType() == XML_READER_TYPE_END_ELEMENT;
  return is_end_element;
}
// Returns true only when libxml reports the current node as an empty
// (self-closing) element.
bool XmlReader::IsEmptyElement() {
  // xmlTextReaderIsEmptyElement() returns 1 for an empty element, 0 otherwise
  // and -1 on error. Compare against 1 explicitly so the error value is not
  // converted to true.
  return xmlTextReaderIsEmptyElement(reader_) == 1;
}
// Returns true when the current node is a whitespace or significant-whitespace
// node.
bool XmlReader::IsWhiteSpace() {
  if (NodeType() == XML_READER_TYPE_WHITESPACE)
    return true;
  return NodeType() == XML_READER_TYPE_SIGNIFICANT_WHITESPACE;
}
// Returns true when the current node is a comment node.
bool XmlReader::IsComment() {
  const bool is_comment = NodeType() == XML_READER_TYPE_COMMENT;
  return is_comment;
}
bool XmlReader::ReadElementContent(std::string* content) {
const int start_depth = Depth();
......
......@@ -73,9 +73,25 @@ class XmlReader {
// returns true and |value| is set to "a".
bool NodeAttribute(const char* name, std::string* value);
// If the current node is a #text node, sets |content| to its content and
// returns true. Otherwise returns false and leaves |content| unchanged.
bool GetTextIfTextElement(std::string* content);
// Same as GetTextIfTextElement(), but for #cdata nodes.
bool GetTextIfCDataElement(std::string* content);
// Returns true if the node is a closing element (e.g. </foo>).
bool IsClosingElement();
// Returns true if the current node is an empty (self-closing) element (e.g.
// <foo/>).
bool IsEmptyElement();
// Returns true if the current node is a white-space node (regular or
// significant white-space).
bool IsWhiteSpace();
// Returns true if the current node is a comment (e.g. <!-- comment -->).
bool IsComment();
// Helper functions not provided by libxml ----------------------------------
// Return the string content within an element.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment