Commit a49d62c7 authored by Robert Sesek, committed by Commit Bot

Use the data_decoder service in TemplateURLParser.

This requires making the TemplateURLParser be asynchronous rather than
directly returning the result. That has a ripple effect of changing the
lifetimes of some of the parameters to Parse(), such as the
SearchTermsData.

The Firefox importer also uses the TemplateURLParser (although it may
be entirely broken, per https://crbug.com/868768). The importer assumes
that all operations are synchronous, so this adds an internal helper
class to manage the now-asynchronous state for TemplateURL parsing.

Bug: 699342
Change-Id: I311d9e29dbbca34a4f5696b251a0fbaaadcc506b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1879973
Commit-Queue: Robert Sesek <rsesek@chromium.org>
Reviewed-by: Ilya Sherman <isherman@chromium.org>
Reviewed-by: Martin Barbella <mbarbella@chromium.org>
Reviewed-by: Nico Weber <thakis@chromium.org>
Reviewed-by: Kevin Bailey <krb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#712773}
parent 2aa1c80a
...@@ -5580,6 +5580,7 @@ static_library("test_support") { ...@@ -5580,6 +5580,7 @@ static_library("test_support") {
"//content/test:test_support", "//content/test:test_support",
"//google_apis:test_support", "//google_apis:test_support",
"//net:test_support", "//net:test_support",
"//services/data_decoder/public/cpp:test_support",
"//services/preferences/public/cpp/tracked:test_support", "//services/preferences/public/cpp/tracked:test_support",
"//skia", "//skia",
"//testing/gmock", "//testing/gmock",
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "components/search_engines/template_url_parser.h" #include "components/search_engines/template_url_parser.h"
#include "components/search_engines/template_url_prepopulate_data.h" #include "components/search_engines/template_url_prepopulate_data.h"
#include "services/data_decoder/public/cpp/data_decoder.h"
#include "ui/base/l10n/l10n_util.h" #include "ui/base/l10n/l10n_util.h"
#include <iterator> #include <iterator>
...@@ -92,53 +93,95 @@ std::unique_ptr<TemplateURL> CreateTemplateURL(const base::string16& url, ...@@ -92,53 +93,95 @@ std::unique_ptr<TemplateURL> CreateTemplateURL(const base::string16& url,
return std::make_unique<TemplateURL>(data); return std::make_unique<TemplateURL>(data);
} }
// Parses the OpenSearch XML files in |xml_files| and populates |search_engines| } // namespace
// with the resulting TemplateURLs.
void ParseSearchEnginesFromFirefoxXMLData(
const std::vector<std::string>& xml_data,
TemplateURLService::OwnedTemplateURLVector* search_engines) {
DCHECK(search_engines);
std::map<std::string, std::unique_ptr<TemplateURL>> search_engine_for_url; // When the Bridge receives the search engines XML data via
// The first XML file represents the default search engine in Firefox 3, so we // SetFirefoxSearchEnginesXMLData(), this class is responsible for managing the
// need to keep it on top of the list. // asynchronous TemplateURL parsing operations. The Bridge generally operates
auto default_turl = search_engine_for_url.end(); // synchronously, so this class manages the state and notifies the bridge when
for (auto xml_iter = xml_data.begin(); xml_iter != xml_data.end(); // parsing is done.
++xml_iter) { class InProcessImporterBridge::SearchEnginesParser {
std::unique_ptr<TemplateURL> template_url = TemplateURLParser::Parse( public:
UIThreadSearchTermsData(), xml_iter->data(), xml_iter->length(), // Starts parsing the |search_engines_xml_data| and will notify |bridge|
base::BindRepeating(&FirefoxURLParameterFilter)); // upon completion.
if (template_url) { SearchEnginesParser(const std::vector<std::string>& search_engines_xml_data,
auto iter = search_engine_for_url.find(template_url->url()); InProcessImporterBridge* bridge)
if (iter == search_engine_for_url.end()) { : bridge_(bridge), data_decoder_(new data_decoder::DataDecoder()) {
iter = search_engine_for_url DCHECK(!search_engines_xml_data.empty());
.insert(std::make_pair(template_url->url(), StartParse(search_engines_xml_data);
std::move(template_url)))
.first;
} else {
// We have already found a search engine with the same URL. We give
// priority to the latest one found, as GetSearchEnginesXMLFiles()
// returns a vector with first Firefox default search engines and then
// the user's ones. We want to give priority to the user ones.
iter->second = std::move(template_url);
} }
if (default_turl == search_engine_for_url.end())
default_turl = iter; // Returns true if all the data have been parsed, false if the operation
// is still ongoing.
bool is_done() const { return is_done_; }
// If InProcessImporterBridge::NotifyEnded() is called before is_done()
// returns true, NotifyEnded() sets this flag so that it can be called back
// to complete the import.
void set_notify_ended_on_completion() { notify_ended_on_completion_ = true; }
private:
void StartParse(const std::vector<std::string>& search_engines_xml_data) {
const auto& last_item = search_engines_xml_data.end() - 1;
TemplateURLParser::ParameterFilter param_filter =
base::BindRepeating(&FirefoxURLParameterFilter);
for (auto it = search_engines_xml_data.begin();
it != search_engines_xml_data.end(); ++it) {
// Because all TemplateURLParser are handled by the same data_decoder_
// instance, the results will be returned FIFO.
// The SearchEnginesParser is owned by the InProcessImporterBridge,
// which is not deleted until NotifyEnded() is called, so using Unretained
// is safe.
TemplateURLParser::ParseWithDataDecoder(
data_decoder_.get(), &search_terms_data_, *it, param_filter,
base::BindOnce(&SearchEnginesParser::OnURLParsed,
base::Unretained(this), it == last_item));
} }
} }
// Put the results in the |search_engines| vector. void OnURLParsed(bool is_last_item, std::unique_ptr<TemplateURL> url) {
for (auto t_iter = search_engine_for_url.begin(); if (url)
t_iter != search_engine_for_url.end(); ++t_iter) { parsed_urls_.push_back(std::move(url));
if (t_iter == default_turl)
search_engines->insert(search_engines->begin(), if (is_last_item)
std::move(default_turl->second)); FinishParsing();
else
search_engines->push_back(std::move(t_iter->second));
} }
}
} // namespace void FinishParsing() {
is_done_ = true;
// Shut down the DataDecoder.
data_decoder_.reset();
bridge_->WriteSearchEngines(std::move(parsed_urls_));
if (notify_ended_on_completion_)
bridge_->NotifyEnded();
}
// Storage for the URLs. These are stored in the same order as the original
// |search_engines_xml_data|.
TemplateURLService::OwnedTemplateURLVector parsed_urls_;
InProcessImporterBridge* bridge_; // Weak, owns this.
// Set to true if the last search engine has been parsed.
bool is_done_ = false;
// Set to true if the ImporterBridge has been NotifyEnded() already but was
// waiting on this class to finish the import.
bool notify_ended_on_completion_ = false;
// Parameter for TemplateURLParser.
UIThreadSearchTermsData search_terms_data_;
// The DataDecoder instance that is shared amongst all the TemplateURLs being
// parsed.
std::unique_ptr<data_decoder::DataDecoder> data_decoder_;
DISALLOW_COPY_AND_ASSIGN(SearchEnginesParser);
};
InProcessImporterBridge::InProcessImporterBridge( InProcessImporterBridge::InProcessImporterBridge(
ProfileWriter* writer, ProfileWriter* writer,
...@@ -186,10 +229,10 @@ void InProcessImporterBridge::SetKeywords( ...@@ -186,10 +229,10 @@ void InProcessImporterBridge::SetKeywords(
void InProcessImporterBridge::SetFirefoxSearchEnginesXMLData( void InProcessImporterBridge::SetFirefoxSearchEnginesXMLData(
const std::vector<std::string>& search_engine_data) { const std::vector<std::string>& search_engine_data) {
TemplateURLService::OwnedTemplateURLVector search_engines; if (!search_engine_data.empty()) {
ParseSearchEnginesFromFirefoxXMLData(search_engine_data, &search_engines); // SearchEnginesParser will call the Bridge back when it is done.
search_engines_.reset(new SearchEnginesParser(search_engine_data, this));
writer_->AddKeywords(std::move(search_engines), true); }
} }
void InProcessImporterBridge::SetPasswordForm( void InProcessImporterBridge::SetPasswordForm(
...@@ -228,6 +271,13 @@ void InProcessImporterBridge::NotifyItemEnded(importer::ImportItem item) { ...@@ -228,6 +271,13 @@ void InProcessImporterBridge::NotifyItemEnded(importer::ImportItem item) {
} }
void InProcessImporterBridge::NotifyEnded() { void InProcessImporterBridge::NotifyEnded() {
// If there are search engines to parse but parsing them is not yet complete,
// arrange to be called back when they are done.
if (search_engines_ && !search_engines_->is_done()) {
search_engines_->set_notify_ended_on_completion();
return;
}
host_->NotifyImportEnded(); host_->NotifyImportEnded();
} }
...@@ -236,3 +286,35 @@ base::string16 InProcessImporterBridge::GetLocalizedString(int message_id) { ...@@ -236,3 +286,35 @@ base::string16 InProcessImporterBridge::GetLocalizedString(int message_id) {
} }
InProcessImporterBridge::~InProcessImporterBridge() {} InProcessImporterBridge::~InProcessImporterBridge() {}
// Deduplicates |template_urls| by URL and passes the result to
// |writer_->AddKeywords()|. The entry whose URL matches the first element of
// |template_urls| is placed at the front of the written list; the remaining
// entries follow in map (URL-sorted) order.
void InProcessImporterBridge::WriteSearchEngines(
    TemplateURL::OwnedTemplateURLVector template_urls) {
  // Record the URL of the first entry *before* the loop below runs. That loop
  // moves every unique_ptr out of |template_urls|, leaving null pointers
  // behind, so reading template_urls[0]->url() afterwards would dereference a
  // null pointer.
  const bool has_default = !template_urls.empty();
  std::string default_key;
  if (has_default)
    default_key = template_urls.front()->url();

  std::map<std::string, std::unique_ptr<TemplateURL>> search_engine_for_url;
  for (auto& template_url : template_urls) {
    // Copy the key first: |template_url| is null once it has been moved.
    std::string key = template_url->url();
    // Give priority to the latest template URL that is found, as
    // GetSearchEnginesXMLFiles() returns a vector with first Firefox default
    // search engines and then the user's ones. The user ones should take
    // precedence.
    search_engine_for_url[key] = std::move(template_url);
  }

  // The first URL represents the default search engine in Firefox 3, so we
  // need to keep it on top of the list.
  const auto default_turl = has_default
                                ? search_engine_for_url.find(default_key)
                                : search_engine_for_url.end();

  // Put the results in the |search_engines| vector.
  TemplateURLService::OwnedTemplateURLVector search_engines;
  search_engines.reserve(search_engine_for_url.size());
  for (auto it = search_engine_for_url.begin();
       it != search_engine_for_url.end(); ++it) {
    if (it == default_turl) {
      search_engines.insert(search_engines.begin(), std::move(it->second));
    } else {
      search_engines.push_back(std::move(it->second));
    }
  }

  writer_->AddKeywords(std::move(search_engines), true);
}
...@@ -60,10 +60,19 @@ class InProcessImporterBridge : public ImporterBridge { ...@@ -60,10 +60,19 @@ class InProcessImporterBridge : public ImporterBridge {
// End ImporterBridge implementation. // End ImporterBridge implementation.
private: private:
class SearchEnginesParser;
friend class SearchEnginesParser;
~InProcessImporterBridge() override; ~InProcessImporterBridge() override;
// Called by the SearchEnginesParser when all the search engines have been
// parsed. The |template_urls| vector is in the same sort order that was
// passed to SetFirefoxSearchEnginesXMLData().
void WriteSearchEngines(TemplateURL::OwnedTemplateURLVector template_urls);
ProfileWriter* const writer_; // weak ProfileWriter* const writer_; // weak
const base::WeakPtr<ExternalProcessImporterHost> host_; const base::WeakPtr<ExternalProcessImporterHost> host_;
std::unique_ptr<SearchEnginesParser> search_engines_;
DISALLOW_COPY_AND_ASSIGN(InProcessImporterBridge); DISALLOW_COPY_AND_ASSIGN(InProcessImporterBridge);
}; };
......
...@@ -10,9 +10,11 @@ ...@@ -10,9 +10,11 @@
#include "base/path_service.h" #include "base/path_service.h"
#include "base/stl_util.h" #include "base/stl_util.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "base/test/task_environment.h"
#include "chrome/common/chrome_paths.h" #include "chrome/common/chrome_paths.h"
#include "components/search_engines/search_terms_data.h" #include "components/search_engines/search_terms_data.h"
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "services/data_decoder/public/cpp/test_support/in_process_data_decoder.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
using base::ASCIIToUTF16; using base::ASCIIToUTF16;
...@@ -39,15 +41,25 @@ class TemplateURLParserTest : public testing::Test { ...@@ -39,15 +41,25 @@ class TemplateURLParserTest : public testing::Test {
void ParseFile(const std::string& file_name, void ParseFile(const std::string& file_name,
const TemplateURLParser::ParameterFilter& filter); const TemplateURLParser::ParameterFilter& filter);
void ParseString(const std::string& data,
const TemplateURLParser::ParameterFilter& filter);
// ParseFile parses the results into this template_url. // ParseFile parses the results into this template_url.
std::unique_ptr<TemplateURL> template_url_; std::unique_ptr<TemplateURL> template_url_;
private: private:
// Parse-completion callback. ParseString() binds this with its RunLoop's
// QuitClosure() and passes it to TemplateURLParser::Parse().
// Takes ownership of |template_url| and stores it in |template_url_|, then
// runs |quit_closure|.
void OnTemplateURLParsed(base::OnceClosure quit_closure,
std::unique_ptr<TemplateURL> template_url) {
// Store the result before running the closure, so it is already in place
// when |quit_closure| runs.
template_url_ = std::move(template_url);
std::move(quit_closure).Run();
}
base::FilePath osdd_dir_; base::FilePath osdd_dir_;
base::test::TaskEnvironment task_environment_;
data_decoder::test::InProcessDataDecoder data_decoder_;
}; };
TemplateURLParserTest::TemplateURLParserTest() { TemplateURLParserTest::TemplateURLParserTest() {}
}
TemplateURLParserTest::~TemplateURLParserTest() { TemplateURLParserTest::~TemplateURLParserTest() {
} }
...@@ -66,8 +78,19 @@ void TemplateURLParserTest::ParseFile( ...@@ -66,8 +78,19 @@ void TemplateURLParserTest::ParseFile(
std::string contents; std::string contents;
ASSERT_TRUE(base::ReadFileToString(full_path, &contents)); ASSERT_TRUE(base::ReadFileToString(full_path, &contents));
template_url_ = TemplateURLParser::Parse(SearchTermsData(), contents.data(), ParseString(contents, filter);
contents.length(), filter); }
void TemplateURLParserTest::ParseString(
const std::string& data,
const TemplateURLParser::ParameterFilter& filter) {
base::RunLoop run_loop;
SearchTermsData search_terms_data;
TemplateURLParser::Parse(
&search_terms_data, data, filter,
base::BindOnce(&TemplateURLParserTest::OnTemplateURLParsed,
base::Unretained(this), run_loop.QuitClosure()));
run_loop.Run();
} }
// Actual tests --------------------------------------------------------------- // Actual tests ---------------------------------------------------------------
...@@ -249,6 +272,5 @@ TEST_F(TemplateURLParserTest, InvalidInput) { ...@@ -249,6 +272,5 @@ TEST_F(TemplateURLParserTest, InvalidInput) {
</Url> </Url>
</OpenSearchDescription> </OpenSearchDescription>
)"; )";
TemplateURLParser::Parse(SearchTermsData(), char_data, base::size(char_data), ParseString(char_data, filter);
filter);
} }
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "components/search_engines/template_url_data.h" #include "components/search_engines/template_url_data.h"
#include "components/search_engines/template_url_service_observer.h" #include "components/search_engines/template_url_service_observer.h"
#include "services/data_decoder/public/cpp/test_support/in_process_data_decoder.h"
class KeywordWebDataService; class KeywordWebDataService;
class TemplateURLService; class TemplateURLService;
...@@ -83,6 +84,7 @@ class TemplateURLServiceTestUtil : public TemplateURLServiceObserver { ...@@ -83,6 +84,7 @@ class TemplateURLServiceTestUtil : public TemplateURLServiceObserver {
base::string16 search_term_; base::string16 search_term_;
scoped_refptr<KeywordWebDataService> web_data_service_; scoped_refptr<KeywordWebDataService> web_data_service_;
std::unique_ptr<TemplateURLService> model_; std::unique_ptr<TemplateURLService> model_;
data_decoder::test::InProcessDataDecoder data_decoder_;
DISALLOW_COPY_AND_ASSIGN(TemplateURLServiceTestUtil); DISALLOW_COPY_AND_ASSIGN(TemplateURLServiceTestUtil);
}; };
......
...@@ -48,6 +48,7 @@ static_library("utility") { ...@@ -48,6 +48,7 @@ static_library("utility") {
"//media", "//media",
"//net:net_with_v8", "//net:net_with_v8",
"//printing/buildflags", "//printing/buildflags",
"//services/data_decoder:lib",
"//services/network:network_service", "//services/network:network_service",
"//services/service_manager/public/cpp", "//services/service_manager/public/cpp",
"//skia", "//skia",
...@@ -100,7 +101,6 @@ static_library("utility") { ...@@ -100,7 +101,6 @@ static_library("utility") {
"//chrome/common:mojo_bindings", "//chrome/common:mojo_bindings",
"//chrome/common/importer:interfaces", "//chrome/common/importer:interfaces",
"//components/autofill/core/common", "//components/autofill/core/common",
"//services/data_decoder:lib",
"//services/proxy_resolver:lib", "//services/proxy_resolver:lib",
] ]
} }
......
...@@ -70,9 +70,9 @@ static_library("search_engines") { ...@@ -70,9 +70,9 @@ static_library("search_engines") {
"//components/variations", "//components/variations",
"//google_apis", "//google_apis",
"//net", "//net",
"//services/data_decoder/public/cpp",
"//services/network/public/cpp", "//services/network/public/cpp",
"//sql", "//sql",
"//third_party/libxml", # https://crbug.com/699342
"//third_party/metrics_proto", "//third_party/metrics_proto",
"//ui/base", "//ui/base",
"//ui/gfx", "//ui/gfx",
......
...@@ -15,8 +15,8 @@ include_rules = [ ...@@ -15,8 +15,8 @@ include_rules = [
"+components/variations", "+components/variations",
"+components/webdata", "+components/webdata",
"+google_apis", "+google_apis",
"+libxml",
"+net", "+net",
"+services/data_decoder/public",
"+services/network/public/cpp", "+services/network/public/cpp",
"+services/network/test", "+services/network/test",
"+sql", "+sql",
......
...@@ -80,6 +80,7 @@ class TemplateURLFetcher::RequestDelegate { ...@@ -80,6 +80,7 @@ class TemplateURLFetcher::RequestDelegate {
base::string16 keyword() const { return keyword_; } base::string16 keyword() const { return keyword_; }
private: private:
void OnTemplateURLParsed(std::unique_ptr<TemplateURL> template_url);
void OnLoaded(); void OnLoaded();
void AddSearchProvider(); void AddSearchProvider();
...@@ -140,6 +141,25 @@ TemplateURLFetcher::RequestDelegate::RequestDelegate( ...@@ -140,6 +141,25 @@ TemplateURLFetcher::RequestDelegate::RequestDelegate(
50000 /* max_body_size */); 50000 /* max_body_size */);
} }
// Receives the result of TemplateURLParser::Parse() and takes ownership of
// |template_url|. A null result, or one whose URL ref does not support
// replacement, completes the request. Otherwise the provider is added at once
// if the TemplateURLService has loaded; if it has not, this returns without
// adding it.
void TemplateURLFetcher::RequestDelegate::OnTemplateURLParsed(
    std::unique_ptr<TemplateURL> template_url) {
  template_url_ = std::move(template_url);

  const bool is_usable =
      template_url_ &&
      template_url_->url_ref().SupportsReplacement(
          fetcher_->template_url_service_->search_terms_data());
  if (!is_usable) {
    fetcher_->RequestCompleted(this);
    // WARNING: RequestCompleted deletes us.
    return;
  }

  // Only add the provider once the model has been loaded; otherwise wait.
  if (fetcher_->template_url_service_->loaded()) {
    AddSearchProvider();
    // WARNING: AddSearchProvider deletes us.
  }
}
void TemplateURLFetcher::RequestDelegate::OnLoaded() { void TemplateURLFetcher::RequestDelegate::OnLoaded() {
template_url_subscription_.reset(); template_url_subscription_.reset();
if (!template_url_) if (!template_url_)
...@@ -158,23 +178,11 @@ void TemplateURLFetcher::RequestDelegate::OnSimpleLoaderComplete( ...@@ -158,23 +178,11 @@ void TemplateURLFetcher::RequestDelegate::OnSimpleLoaderComplete(
return; return;
} }
template_url_ = TemplateURLParser::Parse( TemplateURLParser::Parse(
fetcher_->template_url_service_->search_terms_data(), &fetcher_->template_url_service_->search_terms_data(),
response_body->data(), response_body->length(), *response_body.get(), TemplateURLParser::ParameterFilter(),
TemplateURLParser::ParameterFilter()); base::BindOnce(&RequestDelegate::OnTemplateURLParsed,
if (!template_url_ || base::Unretained(this)));
!template_url_->url_ref().SupportsReplacement(
fetcher_->template_url_service_->search_terms_data())) {
fetcher_->RequestCompleted(this);
// WARNING: RequestCompleted deletes us.
return;
}
// Wait for the model to be loaded before adding the provider.
if (!fetcher_->template_url_service_->loaded())
return;
AddSearchProvider();
// WARNING: AddSearchProvider deletes us.
} }
void TemplateURLFetcher::RequestDelegate::AddSearchProvider() { void TemplateURLFetcher::RequestDelegate::AddSearchProvider() {
......
...@@ -7,29 +7,26 @@ ...@@ -7,29 +7,26 @@
#include <string.h> #include <string.h>
#include <algorithm> #include <algorithm>
#include <map>
#include <memory>
#include <vector> #include <vector>
#include "base/bind.h"
#include "base/logging.h" #include "base/logging.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/strings/string_number_conversions.h" #include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "base/values.h"
#include "components/search_engines/search_terms_data.h" #include "components/search_engines/search_terms_data.h"
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "libxml/parser.h" #include "services/data_decoder/public/cpp/data_decoder.h"
#include "libxml/xmlwriter.h" #include "services/data_decoder/public/cpp/safe_xml_parser.h"
#include "services/data_decoder/public/mojom/xml_parser.mojom.h"
#include "ui/gfx/favicon_size.h" #include "ui/gfx/favicon_size.h"
#include "url/gurl.h" #include "url/gurl.h"
#include "url/url_constants.h" #include "url/url_constants.h"
namespace { namespace {
// NOTE: libxml uses the UTF-8 encoding. As 0-127 of UTF-8 corresponds
// to that of char, the following names are all in terms of char. This avoids
// having to convert to wide, then do comparisons.
// Defines for element names of the OSD document: // Defines for element names of the OSD document:
const char kURLElement[] = "Url"; const char kURLElement[] = "Url";
const char kParamElement[] = "Param"; const char kParamElement[] = "Param";
...@@ -56,10 +53,6 @@ const char kHTMLType[] = "text/html"; ...@@ -56,10 +53,6 @@ const char kHTMLType[] = "text/html";
// Mime type for as you type suggestions. // Mime type for as you type suggestions.
const char kSuggestionType[] = "application/x-suggestions+json"; const char kSuggestionType[] = "application/x-suggestions+json";
std::string XMLCharToString(const xmlChar* value) {
return std::string(reinterpret_cast<const char*>(value));
}
// Returns true if input_encoding contains a valid input encoding string. This // Returns true if input_encoding contains a valid input encoding string. This
// doesn't verify that we have a valid encoding for the string, just that the // doesn't verify that we have a valid encoding for the string, just that the
// string contains characters that constitute a valid input encoding. // string contains characters that constitute a valid input encoding.
...@@ -101,342 +94,176 @@ bool IsHTTPRef(const std::string& url) { ...@@ -101,342 +94,176 @@ bool IsHTTPRef(const std::string& url) {
gurl.SchemeIs(url::kHttpsScheme)); gurl.SchemeIs(url::kHttpsScheme));
} }
} // namespace // SafeTemplateURLParser takes the output of the data_decoder service's
// XmlParser and extracts the data from the search description into a
// TemplateURL.
// TemplateURLParsingContext -------------------------------------------------- class SafeTemplateURLParser {
// To minimize memory overhead while parsing, a SAX style parser is used.
// TemplateURLParsingContext is used to maintain the state we're in the document
// while parsing.
class TemplateURLParsingContext {
public: public:
// Enum of the known element types.
enum ElementType {
UNKNOWN,
OPEN_SEARCH_DESCRIPTION,
URL,
PARAM,
SHORT_NAME,
IMAGE,
INPUT_ENCODING,
ALIAS,
};
enum Method { enum Method {
GET, GET,
POST POST
}; };
// Key/value of a Param node. // Key/value of a Param node.
typedef std::pair<std::string, std::string> Param; using Param = std::pair<std::string, std::string>;
explicit TemplateURLParsingContext( SafeTemplateURLParser(
const TemplateURLParser::ParameterFilter& parameter_filter); const SearchTermsData* search_terms_data,
const TemplateURLParser::ParameterFilter& parameter_filter,
TemplateURLParser::ParseCallback callback)
: search_terms_data_(search_terms_data),
parameter_filter_(parameter_filter),
callback_(std::move(callback)) {}
static void StartElementImpl(void* ctx, SafeTemplateURLParser(const SafeTemplateURLParser&) = delete;
const xmlChar* name, SafeTemplateURLParser& operator=(const SafeTemplateURLParser&) = delete;
const xmlChar** atts);
static void EndElementImpl(void* ctx, const xmlChar* name);
static void CharactersImpl(void* ctx, const xmlChar* ch, int len);
// Returns a TemplateURL representing the result of parsing. This will be // Parse callback for DataDecoder::ParseXml(). This calls the callback
// null if parsing failed or if the results were invalid for some reason (e.g. // passed to the constructor upon completion.
// the resulting URL was not HTTP[S], a name wasn't supplied, a resulting void OnXmlParseComplete(
// TemplateURLRef was invalid, etc.). data_decoder::DataDecoder::ValueOrError value_or_error);
std::unique_ptr<TemplateURL> GetTemplateURL(
const SearchTermsData& search_terms_data);
private: private:
// Key is UTF8 encoded. void ParseURLs(const std::vector<const base::Value*>& urls);
typedef std::map<std::string, ElementType> ElementNameToElementTypeMap; void ParseImages(const std::vector<const base::Value*>& images);
void ParseEncodings(const std::vector<const base::Value*>& encodings);
void ParseAliases(const std::vector<const base::Value*>& aliases);
static void InitMapping(); std::unique_ptr<TemplateURL> FinalizeTemplateURL();
void ParseURL(const xmlChar** atts); // Returns all child elements of |elem| named |tag|, which are searched
void ParseImage(const xmlChar** atts); // for using the XML qualified namespaces in |namespaces_|.
void ParseParam(const xmlChar** atts); bool GetChildElementsByTag(const base::Value& elem,
void ProcessURLParams(); const std::string& tag,
std::vector<const base::Value*>* children);
// Returns the current ElementType.
ElementType GetKnownType();
static ElementNameToElementTypeMap* kElementNameToElementTypeMap;
// Data that gets updated as we parse, and is converted to a TemplateURL by // Data that gets updated as we parse, and is converted to a TemplateURL by
// GetTemplateURL(). // FinalizeTemplateURL().
TemplateURLData data_; TemplateURLData data_;
std::vector<ElementType> elements_;
bool image_is_valid_for_favicon_;
// Character content for the current element.
base::string16 string_;
const TemplateURLParser::ParameterFilter& parameter_filter_;
// The list of parameters parsed in the Param nodes of a Url node.
std::vector<Param> extra_params_;
// The HTTP methods used. // The HTTP methods used.
Method method_; Method method_ = GET;
Method suggestion_method_; Method suggestion_method_ = GET;
// If true, we are currently parsing a suggest URL, otherwise it is an HTML
// search. Note that we don't need a stack as URL nodes cannot be nested.
bool is_suggest_url_;
// If true, the user has set a keyword and we should use it. Otherwise, // If true, the user has set a keyword and we should use it. Otherwise,
// we generate a keyword based on the URL. // we generate a keyword based on the URL.
bool has_custom_keyword_; bool has_custom_keyword_ = false;
// Whether we should derive the image from the URL (when images are data // Whether we should derive the image from the URL (when images are data
// URLs). // URLs).
bool derive_image_from_url_; bool derive_image_from_url_ = false;
DISALLOW_COPY_AND_ASSIGN(TemplateURLParsingContext); // The XML namespaces that were declared on the root element. These are used
}; // to search for tags by name in GetChildElementsByTag(). Will always contain
// at least one element, if only the empty string.
std::vector<std::string> namespaces_;
// static const SearchTermsData* search_terms_data_;
TemplateURLParsingContext::ElementNameToElementTypeMap* TemplateURLParser::ParameterFilter parameter_filter_;
TemplateURLParsingContext::kElementNameToElementTypeMap = nullptr; TemplateURLParser::ParseCallback callback_;
};
TemplateURLParsingContext::TemplateURLParsingContext(
const TemplateURLParser::ParameterFilter& parameter_filter)
: image_is_valid_for_favicon_(false),
parameter_filter_(parameter_filter),
method_(GET),
suggestion_method_(GET),
is_suggest_url_(false),
has_custom_keyword_(false),
derive_image_from_url_(false) {
if (kElementNameToElementTypeMap == nullptr)
InitMapping();
}
// static void SafeTemplateURLParser::OnXmlParseComplete(
void TemplateURLParsingContext::StartElementImpl(void* ctx, data_decoder::DataDecoder::ValueOrError value_or_error) {
const xmlChar* name, if (value_or_error.error) {
const xmlChar** atts) { DLOG(ERROR) << "Failed to parse XML: " << *value_or_error.error;
// Remove the namespace from |name|, ex: os:Url -> Url. std::move(callback_).Run(nullptr);
std::string node_name(XMLCharToString(name)); return;
size_t index = node_name.find_first_of(":");
if (index != std::string::npos)
node_name.erase(0, index + 1);
TemplateURLParsingContext* context =
reinterpret_cast<TemplateURLParsingContext*>(ctx);
context->elements_.push_back(
context->kElementNameToElementTypeMap->count(node_name) ?
(*context->kElementNameToElementTypeMap)[node_name] : UNKNOWN);
switch (context->GetKnownType()) {
case TemplateURLParsingContext::URL:
context->extra_params_.clear();
context->ParseURL(atts);
break;
case TemplateURLParsingContext::IMAGE:
context->ParseImage(atts);
break;
case TemplateURLParsingContext::PARAM:
context->ParseParam(atts);
break;
default:
break;
} }
context->string_.clear();
}
// static const base::Value& root = *value_or_error.value;
void TemplateURLParsingContext::EndElementImpl(void* ctx, const xmlChar* name) {
TemplateURLParsingContext* context = // Get the namespaces used in the XML document, which will be used
reinterpret_cast<TemplateURLParsingContext*>(ctx); // to access nodes by tag name in GetChildElementsByTag().
switch (context->GetKnownType()) { if (const base::Value* namespaces =
case TemplateURLParsingContext::URL: root.FindDictKey(data_decoder::mojom::XmlParser::kNamespacesKey)) {
context->ProcessURLParams(); for (const auto& item : namespaces->DictItems()) {
break; namespaces_.push_back(item.first);
case TemplateURLParsingContext::SHORT_NAME:
context->data_.SetShortName(context->string_);
break;
case TemplateURLParsingContext::IMAGE: {
GURL image_url(base::UTF16ToUTF8(context->string_));
if (image_url.SchemeIs(url::kDataScheme)) {
// TODO (jcampan): bug 1169256: when dealing with data URL, we need to
// decode the data URL in the renderer. For now, we'll just point to the
// favicon from the URL.
context->derive_image_from_url_ = true;
} else if (context->image_is_valid_for_favicon_ && image_url.is_valid() &&
(image_url.SchemeIs(url::kHttpScheme) ||
image_url.SchemeIs(url::kHttpsScheme))) {
context->data_.favicon_url = image_url;
}
context->image_is_valid_for_favicon_ = false;
break;
}
case TemplateURLParsingContext::INPUT_ENCODING: {
std::string input_encoding = base::UTF16ToASCII(context->string_);
if (IsValidEncodingString(input_encoding))
context->data_.input_encodings.push_back(input_encoding);
break;
}
case TemplateURLParsingContext::ALIAS: {
if (!context->string_.empty()) {
context->data_.SetKeyword(context->string_);
context->has_custom_keyword_ = true;
} }
break;
} }
default: if (namespaces_.empty())
break; namespaces_.push_back(std::string());
std::string root_tag;
if (!data_decoder::GetXmlElementTagName(root, &root_tag) ||
(root_tag != kOpenSearchDescriptionElement &&
root_tag != kFirefoxSearchDescriptionElement)) {
DLOG(ERROR) << "Unexpected root tag: " << root_tag;
std::move(callback_).Run(nullptr);
return;
} }
context->string_.clear();
context->elements_.pop_back();
}
// static // The only required element is the URL.
void TemplateURLParsingContext::CharactersImpl(void* ctx, std::vector<const base::Value*> urls;
const xmlChar* ch, if (!GetChildElementsByTag(root, kURLElement, &urls)) {
int len) { std::move(callback_).Run(nullptr);
reinterpret_cast<TemplateURLParsingContext*>(ctx)->string_ += return;
base::UTF8ToUTF16( }
base::StringPiece(reinterpret_cast<const char*>(ch), len)); ParseURLs(urls);
}
std::unique_ptr<TemplateURL> TemplateURLParsingContext::GetTemplateURL( std::vector<const base::Value*> images;
const SearchTermsData& search_terms_data) { if (GetChildElementsByTag(root, kImageElement, &images))
// TODO(jcampan): Support engines that use POST; see http://crbug.com/18107 ParseImages(images);
if (method_ == TemplateURLParsingContext::POST || !IsHTTPRef(data_.url()) ||
!IsHTTPRef(data_.suggestions_url))
return nullptr;
if (suggestion_method_ == TemplateURLParsingContext::POST)
data_.suggestions_url.clear();
// If the image was a data URL, use the favicon from the search URL instead. std::vector<const base::Value*> encodings;
// (see the TODO in EndElementImpl()). if (GetChildElementsByTag(root, kInputEncodingElement, &encodings))
GURL search_url(data_.url()); ParseEncodings(encodings);
if (derive_image_from_url_ && data_.favicon_url.is_empty())
data_.favicon_url = TemplateURL::GenerateFaviconURL(search_url);
// Generate a keyword for this search engine if a custom one was not present std::vector<const base::Value*> aliases;
// in the imported data. if (GetChildElementsByTag(root, kAliasElement, &aliases))
if (!has_custom_keyword_) ParseAliases(aliases);
data_.SetKeyword(TemplateURL::GenerateKeyword(search_url));
// If the OSDD omits or has an empty short name, use the keyword.
if (data_.short_name().empty())
data_.SetShortName(data_.keyword());
// Bail if the search URL is empty or if either TemplateURLRef is invalid. std::vector<const base::Value*> short_names;
std::unique_ptr<TemplateURL> template_url = if (GetChildElementsByTag(root, kShortNameElement, &short_names)) {
std::make_unique<TemplateURL>(data_); std::string name;
if (template_url->url().empty() || if (data_decoder::GetXmlElementText(*short_names.back(), &name))
!template_url->url_ref().IsValid(search_terms_data) || data_.SetShortName(base::UTF8ToUTF16(name));
(!template_url->suggestions_url().empty() &&
!template_url->suggestions_url_ref().IsValid(search_terms_data))) {
return nullptr;
} }
return template_url; std::move(callback_).Run(FinalizeTemplateURL());
}
// static
void TemplateURLParsingContext::InitMapping() {
kElementNameToElementTypeMap = new std::map<std::string, ElementType>;
(*kElementNameToElementTypeMap)[kURLElement] = URL;
(*kElementNameToElementTypeMap)[kParamElement] = PARAM;
(*kElementNameToElementTypeMap)[kShortNameElement] = SHORT_NAME;
(*kElementNameToElementTypeMap)[kImageElement] = IMAGE;
(*kElementNameToElementTypeMap)[kOpenSearchDescriptionElement] =
OPEN_SEARCH_DESCRIPTION;
(*kElementNameToElementTypeMap)[kFirefoxSearchDescriptionElement] =
OPEN_SEARCH_DESCRIPTION;
(*kElementNameToElementTypeMap)[kInputEncodingElement] = INPUT_ENCODING;
(*kElementNameToElementTypeMap)[kAliasElement] = ALIAS;
} }
void TemplateURLParsingContext::ParseURL(const xmlChar** atts) { void SafeTemplateURLParser::ParseURLs(
if (!atts) const std::vector<const base::Value*>& urls) {
return; for (auto* url : urls) {
std::string template_url =
std::string template_url; data_decoder::GetXmlElementAttribute(*url, kURLTemplateAttribute);
bool is_post = false; std::string type =
bool is_html_url = false; data_decoder::GetXmlElementAttribute(*url, kURLTypeAttribute);
bool is_suggest_url = false; bool is_post = base::LowerCaseEqualsASCII(
for (; *atts; atts += 2) { data_decoder::GetXmlElementAttribute(*url, kParamMethodAttribute),
std::string name(XMLCharToString(*atts)); "post");
const xmlChar* value = atts[1]; bool is_html_url = (type == kHTMLType);
if (name == kURLTypeAttribute) { bool is_suggest_url = (type == kSuggestionType);
std::string type = XMLCharToString(value);
is_html_url = (type == kHTMLType);
is_suggest_url = (type == kSuggestionType);
} else if (name == kURLTemplateAttribute) {
template_url = XMLCharToString(value);
} else if (name == kParamMethodAttribute) {
is_post = base::LowerCaseEqualsASCII(XMLCharToString(value), "post");
}
}
if (is_html_url && !template_url.empty()) { if (is_html_url && !template_url.empty()) {
data_.SetURL(template_url); data_.SetURL(template_url);
is_suggest_url_ = false; is_suggest_url = false;
if (is_post) if (is_post)
method_ = POST; method_ = POST;
} else if (is_suggest_url) { } else if (is_suggest_url) {
data_.suggestions_url = template_url; data_.suggestions_url = template_url;
is_suggest_url_ = true;
if (is_post) if (is_post)
suggestion_method_ = POST; suggestion_method_ = POST;
} }
}
void TemplateURLParsingContext::ParseImage(const xmlChar** atts) { std::vector<Param> extra_params;
if (!atts)
return;
int width = 0; std::vector<const base::Value*> params;
int height = 0; GetChildElementsByTag(*url, kParamElement, &params);
std::string type; for (auto* param : params) {
for (; *atts; atts += 2) { std::string key =
std::string name(XMLCharToString(*atts)); data_decoder::GetXmlElementAttribute(*param, kParamNameAttribute);
const xmlChar* value = atts[1]; std::string value =
if (name == kImageTypeAttribute) { data_decoder::GetXmlElementAttribute(*param, kParamValueAttribute);
type = XMLCharToString(value); if (!key.empty() &&
} else if (name == kImageWidthAttribute) { (parameter_filter_.is_null() || parameter_filter_.Run(key, value))) {
base::StringToInt(XMLCharToString(value), &width); extra_params.push_back(Param(key, value));
} else if (name == kImageHeightAttribute) {
base::StringToInt(XMLCharToString(value), &height);
}
}
image_is_valid_for_favicon_ = (width == gfx::kFaviconSize) &&
(height == gfx::kFaviconSize) &&
((type == "image/x-icon") || (type == "image/vnd.microsoft.icon"));
}
void TemplateURLParsingContext::ParseParam(const xmlChar** atts) {
if (!atts)
return;
std::string key, value;
for (; *atts; atts += 2) {
std::string name(XMLCharToString(*atts));
const xmlChar* val = atts[1];
if (name == kParamNameAttribute) {
key = XMLCharToString(val);
} else if (name == kParamValueAttribute) {
value = XMLCharToString(val);
} }
} }
if (!key.empty() && if (!parameter_filter_.is_null() || !extra_params.empty()) {
(parameter_filter_.is_null() || parameter_filter_.Run(key, value))) GURL url(is_suggest_url ? data_.suggestions_url : data_.url());
extra_params_.push_back(Param(key, value));
}
void TemplateURLParsingContext::ProcessURLParams() {
if (parameter_filter_.is_null() && extra_params_.empty())
return;
GURL url(is_suggest_url_ ? data_.suggestions_url : data_.url());
if (!url.is_valid()) if (!url.is_valid())
return; return;
...@@ -462,56 +289,163 @@ void TemplateURLParsingContext::ProcessURLParams() { ...@@ -462,56 +289,163 @@ void TemplateURLParsingContext::ProcessURLParams() {
new_query = url.query(); new_query = url.query();
// Add the extra parameters if any. // Add the extra parameters if any.
if (!extra_params_.empty()) { if (!extra_params.empty()) {
modified = true; modified = true;
for (std::vector<Param>::const_iterator iter(extra_params_.begin()); for (const auto& iter : extra_params)
iter != extra_params_.end(); ++iter) AppendParamToQuery(iter.first, iter.second, &new_query);
AppendParamToQuery(iter->first, iter->second, &new_query);
} }
if (modified) { if (modified) {
GURL::Replacements repl; GURL::Replacements repl;
repl.SetQueryStr(new_query); repl.SetQueryStr(new_query);
url = url.ReplaceComponents(repl); url = url.ReplaceComponents(repl);
if (is_suggest_url_) if (is_suggest_url)
data_.suggestions_url = url.spec(); data_.suggestions_url = url.spec();
else if (url.is_valid()) else if (url.is_valid())
data_.SetURL(url.spec()); data_.SetURL(url.spec());
} }
}
}
} }
TemplateURLParsingContext::ElementType void SafeTemplateURLParser::ParseImages(
TemplateURLParsingContext::GetKnownType() { const std::vector<const base::Value*>& images) {
if (elements_.size() == 2 && elements_[0] == OPEN_SEARCH_DESCRIPTION) for (auto* image : images) {
return elements_[1]; std::string url_string;
// We only expect PARAM nodes under the URL node. if (!data_decoder::GetXmlElementText(*image, &url_string))
return (elements_.size() == 3 && elements_[0] == OPEN_SEARCH_DESCRIPTION && continue;
elements_[1] == URL && elements_[2] == PARAM) ? PARAM : UNKNOWN;
std::string type =
data_decoder::GetXmlElementAttribute(*image, kImageTypeAttribute);
int width = 0;
int height = 0;
base::StringToInt(
data_decoder::GetXmlElementAttribute(*image, kImageWidthAttribute),
&width);
base::StringToInt(
data_decoder::GetXmlElementAttribute(*image, kImageHeightAttribute),
&height);
bool image_is_valid_for_favicon =
(width == gfx::kFaviconSize) && (height == gfx::kFaviconSize) &&
((type == "image/x-icon") || (type == "image/vnd.microsoft.icon"));
GURL image_url(url_string);
if (image_url.SchemeIs(url::kDataScheme)) {
// TODO(jcampan): bug 1169256: when dealing with data URL, we need to
// decode the data URL in the renderer. For now, we'll just point to the
// favicon from the URL.
derive_image_from_url_ = true;
} else if (image_is_valid_for_favicon && image_url.is_valid() &&
(image_url.SchemeIs(url::kHttpScheme) ||
image_url.SchemeIs(url::kHttpsScheme))) {
data_.favicon_url = image_url;
}
image_is_valid_for_favicon = false;
}
} }
// Records the text of each element in |encodings| as an input encoding of
// |data_|, in the order given. Elements whose text cannot be extracted, or
// whose text is rejected by IsValidEncodingString(), are skipped.
void SafeTemplateURLParser::ParseEncodings(
    const std::vector<const base::Value*>& encodings) {
  for (const base::Value* element : encodings) {
    std::string text;
    if (!data_decoder::GetXmlElementText(*element, &text))
      continue;
    if (!IsValidEncodingString(text))
      continue;
    data_.input_encodings.push_back(text);
  }
}
// Uses the text of each element in |aliases| as the engine's keyword and
// records that a custom keyword was supplied. Elements are applied in the
// order given, so a later usable alias replaces the keyword set by an earlier
// one (presumably SetKeyword() overwrites; confirm against TemplateURLData).
void SafeTemplateURLParser::ParseAliases(
    const std::vector<const base::Value*>& aliases) {
  for (const base::Value* alias : aliases) {
    std::string alias_value;
    if (!data_decoder::GetXmlElementText(*alias, &alias_value))
      continue;
    // An alias with empty text carries no keyword. Skip it so that
    // |has_custom_keyword_| stays false and FinalizeTemplateURL() can still
    // generate a keyword from the search URL, instead of storing an empty one.
    if (alias_value.empty())
      continue;
    data_.SetKeyword(base::UTF8ToUTF16(alias_value));
    has_custom_keyword_ = true;
  }
}
std::unique_ptr<TemplateURL> SafeTemplateURLParser::FinalizeTemplateURL() {
// TODO(https://crbug.com/18107): Support engines that use POST.
if (method_ == POST || !IsHTTPRef(data_.url()) ||
!IsHTTPRef(data_.suggestions_url)) {
DLOG(ERROR) << "POST URLs are not supported";
return nullptr;
}
if (suggestion_method_ == POST)
data_.suggestions_url.clear();
// If the image was a data URL, use the favicon from the search URL instead.
// (see the TODO in ParseImages()).
GURL search_url(data_.url());
if (derive_image_from_url_ && data_.favicon_url.is_empty())
data_.favicon_url = TemplateURL::GenerateFaviconURL(search_url);
// Generate a keyword for this search engine if a custom one was not present
// in the imported data.
if (!has_custom_keyword_)
data_.SetKeyword(TemplateURL::GenerateKeyword(search_url));
// If the OSDD omits or has an empty short name, use the keyword.
if (data_.short_name().empty())
data_.SetShortName(data_.keyword());
// Bail if the search URL is empty or if either TemplateURLRef is invalid.
std::unique_ptr<TemplateURL> template_url =
std::make_unique<TemplateURL>(data_);
if (template_url->url().empty() ||
!template_url->url_ref().IsValid(*search_terms_data_) ||
(!template_url->suggestions_url().empty() &&
!template_url->suggestions_url_ref().IsValid(*search_terms_data_))) {
DLOG(ERROR) << "Template URL is not valid";
return nullptr;
}
return template_url;
}
// Looks up the children of |elem| named |tag| once per entry in
// |namespaces_|, qualifying |tag| with that namespace each time. |children|
// is handed to every lookup to receive the matches. Returns true if at least
// one lookup reported success.
bool SafeTemplateURLParser::GetChildElementsByTag(
    const base::Value& elem,
    const std::string& tag,
    std::vector<const base::Value*>* children) {
  bool found_any = false;
  for (const auto& prefix : namespaces_) {
    const std::string qualified_tag =
        data_decoder::GetXmlQualifiedName(prefix, tag);
    // Every namespace is queried even after an earlier hit, so matches under
    // all known prefixes are gathered.
    if (data_decoder::GetAllXmlElementChildrenWithTag(elem, qualified_tag,
                                                      children)) {
      found_any = true;
    }
  }
  return found_any;
}
} // namespace
// TemplateURLParser ---------------------------------------------------------- // TemplateURLParser ----------------------------------------------------------
// static // static
std::unique_ptr<TemplateURL> TemplateURLParser::Parse( void TemplateURLParser::Parse(const SearchTermsData* search_terms_data,
const SearchTermsData& search_terms_data, const std::string& data,
const char* data, const ParameterFilter& parameter_filter,
size_t length, ParseCallback completion_callback) {
const TemplateURLParser::ParameterFilter& param_filter) { auto safe_parser = std::make_unique<SafeTemplateURLParser>(
// xmlSubstituteEntitiesDefault(1) makes it so that &amp; isn't mapped to search_terms_data, parameter_filter, std::move(completion_callback));
// &#38; . Unfortunately xmlSubstituteEntitiesDefault affects global state. data_decoder::DataDecoder::ParseXmlIsolated(
// If this becomes problematic we'll need to provide our own entity data, base::BindOnce(&SafeTemplateURLParser::OnXmlParseComplete,
// type for &amp;, or strip out &#38; by hand after parsing. std::move(safe_parser)));
int last_sub_entities_value = xmlSubstituteEntitiesDefault(1); }
TemplateURLParsingContext context(param_filter);
xmlSAXHandler sax_handler; // static
memset(&sax_handler, 0, sizeof(sax_handler)); void TemplateURLParser::ParseWithDataDecoder(
sax_handler.startElement = &TemplateURLParsingContext::StartElementImpl; data_decoder::DataDecoder* data_decoder,
sax_handler.endElement = &TemplateURLParsingContext::EndElementImpl; const SearchTermsData* search_terms_data,
sax_handler.characters = &TemplateURLParsingContext::CharactersImpl; const std::string& data,
int error = xmlSAXUserParseMemory(&sax_handler, &context, data, const ParameterFilter& parameter_filter,
static_cast<int>(length)); ParseCallback completion_callback) {
xmlSubstituteEntitiesDefault(last_sub_entities_value); auto safe_parser = std::make_unique<SafeTemplateURLParser>(
search_terms_data, parameter_filter, std::move(completion_callback));
return error ? nullptr : context.GetTemplateURL(search_terms_data); data_decoder->ParseXml(
data, base::BindOnce(&SafeTemplateURLParser::OnXmlParseComplete,
std::move(safe_parser)));
} }
...@@ -16,6 +16,10 @@ ...@@ -16,6 +16,10 @@
class SearchTermsData; class SearchTermsData;
class TemplateURL; class TemplateURL;
namespace data_decoder {
class DataDecoder;
}
// TemplateURLParser, as the name implies, handling reading of TemplateURLs // TemplateURLParser, as the name implies, handling reading of TemplateURLs
// from OpenSearch description documents. // from OpenSearch description documents.
class TemplateURLParser { class TemplateURLParser {
...@@ -27,19 +31,30 @@ class TemplateURLParser { ...@@ -27,19 +31,30 @@ class TemplateURLParser {
using ParameterFilter = using ParameterFilter =
base::RepeatingCallback<bool(const std::string&, const std::string&)>; base::RepeatingCallback<bool(const std::string&, const std::string&)>;
using ParseCallback = base::OnceCallback<void(std::unique_ptr<TemplateURL>)>;
// Decodes the chunk of data representing a TemplateURL, creates the // Decodes the chunk of data representing a TemplateURL, creates the
// TemplateURL, and returns it. Returns null if the data does not describe a // TemplateURL, and calls the |completion_callback| with the result. A null
// valid TemplateURL, the URLs referenced do not point to valid http/https // value is provided if the data does not describe a valid TemplateURL, the
// resources, or for some other reason we do not support the described // URLs referenced do not point to valid http/https resources, or for some
// TemplateURL. |parameter_filter| can be used if you want to filter some // other reason we do not support the described TemplateURL.
// parameters out of the URL. For example, when importing from another // |parameter_filter| can be used if you want to filter some parameters out
// browser, we remove any parameter identifying that browser. If set to null, // of the URL. For example, when importing from another browser, we remove
// the URL is not modified. // any parameter identifying that browser. If set to null, the URL is not
static std::unique_ptr<TemplateURL> Parse( // modified.
const SearchTermsData& search_terms_data, static void Parse(const SearchTermsData* search_terms_data,
const char* data, const std::string& data,
size_t length, const ParameterFilter& parameter_filter,
const ParameterFilter& parameter_filter); ParseCallback completion_callback);
// The same as Parse(), but it allows the caller to manage the lifetime of
// the DataDecoder service. The |data_decoder| must be kept alive until the
// |completion_callback| is called.
static void ParseWithDataDecoder(data_decoder::DataDecoder* data_decoder,
const SearchTermsData* search_terms_data,
const std::string& data,
const ParameterFilter& parameter_filter,
ParseCallback completion_callback);
private: private:
// No one should create one of these. // No one should create one of these.
......
...@@ -31,11 +31,13 @@ source_set("cpp") { ...@@ -31,11 +31,13 @@ source_set("cpp") {
public = [ public = [
"data_decoder.h", "data_decoder.h",
"json_sanitizer.h", "json_sanitizer.h",
"safe_xml_parser.h",
] ]
sources = [ sources = [
"data_decoder.cc", "data_decoder.cc",
"json_sanitizer.cc", "json_sanitizer.cc",
"safe_xml_parser.cc",
] ]
configs += [ "//build/config/compiler:wexit_time_destructors" ] configs += [ "//build/config/compiler:wexit_time_destructors" ]
...@@ -68,12 +70,10 @@ source_set("cpp") { ...@@ -68,12 +70,10 @@ source_set("cpp") {
public += [ public += [
"decode_image.h", "decode_image.h",
"safe_bundled_exchanges_parser.h", "safe_bundled_exchanges_parser.h",
"safe_xml_parser.h",
] ]
sources += [ sources += [
"decode_image.cc", "decode_image.cc",
"safe_bundled_exchanges_parser.cc", "safe_bundled_exchanges_parser.cc",
"safe_xml_parser.cc",
] ]
} }
......
...@@ -67,6 +67,8 @@ fuzzer_test("template_url_parser_fuzzer") { ...@@ -67,6 +67,8 @@ fuzzer_test("template_url_parser_fuzzer") {
"//base", "//base",
"//base:i18n", "//base:i18n",
"//components/search_engines:search_engines", "//components/search_engines:search_engines",
"//services/data_decoder/public/cpp",
"//services/data_decoder/public/cpp:test_support",
"//third_party/libxml:libxml", "//third_party/libxml:libxml",
] ]
dict = "//third_party/libxml/fuzz/xml.dict" dict = "//third_party/libxml/fuzz/xml.dict"
......
...@@ -14,9 +14,12 @@ ...@@ -14,9 +14,12 @@
#include "base/bind.h" #include "base/bind.h"
#include "base/command_line.h" #include "base/command_line.h"
#include "base/i18n/icu_util.h" #include "base/i18n/icu_util.h"
#include "base/run_loop.h"
#include "base/task/single_thread_task_executor.h"
#include "components/search_engines/search_terms_data.h" #include "components/search_engines/search_terms_data.h"
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "components/search_engines/template_url_parser.h" #include "components/search_engines/template_url_parser.h"
#include "services/data_decoder/public/cpp/test_support/in_process_data_decoder.h"
#include "testing/libfuzzer/libfuzzer_exports.h" #include "testing/libfuzzer/libfuzzer_exports.h"
bool PseudoRandomFilter(std::mt19937* generator, bool PseudoRandomFilter(std::mt19937* generator,
...@@ -45,7 +48,11 @@ void ignore(void* ctx, const char* msg, ...) { ...@@ -45,7 +48,11 @@ void ignore(void* ctx, const char* msg, ...) {
class Env { class Env {
public: public:
Env() { xmlSetGenericErrorFunc(NULL, &ignore); } Env() { xmlSetGenericErrorFunc(nullptr, &ignore); }
private:
base::SingleThreadTaskExecutor executor_;
data_decoder::test::InProcessDataDecoder data_decoder_;
}; };
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
...@@ -63,11 +70,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { ...@@ -63,11 +70,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
// does not support 8 bit types on Windows. // does not support 8 bit types on Windows.
std::uniform_int_distribution<uint16_t> pool(0, 1); std::uniform_int_distribution<uint16_t> pool(0, 1);
base::RunLoop run_loop;
SearchTermsData search_terms_data;
std::string string_data(reinterpret_cast<const char*>(params + 1), size);
TemplateURLParser::ParameterFilter filter = TemplateURLParser::ParameterFilter filter =
base::BindRepeating(&PseudoRandomFilter, base::Unretained(&generator), base::BindRepeating(&PseudoRandomFilter, base::Unretained(&generator),
base::Unretained(&pool)); base::Unretained(&pool));
TemplateURLParser::Parse(&search_terms_data, string_data, filter,
base::BindOnce(
[](base::OnceClosure quit_closure,
std::unique_ptr<TemplateURL> ignored) {
std::move(quit_closure).Run();
},
run_loop.QuitClosure()));
run_loop.Run();
const char* char_data = reinterpret_cast<const char*>(params + 1);
TemplateURLParser::Parse(SearchTermsData(), char_data, size, filter);
return 0; return 0;
} }
...@@ -141,7 +141,6 @@ static_library("libxml") { ...@@ -141,7 +141,6 @@ static_library("libxml") {
":xml_reader", ":xml_reader",
":xml_writer", ":xml_writer",
":libxml_utils", ":libxml_utils",
"//components/search_engines",
"//testing/libfuzzer/*", "//testing/libfuzzer/*",
"//third_party/blink/renderer/*", "//third_party/blink/renderer/*",
"//third_party/fontconfig", "//third_party/fontconfig",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment