Commit a49d62c7 authored by Robert Sesek, committed by Commit Bot

Use the data_decoder service in TemplateURLParser.

This requires making the TemplateURLParser be asynchronous rather than
directly returning the result. That has a ripple effect of changing the
lifetimes of some of the parameters to Parse(), such as the
SearchTermsData.

The Firefox importer also uses the TemplateURLParser (although it may
be entirely broken, per https://crbug.com/868768). The importer assumes
that all operations are synchronous, so this adds an internal helper
class to manage the now-asynchronous state for TemplateURL parsing.

Bug: 699342
Change-Id: I311d9e29dbbca34a4f5696b251a0fbaaadcc506b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1879973
Commit-Queue: Robert Sesek <rsesek@chromium.org>
Reviewed-by: Ilya Sherman <isherman@chromium.org>
Reviewed-by: Martin Barbella <mbarbella@chromium.org>
Reviewed-by: Nico Weber <thakis@chromium.org>
Reviewed-by: Kevin Bailey <krb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#712773}
parent 2aa1c80a
...@@ -5580,6 +5580,7 @@ static_library("test_support") { ...@@ -5580,6 +5580,7 @@ static_library("test_support") {
"//content/test:test_support", "//content/test:test_support",
"//google_apis:test_support", "//google_apis:test_support",
"//net:test_support", "//net:test_support",
"//services/data_decoder/public/cpp:test_support",
"//services/preferences/public/cpp/tracked:test_support", "//services/preferences/public/cpp/tracked:test_support",
"//skia", "//skia",
"//testing/gmock", "//testing/gmock",
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "components/search_engines/template_url_parser.h" #include "components/search_engines/template_url_parser.h"
#include "components/search_engines/template_url_prepopulate_data.h" #include "components/search_engines/template_url_prepopulate_data.h"
#include "services/data_decoder/public/cpp/data_decoder.h"
#include "ui/base/l10n/l10n_util.h" #include "ui/base/l10n/l10n_util.h"
#include <iterator> #include <iterator>
...@@ -92,53 +93,95 @@ std::unique_ptr<TemplateURL> CreateTemplateURL(const base::string16& url, ...@@ -92,53 +93,95 @@ std::unique_ptr<TemplateURL> CreateTemplateURL(const base::string16& url,
return std::make_unique<TemplateURL>(data); return std::make_unique<TemplateURL>(data);
} }
// Parses the OpenSearch XML files in |xml_files| and populates |search_engines| } // namespace
// with the resulting TemplateURLs.
void ParseSearchEnginesFromFirefoxXMLData(
const std::vector<std::string>& xml_data,
TemplateURLService::OwnedTemplateURLVector* search_engines) {
DCHECK(search_engines);
std::map<std::string, std::unique_ptr<TemplateURL>> search_engine_for_url; // When the Bridge receives the search engines XML data via
// The first XML file represents the default search engine in Firefox 3, so we // SetFirefoxSearchEnginesXMLData(), this class is responsible for managing the
// need to keep it on top of the list. // asynchronous TemplateURL parsing operations. The Bridge generally operates
auto default_turl = search_engine_for_url.end(); // synchronously, so this class manages the state and notifies the bridge when
for (auto xml_iter = xml_data.begin(); xml_iter != xml_data.end(); // parsing is done.
++xml_iter) { class InProcessImporterBridge::SearchEnginesParser {
std::unique_ptr<TemplateURL> template_url = TemplateURLParser::Parse( public:
UIThreadSearchTermsData(), xml_iter->data(), xml_iter->length(), // Starts parsing the |search_engines_xml_data| and will notify |bridge|
base::BindRepeating(&FirefoxURLParameterFilter)); // upon completion.
if (template_url) { SearchEnginesParser(const std::vector<std::string>& search_engines_xml_data,
auto iter = search_engine_for_url.find(template_url->url()); InProcessImporterBridge* bridge)
if (iter == search_engine_for_url.end()) { : bridge_(bridge), data_decoder_(new data_decoder::DataDecoder()) {
iter = search_engine_for_url DCHECK(!search_engines_xml_data.empty());
.insert(std::make_pair(template_url->url(), StartParse(search_engines_xml_data);
std::move(template_url))) }
.first;
} else { // Returns true if all the data have been parsed, false if the operation
// We have already found a search engine with the same URL. We give // is still ongoing.
// priority to the latest one found, as GetSearchEnginesXMLFiles() bool is_done() const { return is_done_; }
// returns a vector with first Firefox default search engines and then
// the user's ones. We want to give priority to the user ones. // If InProcessImporterBridge::NotifyEnded() is called before is_done()
iter->second = std::move(template_url); // returns true, NotifyEnded() sets this flag so that it can be called back
} // to complete the import.
if (default_turl == search_engine_for_url.end()) void set_notify_ended_on_completion() { notify_ended_on_completion_ = true; }
default_turl = iter;
private:
void StartParse(const std::vector<std::string>& search_engines_xml_data) {
const auto& last_item = search_engines_xml_data.end() - 1;
TemplateURLParser::ParameterFilter param_filter =
base::BindRepeating(&FirefoxURLParameterFilter);
for (auto it = search_engines_xml_data.begin();
it != search_engines_xml_data.end(); ++it) {
// Because all TemplateURLParser are handled by the same data_decoder_
// instance, the results will be returned FIFO.
// The SearchEnginesParser is owned by the InProcessImporterBridge,
// which is not deleted until NotifyEnded() is called, so using Unretained
// is safe.
TemplateURLParser::ParseWithDataDecoder(
data_decoder_.get(), &search_terms_data_, *it, param_filter,
base::BindOnce(&SearchEnginesParser::OnURLParsed,
base::Unretained(this), it == last_item));
} }
} }
// Put the results in the |search_engines| vector. void OnURLParsed(bool is_last_item, std::unique_ptr<TemplateURL> url) {
for (auto t_iter = search_engine_for_url.begin(); if (url)
t_iter != search_engine_for_url.end(); ++t_iter) { parsed_urls_.push_back(std::move(url));
if (t_iter == default_turl)
search_engines->insert(search_engines->begin(), if (is_last_item)
std::move(default_turl->second)); FinishParsing();
else
search_engines->push_back(std::move(t_iter->second));
} }
}
} // namespace void FinishParsing() {
is_done_ = true;
// Shut down the DataDecoder.
data_decoder_.reset();
bridge_->WriteSearchEngines(std::move(parsed_urls_));
if (notify_ended_on_completion_)
bridge_->NotifyEnded();
}
// Storage for the URLs. These are stored in the same order as the original
// |search_engines_xml_data|.
TemplateURLService::OwnedTemplateURLVector parsed_urls_;
InProcessImporterBridge* bridge_; // Weak, owns this.
// Set to true if the last search engine has been parsed.
bool is_done_ = false;
// Set to true if the ImporterBridge has been NotifyEnded() already but was
// waiting on this class to finish the import.
bool notify_ended_on_completion_ = false;
// Parameter for TemplateURLParser.
UIThreadSearchTermsData search_terms_data_;
// The DataDecoder instance that is shared amongst all the TemplateURLs being
// parsed.
std::unique_ptr<data_decoder::DataDecoder> data_decoder_;
DISALLOW_COPY_AND_ASSIGN(SearchEnginesParser);
};
InProcessImporterBridge::InProcessImporterBridge( InProcessImporterBridge::InProcessImporterBridge(
ProfileWriter* writer, ProfileWriter* writer,
...@@ -186,10 +229,10 @@ void InProcessImporterBridge::SetKeywords( ...@@ -186,10 +229,10 @@ void InProcessImporterBridge::SetKeywords(
void InProcessImporterBridge::SetFirefoxSearchEnginesXMLData( void InProcessImporterBridge::SetFirefoxSearchEnginesXMLData(
const std::vector<std::string>& search_engine_data) { const std::vector<std::string>& search_engine_data) {
TemplateURLService::OwnedTemplateURLVector search_engines; if (!search_engine_data.empty()) {
ParseSearchEnginesFromFirefoxXMLData(search_engine_data, &search_engines); // SearchEnginesParser will call the Bridge back when it is done.
search_engines_.reset(new SearchEnginesParser(search_engine_data, this));
writer_->AddKeywords(std::move(search_engines), true); }
} }
void InProcessImporterBridge::SetPasswordForm( void InProcessImporterBridge::SetPasswordForm(
...@@ -228,6 +271,13 @@ void InProcessImporterBridge::NotifyItemEnded(importer::ImportItem item) { ...@@ -228,6 +271,13 @@ void InProcessImporterBridge::NotifyItemEnded(importer::ImportItem item) {
} }
void InProcessImporterBridge::NotifyEnded() { void InProcessImporterBridge::NotifyEnded() {
// If there are search engines to parse but parsing them is not yet complete,
// arrange to be called back when they are done.
if (search_engines_ && !search_engines_->is_done()) {
search_engines_->set_notify_ended_on_completion();
return;
}
host_->NotifyImportEnded(); host_->NotifyImportEnded();
} }
...@@ -236,3 +286,35 @@ base::string16 InProcessImporterBridge::GetLocalizedString(int message_id) { ...@@ -236,3 +286,35 @@ base::string16 InProcessImporterBridge::GetLocalizedString(int message_id) {
} }
InProcessImporterBridge::~InProcessImporterBridge() {} InProcessImporterBridge::~InProcessImporterBridge() {}
void InProcessImporterBridge::WriteSearchEngines(
TemplateURL::OwnedTemplateURLVector template_urls) {
std::map<std::string, std::unique_ptr<TemplateURL>> search_engine_for_url;
for (auto& template_url : template_urls) {
std::string key = template_url->url();
// Give priority to the latest template URL that is found, as
// GetSearchEnginesXMLFiles() returns a vector with first Firefox default
// search engines and then the user's ones. The user ones should take
// precedence.
search_engine_for_url[key] = std::move(template_url);
}
// The first URL represents the default search engine in Firefox 3, so we
// need to keep it on top of the list.
auto default_turl = search_engine_for_url.end();
if (!template_urls.empty())
default_turl = search_engine_for_url.find(template_urls[0]->url());
// Put the results in the |search_engines| vector.
TemplateURLService::OwnedTemplateURLVector search_engines;
for (auto it = search_engine_for_url.begin();
it != search_engine_for_url.end(); ++it) {
if (it == default_turl) {
search_engines.insert(search_engines.begin(),
std::move(default_turl->second));
} else {
search_engines.push_back(std::move(it->second));
}
}
writer_->AddKeywords(std::move(search_engines), true);
}
...@@ -60,10 +60,19 @@ class InProcessImporterBridge : public ImporterBridge { ...@@ -60,10 +60,19 @@ class InProcessImporterBridge : public ImporterBridge {
// End ImporterBridge implementation. // End ImporterBridge implementation.
private: private:
class SearchEnginesParser;
friend class SearchEnginesParser;
~InProcessImporterBridge() override; ~InProcessImporterBridge() override;
// Called by the SearchEnginesParser when all the search engines have been
// parsed. The |template_urls| vector is in the same sort order that was
// passed to SetFirefoxSearchEnginesXMLData().
void WriteSearchEngines(TemplateURL::OwnedTemplateURLVector template_urls);
ProfileWriter* const writer_; // weak ProfileWriter* const writer_; // weak
const base::WeakPtr<ExternalProcessImporterHost> host_; const base::WeakPtr<ExternalProcessImporterHost> host_;
std::unique_ptr<SearchEnginesParser> search_engines_;
DISALLOW_COPY_AND_ASSIGN(InProcessImporterBridge); DISALLOW_COPY_AND_ASSIGN(InProcessImporterBridge);
}; };
......
...@@ -10,9 +10,11 @@ ...@@ -10,9 +10,11 @@
#include "base/path_service.h" #include "base/path_service.h"
#include "base/stl_util.h" #include "base/stl_util.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "base/test/task_environment.h"
#include "chrome/common/chrome_paths.h" #include "chrome/common/chrome_paths.h"
#include "components/search_engines/search_terms_data.h" #include "components/search_engines/search_terms_data.h"
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "services/data_decoder/public/cpp/test_support/in_process_data_decoder.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
using base::ASCIIToUTF16; using base::ASCIIToUTF16;
...@@ -39,15 +41,25 @@ class TemplateURLParserTest : public testing::Test { ...@@ -39,15 +41,25 @@ class TemplateURLParserTest : public testing::Test {
void ParseFile(const std::string& file_name, void ParseFile(const std::string& file_name,
const TemplateURLParser::ParameterFilter& filter); const TemplateURLParser::ParameterFilter& filter);
void ParseString(const std::string& data,
const TemplateURLParser::ParameterFilter& filter);
// ParseFile parses the results into this template_url. // ParseFile parses the results into this template_url.
std::unique_ptr<TemplateURL> template_url_; std::unique_ptr<TemplateURL> template_url_;
private: private:
void OnTemplateURLParsed(base::OnceClosure quit_closure,
std::unique_ptr<TemplateURL> template_url) {
template_url_ = std::move(template_url);
std::move(quit_closure).Run();
}
base::FilePath osdd_dir_; base::FilePath osdd_dir_;
base::test::TaskEnvironment task_environment_;
data_decoder::test::InProcessDataDecoder data_decoder_;
}; };
TemplateURLParserTest::TemplateURLParserTest() { TemplateURLParserTest::TemplateURLParserTest() {}
}
TemplateURLParserTest::~TemplateURLParserTest() { TemplateURLParserTest::~TemplateURLParserTest() {
} }
...@@ -66,8 +78,19 @@ void TemplateURLParserTest::ParseFile( ...@@ -66,8 +78,19 @@ void TemplateURLParserTest::ParseFile(
std::string contents; std::string contents;
ASSERT_TRUE(base::ReadFileToString(full_path, &contents)); ASSERT_TRUE(base::ReadFileToString(full_path, &contents));
template_url_ = TemplateURLParser::Parse(SearchTermsData(), contents.data(), ParseString(contents, filter);
contents.length(), filter); }
void TemplateURLParserTest::ParseString(
const std::string& data,
const TemplateURLParser::ParameterFilter& filter) {
base::RunLoop run_loop;
SearchTermsData search_terms_data;
TemplateURLParser::Parse(
&search_terms_data, data, filter,
base::BindOnce(&TemplateURLParserTest::OnTemplateURLParsed,
base::Unretained(this), run_loop.QuitClosure()));
run_loop.Run();
} }
// Actual tests --------------------------------------------------------------- // Actual tests ---------------------------------------------------------------
...@@ -249,6 +272,5 @@ TEST_F(TemplateURLParserTest, InvalidInput) { ...@@ -249,6 +272,5 @@ TEST_F(TemplateURLParserTest, InvalidInput) {
</Url> </Url>
</OpenSearchDescription> </OpenSearchDescription>
)"; )";
TemplateURLParser::Parse(SearchTermsData(), char_data, base::size(char_data), ParseString(char_data, filter);
filter);
} }
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "components/search_engines/template_url_data.h" #include "components/search_engines/template_url_data.h"
#include "components/search_engines/template_url_service_observer.h" #include "components/search_engines/template_url_service_observer.h"
#include "services/data_decoder/public/cpp/test_support/in_process_data_decoder.h"
class KeywordWebDataService; class KeywordWebDataService;
class TemplateURLService; class TemplateURLService;
...@@ -83,6 +84,7 @@ class TemplateURLServiceTestUtil : public TemplateURLServiceObserver { ...@@ -83,6 +84,7 @@ class TemplateURLServiceTestUtil : public TemplateURLServiceObserver {
base::string16 search_term_; base::string16 search_term_;
scoped_refptr<KeywordWebDataService> web_data_service_; scoped_refptr<KeywordWebDataService> web_data_service_;
std::unique_ptr<TemplateURLService> model_; std::unique_ptr<TemplateURLService> model_;
data_decoder::test::InProcessDataDecoder data_decoder_;
DISALLOW_COPY_AND_ASSIGN(TemplateURLServiceTestUtil); DISALLOW_COPY_AND_ASSIGN(TemplateURLServiceTestUtil);
}; };
......
...@@ -48,6 +48,7 @@ static_library("utility") { ...@@ -48,6 +48,7 @@ static_library("utility") {
"//media", "//media",
"//net:net_with_v8", "//net:net_with_v8",
"//printing/buildflags", "//printing/buildflags",
"//services/data_decoder:lib",
"//services/network:network_service", "//services/network:network_service",
"//services/service_manager/public/cpp", "//services/service_manager/public/cpp",
"//skia", "//skia",
...@@ -100,7 +101,6 @@ static_library("utility") { ...@@ -100,7 +101,6 @@ static_library("utility") {
"//chrome/common:mojo_bindings", "//chrome/common:mojo_bindings",
"//chrome/common/importer:interfaces", "//chrome/common/importer:interfaces",
"//components/autofill/core/common", "//components/autofill/core/common",
"//services/data_decoder:lib",
"//services/proxy_resolver:lib", "//services/proxy_resolver:lib",
] ]
} }
......
...@@ -70,9 +70,9 @@ static_library("search_engines") { ...@@ -70,9 +70,9 @@ static_library("search_engines") {
"//components/variations", "//components/variations",
"//google_apis", "//google_apis",
"//net", "//net",
"//services/data_decoder/public/cpp",
"//services/network/public/cpp", "//services/network/public/cpp",
"//sql", "//sql",
"//third_party/libxml", # https://crbug.com/699342
"//third_party/metrics_proto", "//third_party/metrics_proto",
"//ui/base", "//ui/base",
"//ui/gfx", "//ui/gfx",
......
...@@ -15,8 +15,8 @@ include_rules = [ ...@@ -15,8 +15,8 @@ include_rules = [
"+components/variations", "+components/variations",
"+components/webdata", "+components/webdata",
"+google_apis", "+google_apis",
"+libxml",
"+net", "+net",
"+services/data_decoder/public",
"+services/network/public/cpp", "+services/network/public/cpp",
"+services/network/test", "+services/network/test",
"+sql", "+sql",
......
...@@ -80,6 +80,7 @@ class TemplateURLFetcher::RequestDelegate { ...@@ -80,6 +80,7 @@ class TemplateURLFetcher::RequestDelegate {
base::string16 keyword() const { return keyword_; } base::string16 keyword() const { return keyword_; }
private: private:
void OnTemplateURLParsed(std::unique_ptr<TemplateURL> template_url);
void OnLoaded(); void OnLoaded();
void AddSearchProvider(); void AddSearchProvider();
...@@ -140,6 +141,25 @@ TemplateURLFetcher::RequestDelegate::RequestDelegate( ...@@ -140,6 +141,25 @@ TemplateURLFetcher::RequestDelegate::RequestDelegate(
50000 /* max_body_size */); 50000 /* max_body_size */);
} }
void TemplateURLFetcher::RequestDelegate::OnTemplateURLParsed(
std::unique_ptr<TemplateURL> template_url) {
template_url_ = std::move(template_url);
if (!template_url_ ||
!template_url_->url_ref().SupportsReplacement(
fetcher_->template_url_service_->search_terms_data())) {
fetcher_->RequestCompleted(this);
// WARNING: RequestCompleted deletes us.
return;
}
// Wait for the model to be loaded before adding the provider.
if (!fetcher_->template_url_service_->loaded())
return;
AddSearchProvider();
// WARNING: AddSearchProvider deletes us.
}
void TemplateURLFetcher::RequestDelegate::OnLoaded() { void TemplateURLFetcher::RequestDelegate::OnLoaded() {
template_url_subscription_.reset(); template_url_subscription_.reset();
if (!template_url_) if (!template_url_)
...@@ -158,23 +178,11 @@ void TemplateURLFetcher::RequestDelegate::OnSimpleLoaderComplete( ...@@ -158,23 +178,11 @@ void TemplateURLFetcher::RequestDelegate::OnSimpleLoaderComplete(
return; return;
} }
template_url_ = TemplateURLParser::Parse( TemplateURLParser::Parse(
fetcher_->template_url_service_->search_terms_data(), &fetcher_->template_url_service_->search_terms_data(),
response_body->data(), response_body->length(), *response_body.get(), TemplateURLParser::ParameterFilter(),
TemplateURLParser::ParameterFilter()); base::BindOnce(&RequestDelegate::OnTemplateURLParsed,
if (!template_url_ || base::Unretained(this)));
!template_url_->url_ref().SupportsReplacement(
fetcher_->template_url_service_->search_terms_data())) {
fetcher_->RequestCompleted(this);
// WARNING: RequestCompleted deletes us.
return;
}
// Wait for the model to be loaded before adding the provider.
if (!fetcher_->template_url_service_->loaded())
return;
AddSearchProvider();
// WARNING: AddSearchProvider deletes us.
} }
void TemplateURLFetcher::RequestDelegate::AddSearchProvider() { void TemplateURLFetcher::RequestDelegate::AddSearchProvider() {
......
...@@ -7,29 +7,26 @@ ...@@ -7,29 +7,26 @@
#include <string.h> #include <string.h>
#include <algorithm> #include <algorithm>
#include <map>
#include <memory>
#include <vector> #include <vector>
#include "base/bind.h"
#include "base/logging.h" #include "base/logging.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/strings/string_number_conversions.h" #include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "base/values.h"
#include "components/search_engines/search_terms_data.h" #include "components/search_engines/search_terms_data.h"
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "libxml/parser.h" #include "services/data_decoder/public/cpp/data_decoder.h"
#include "libxml/xmlwriter.h" #include "services/data_decoder/public/cpp/safe_xml_parser.h"
#include "services/data_decoder/public/mojom/xml_parser.mojom.h"
#include "ui/gfx/favicon_size.h" #include "ui/gfx/favicon_size.h"
#include "url/gurl.h" #include "url/gurl.h"
#include "url/url_constants.h" #include "url/url_constants.h"
namespace { namespace {
// NOTE: libxml uses the UTF-8 encoding. As 0-127 of UTF-8 corresponds
// to that of char, the following names are all in terms of char. This avoids
// having to convert to wide, then do comparisons.
// Defines for element names of the OSD document: // Defines for element names of the OSD document:
const char kURLElement[] = "Url"; const char kURLElement[] = "Url";
const char kParamElement[] = "Param"; const char kParamElement[] = "Param";
...@@ -56,10 +53,6 @@ const char kHTMLType[] = "text/html"; ...@@ -56,10 +53,6 @@ const char kHTMLType[] = "text/html";
// Mime type for as you type suggestions. // Mime type for as you type suggestions.
const char kSuggestionType[] = "application/x-suggestions+json"; const char kSuggestionType[] = "application/x-suggestions+json";
std::string XMLCharToString(const xmlChar* value) {
return std::string(reinterpret_cast<const char*>(value));
}
// Returns true if input_encoding contains a valid input encoding string. This // Returns true if input_encoding contains a valid input encoding string. This
// doesn't verify that we have a valid encoding for the string, just that the // doesn't verify that we have a valid encoding for the string, just that the
// string contains characters that constitute a valid input encoding. // string contains characters that constitute a valid input encoding.
...@@ -101,217 +94,292 @@ bool IsHTTPRef(const std::string& url) { ...@@ -101,217 +94,292 @@ bool IsHTTPRef(const std::string& url) {
gurl.SchemeIs(url::kHttpsScheme)); gurl.SchemeIs(url::kHttpsScheme));
} }
} // namespace // SafeTemplateURLParser takes the output of the data_decoder service's
// XmlParser and extracts the data from the search description into a
// TemplateURL.
// TemplateURLParsingContext -------------------------------------------------- class SafeTemplateURLParser {
// To minimize memory overhead while parsing, a SAX style parser is used.
// TemplateURLParsingContext is used to maintain the state we're in the document
// while parsing.
class TemplateURLParsingContext {
public: public:
// Enum of the known element types.
enum ElementType {
UNKNOWN,
OPEN_SEARCH_DESCRIPTION,
URL,
PARAM,
SHORT_NAME,
IMAGE,
INPUT_ENCODING,
ALIAS,
};
enum Method { enum Method {
GET, GET,
POST POST
}; };
// Key/value of a Param node. // Key/value of a Param node.
typedef std::pair<std::string, std::string> Param; using Param = std::pair<std::string, std::string>;
explicit TemplateURLParsingContext( SafeTemplateURLParser(
const TemplateURLParser::ParameterFilter& parameter_filter); const SearchTermsData* search_terms_data,
const TemplateURLParser::ParameterFilter& parameter_filter,
TemplateURLParser::ParseCallback callback)
: search_terms_data_(search_terms_data),
parameter_filter_(parameter_filter),
callback_(std::move(callback)) {}
static void StartElementImpl(void* ctx, SafeTemplateURLParser(const SafeTemplateURLParser&) = delete;
const xmlChar* name, SafeTemplateURLParser& operator=(const SafeTemplateURLParser&) = delete;
const xmlChar** atts);
static void EndElementImpl(void* ctx, const xmlChar* name);
static void CharactersImpl(void* ctx, const xmlChar* ch, int len);
// Returns a TemplateURL representing the result of parsing. This will be // Parse callback for DataDecoder::ParseXml(). This calls the callback
// null if parsing failed or if the results were invalid for some reason (e.g. // passed to the constructor upon completion.
// the resulting URL was not HTTP[S], a name wasn't supplied, a resulting void OnXmlParseComplete(
// TemplateURLRef was invalid, etc.). data_decoder::DataDecoder::ValueOrError value_or_error);
std::unique_ptr<TemplateURL> GetTemplateURL(
const SearchTermsData& search_terms_data);
private: private:
// Key is UTF8 encoded. void ParseURLs(const std::vector<const base::Value*>& urls);
typedef std::map<std::string, ElementType> ElementNameToElementTypeMap; void ParseImages(const std::vector<const base::Value*>& images);
void ParseEncodings(const std::vector<const base::Value*>& encodings);
void ParseAliases(const std::vector<const base::Value*>& aliases);
static void InitMapping(); std::unique_ptr<TemplateURL> FinalizeTemplateURL();
void ParseURL(const xmlChar** atts); // Returns all child elements of |elem| named |tag|, which are searched
void ParseImage(const xmlChar** atts); // for using the XML qualified namespaces in |namespaces_|.
void ParseParam(const xmlChar** atts); bool GetChildElementsByTag(const base::Value& elem,
void ProcessURLParams(); const std::string& tag,
std::vector<const base::Value*>* children);
// Returns the current ElementType.
ElementType GetKnownType();
static ElementNameToElementTypeMap* kElementNameToElementTypeMap;
// Data that gets updated as we parse, and is converted to a TemplateURL by // Data that gets updated as we parse, and is converted to a TemplateURL by
// GetTemplateURL(). // FinalizeTemplateURL().
TemplateURLData data_; TemplateURLData data_;
std::vector<ElementType> elements_;
bool image_is_valid_for_favicon_;
// Character content for the current element.
base::string16 string_;
const TemplateURLParser::ParameterFilter& parameter_filter_;
// The list of parameters parsed in the Param nodes of a Url node.
std::vector<Param> extra_params_;
// The HTTP methods used. // The HTTP methods used.
Method method_; Method method_ = GET;
Method suggestion_method_; Method suggestion_method_ = GET;
// If true, we are currently parsing a suggest URL, otherwise it is an HTML
// search. Note that we don't need a stack as URL nodes cannot be nested.
bool is_suggest_url_;
// If true, the user has set a keyword and we should use it. Otherwise, // If true, the user has set a keyword and we should use it. Otherwise,
// we generate a keyword based on the URL. // we generate a keyword based on the URL.
bool has_custom_keyword_; bool has_custom_keyword_ = false;
// Whether we should derive the image from the URL (when images are data // Whether we should derive the image from the URL (when images are data
// URLs). // URLs).
bool derive_image_from_url_; bool derive_image_from_url_ = false;
// The XML namespaces that were declared on the root element. These are used
// to search for tags by name in GetChildElementsByTag(). Will always contain
// at least one element, if only the empty string.
std::vector<std::string> namespaces_;
DISALLOW_COPY_AND_ASSIGN(TemplateURLParsingContext); const SearchTermsData* search_terms_data_;
TemplateURLParser::ParameterFilter parameter_filter_;
TemplateURLParser::ParseCallback callback_;
}; };
// static void SafeTemplateURLParser::OnXmlParseComplete(
TemplateURLParsingContext::ElementNameToElementTypeMap* data_decoder::DataDecoder::ValueOrError value_or_error) {
TemplateURLParsingContext::kElementNameToElementTypeMap = nullptr; if (value_or_error.error) {
DLOG(ERROR) << "Failed to parse XML: " << *value_or_error.error;
TemplateURLParsingContext::TemplateURLParsingContext( std::move(callback_).Run(nullptr);
const TemplateURLParser::ParameterFilter& parameter_filter) return;
: image_is_valid_for_favicon_(false), }
parameter_filter_(parameter_filter),
method_(GET),
suggestion_method_(GET),
is_suggest_url_(false),
has_custom_keyword_(false),
derive_image_from_url_(false) {
if (kElementNameToElementTypeMap == nullptr)
InitMapping();
}
// static const base::Value& root = *value_or_error.value;
void TemplateURLParsingContext::StartElementImpl(void* ctx,
const xmlChar* name, // Get the namespaces used in the XML document, which will be used
const xmlChar** atts) { // to access nodes by tag name in GetChildElementsByTag().
// Remove the namespace from |name|, ex: os:Url -> Url. if (const base::Value* namespaces =
std::string node_name(XMLCharToString(name)); root.FindDictKey(data_decoder::mojom::XmlParser::kNamespacesKey)) {
size_t index = node_name.find_first_of(":"); for (const auto& item : namespaces->DictItems()) {
if (index != std::string::npos) namespaces_.push_back(item.first);
node_name.erase(0, index + 1); }
TemplateURLParsingContext* context =
reinterpret_cast<TemplateURLParsingContext*>(ctx);
context->elements_.push_back(
context->kElementNameToElementTypeMap->count(node_name) ?
(*context->kElementNameToElementTypeMap)[node_name] : UNKNOWN);
switch (context->GetKnownType()) {
case TemplateURLParsingContext::URL:
context->extra_params_.clear();
context->ParseURL(atts);
break;
case TemplateURLParsingContext::IMAGE:
context->ParseImage(atts);
break;
case TemplateURLParsingContext::PARAM:
context->ParseParam(atts);
break;
default:
break;
} }
context->string_.clear(); if (namespaces_.empty())
namespaces_.push_back(std::string());
std::string root_tag;
if (!data_decoder::GetXmlElementTagName(root, &root_tag) ||
(root_tag != kOpenSearchDescriptionElement &&
root_tag != kFirefoxSearchDescriptionElement)) {
DLOG(ERROR) << "Unexpected root tag: " << root_tag;
std::move(callback_).Run(nullptr);
return;
}
// The only required element is the URL.
std::vector<const base::Value*> urls;
if (!GetChildElementsByTag(root, kURLElement, &urls)) {
std::move(callback_).Run(nullptr);
return;
}
ParseURLs(urls);
std::vector<const base::Value*> images;
if (GetChildElementsByTag(root, kImageElement, &images))
ParseImages(images);
std::vector<const base::Value*> encodings;
if (GetChildElementsByTag(root, kInputEncodingElement, &encodings))
ParseEncodings(encodings);
std::vector<const base::Value*> aliases;
if (GetChildElementsByTag(root, kAliasElement, &aliases))
ParseAliases(aliases);
std::vector<const base::Value*> short_names;
if (GetChildElementsByTag(root, kShortNameElement, &short_names)) {
std::string name;
if (data_decoder::GetXmlElementText(*short_names.back(), &name))
data_.SetShortName(base::UTF8ToUTF16(name));
}
std::move(callback_).Run(FinalizeTemplateURL());
} }
// static void SafeTemplateURLParser::ParseURLs(
void TemplateURLParsingContext::EndElementImpl(void* ctx, const xmlChar* name) { const std::vector<const base::Value*>& urls) {
TemplateURLParsingContext* context = for (auto* url : urls) {
reinterpret_cast<TemplateURLParsingContext*>(ctx); std::string template_url =
switch (context->GetKnownType()) { data_decoder::GetXmlElementAttribute(*url, kURLTemplateAttribute);
case TemplateURLParsingContext::URL: std::string type =
context->ProcessURLParams(); data_decoder::GetXmlElementAttribute(*url, kURLTypeAttribute);
break; bool is_post = base::LowerCaseEqualsASCII(
case TemplateURLParsingContext::SHORT_NAME: data_decoder::GetXmlElementAttribute(*url, kParamMethodAttribute),
context->data_.SetShortName(context->string_); "post");
break; bool is_html_url = (type == kHTMLType);
case TemplateURLParsingContext::IMAGE: { bool is_suggest_url = (type == kSuggestionType);
GURL image_url(base::UTF16ToUTF8(context->string_));
if (image_url.SchemeIs(url::kDataScheme)) { if (is_html_url && !template_url.empty()) {
// TODO (jcampan): bug 1169256: when dealing with data URL, we need to data_.SetURL(template_url);
// decode the data URL in the renderer. For now, we'll just point to the is_suggest_url = false;
// favicon from the URL. if (is_post)
context->derive_image_from_url_ = true; method_ = POST;
} else if (context->image_is_valid_for_favicon_ && image_url.is_valid() && } else if (is_suggest_url) {
(image_url.SchemeIs(url::kHttpScheme) || data_.suggestions_url = template_url;
image_url.SchemeIs(url::kHttpsScheme))) { if (is_post)
context->data_.favicon_url = image_url; suggestion_method_ = POST;
}
context->image_is_valid_for_favicon_ = false;
break;
} }
case TemplateURLParsingContext::INPUT_ENCODING: {
std::string input_encoding = base::UTF16ToASCII(context->string_); std::vector<Param> extra_params;
if (IsValidEncodingString(input_encoding))
context->data_.input_encodings.push_back(input_encoding); std::vector<const base::Value*> params;
break; GetChildElementsByTag(*url, kParamElement, &params);
for (auto* param : params) {
std::string key =
data_decoder::GetXmlElementAttribute(*param, kParamNameAttribute);
std::string value =
data_decoder::GetXmlElementAttribute(*param, kParamValueAttribute);
if (!key.empty() &&
(parameter_filter_.is_null() || parameter_filter_.Run(key, value))) {
extra_params.push_back(Param(key, value));
}
} }
case TemplateURLParsingContext::ALIAS: {
if (!context->string_.empty()) { if (!parameter_filter_.is_null() || !extra_params.empty()) {
context->data_.SetKeyword(context->string_); GURL url(is_suggest_url ? data_.suggestions_url : data_.url());
context->has_custom_keyword_ = true; if (!url.is_valid())
return;
// If there is a parameter filter, parse the existing URL and remove any
// unwanted parameter.
std::string new_query;
bool modified = false;
if (!parameter_filter_.is_null()) {
url::Component query = url.parsed_for_possibly_invalid_spec().query;
url::Component key, value;
const char* url_spec = url.spec().c_str();
while (url::ExtractQueryKeyValue(url_spec, &query, &key, &value)) {
std::string key_str(url_spec, key.begin, key.len);
std::string value_str(url_spec, value.begin, value.len);
if (parameter_filter_.Run(key_str, value_str)) {
AppendParamToQuery(key_str, value_str, &new_query);
} else {
modified = true;
}
}
}
if (!modified)
new_query = url.query();
// Add the extra parameters if any.
if (!extra_params.empty()) {
modified = true;
for (const auto& iter : extra_params)
AppendParamToQuery(iter.first, iter.second, &new_query);
}
if (modified) {
GURL::Replacements repl;
repl.SetQueryStr(new_query);
url = url.ReplaceComponents(repl);
if (is_suggest_url)
data_.suggestions_url = url.spec();
else if (url.is_valid())
data_.SetURL(url.spec());
} }
break;
} }
default:
break;
} }
context->string_.clear();
context->elements_.pop_back();
} }
// static void SafeTemplateURLParser::ParseImages(
void TemplateURLParsingContext::CharactersImpl(void* ctx, const std::vector<const base::Value*>& images) {
const xmlChar* ch, for (auto* image : images) {
int len) { std::string url_string;
reinterpret_cast<TemplateURLParsingContext*>(ctx)->string_ += if (!data_decoder::GetXmlElementText(*image, &url_string))
base::UTF8ToUTF16( continue;
base::StringPiece(reinterpret_cast<const char*>(ch), len));
std::string type =
data_decoder::GetXmlElementAttribute(*image, kImageTypeAttribute);
int width = 0;
int height = 0;
base::StringToInt(
data_decoder::GetXmlElementAttribute(*image, kImageWidthAttribute),
&width);
base::StringToInt(
data_decoder::GetXmlElementAttribute(*image, kImageHeightAttribute),
&height);
bool image_is_valid_for_favicon =
(width == gfx::kFaviconSize) && (height == gfx::kFaviconSize) &&
((type == "image/x-icon") || (type == "image/vnd.microsoft.icon"));
GURL image_url(url_string);
if (image_url.SchemeIs(url::kDataScheme)) {
// TODO(jcampan): bug 1169256: when dealing with data URL, we need to
// decode the data URL in the renderer. For now, we'll just point to the
// favicon from the URL.
derive_image_from_url_ = true;
} else if (image_is_valid_for_favicon && image_url.is_valid() &&
(image_url.SchemeIs(url::kHttpScheme) ||
image_url.SchemeIs(url::kHttpsScheme))) {
data_.favicon_url = image_url;
}
image_is_valid_for_favicon = false;
}
} }
std::unique_ptr<TemplateURL> TemplateURLParsingContext::GetTemplateURL( void SafeTemplateURLParser::ParseEncodings(
const SearchTermsData& search_terms_data) { const std::vector<const base::Value*>& encodings) {
// TODO(jcampan): Support engines that use POST; see http://crbug.com/18107 for (auto* encoding : encodings) {
if (method_ == TemplateURLParsingContext::POST || !IsHTTPRef(data_.url()) || std::string encoding_value;
!IsHTTPRef(data_.suggestions_url)) if (data_decoder::GetXmlElementText(*encoding, &encoding_value)) {
if (IsValidEncodingString(encoding_value))
data_.input_encodings.push_back(encoding_value);
}
}
}
// Applies the text of each <Alias> element as the keyword of the TemplateURL
// being built. Elements are processed in order, so when several usable aliases
// are present the last one is the keyword that remains set.
//
// |aliases| holds the parsed XML element values to inspect. Elements whose
// text cannot be extracted, or whose text is empty, are skipped so that an
// empty keyword is never stored and |has_custom_keyword_| is only set when a
// real alias was supplied.
void SafeTemplateURLParser::ParseAliases(
    const std::vector<const base::Value*>& aliases) {
  for (auto* alias : aliases) {
    std::string alias_value;
    if (!data_decoder::GetXmlElementText(*alias, &alias_value))
      continue;
    // Defensive guard: do not treat blank text as a custom keyword.
    if (alias_value.empty())
      continue;
    data_.SetKeyword(base::UTF8ToUTF16(alias_value));
    has_custom_keyword_ = true;
  }
}
std::unique_ptr<TemplateURL> SafeTemplateURLParser::FinalizeTemplateURL() {
// TODO(https://crbug.com/18107): Support engines that use POST.
if (method_ == POST || !IsHTTPRef(data_.url()) ||
!IsHTTPRef(data_.suggestions_url)) {
DLOG(ERROR) << "POST URLs are not supported";
return nullptr; return nullptr;
if (suggestion_method_ == TemplateURLParsingContext::POST) }
if (suggestion_method_ == POST)
data_.suggestions_url.clear(); data_.suggestions_url.clear();
// If the image was a data URL, use the favicon from the search URL instead. // If the image was a data URL, use the favicon from the search URL instead.
// (see the TODO in EndElementImpl()). // (see the TODO in ParseImages()).
GURL search_url(data_.url()); GURL search_url(data_.url());
if (derive_image_from_url_ && data_.favicon_url.is_empty()) if (derive_image_from_url_ && data_.favicon_url.is_empty())
data_.favicon_url = TemplateURL::GenerateFaviconURL(search_url); data_.favicon_url = TemplateURL::GenerateFaviconURL(search_url);
...@@ -329,189 +397,55 @@ std::unique_ptr<TemplateURL> TemplateURLParsingContext::GetTemplateURL( ...@@ -329,189 +397,55 @@ std::unique_ptr<TemplateURL> TemplateURLParsingContext::GetTemplateURL(
std::unique_ptr<TemplateURL> template_url = std::unique_ptr<TemplateURL> template_url =
std::make_unique<TemplateURL>(data_); std::make_unique<TemplateURL>(data_);
if (template_url->url().empty() || if (template_url->url().empty() ||
!template_url->url_ref().IsValid(search_terms_data) || !template_url->url_ref().IsValid(*search_terms_data_) ||
(!template_url->suggestions_url().empty() && (!template_url->suggestions_url().empty() &&
!template_url->suggestions_url_ref().IsValid(search_terms_data))) { !template_url->suggestions_url_ref().IsValid(*search_terms_data_))) {
DLOG(ERROR) << "Template URL is not valid";
return nullptr; return nullptr;
} }
return template_url; return template_url;
} }
// static bool SafeTemplateURLParser::GetChildElementsByTag(
void TemplateURLParsingContext::InitMapping() { const base::Value& elem,
kElementNameToElementTypeMap = new std::map<std::string, ElementType>; const std::string& tag,
(*kElementNameToElementTypeMap)[kURLElement] = URL; std::vector<const base::Value*>* children) {
(*kElementNameToElementTypeMap)[kParamElement] = PARAM; bool result = false;
(*kElementNameToElementTypeMap)[kShortNameElement] = SHORT_NAME; for (const auto& ns : namespaces_) {
(*kElementNameToElementTypeMap)[kImageElement] = IMAGE; std::string name = data_decoder::GetXmlQualifiedName(ns, tag);
(*kElementNameToElementTypeMap)[kOpenSearchDescriptionElement] = result |=
OPEN_SEARCH_DESCRIPTION; data_decoder::GetAllXmlElementChildrenWithTag(elem, name, children);
(*kElementNameToElementTypeMap)[kFirefoxSearchDescriptionElement] =
OPEN_SEARCH_DESCRIPTION;
(*kElementNameToElementTypeMap)[kInputEncodingElement] = INPUT_ENCODING;
(*kElementNameToElementTypeMap)[kAliasElement] = ALIAS;
}
void TemplateURLParsingContext::ParseURL(const xmlChar** atts) {
if (!atts)
return;
std::string template_url;
bool is_post = false;
bool is_html_url = false;
bool is_suggest_url = false;
for (; *atts; atts += 2) {
std::string name(XMLCharToString(*atts));
const xmlChar* value = atts[1];
if (name == kURLTypeAttribute) {
std::string type = XMLCharToString(value);
is_html_url = (type == kHTMLType);
is_suggest_url = (type == kSuggestionType);
} else if (name == kURLTemplateAttribute) {
template_url = XMLCharToString(value);
} else if (name == kParamMethodAttribute) {
is_post = base::LowerCaseEqualsASCII(XMLCharToString(value), "post");
}
}
if (is_html_url && !template_url.empty()) {
data_.SetURL(template_url);
is_suggest_url_ = false;
if (is_post)
method_ = POST;
} else if (is_suggest_url) {
data_.suggestions_url = template_url;
is_suggest_url_ = true;
if (is_post)
suggestion_method_ = POST;
}
}
void TemplateURLParsingContext::ParseImage(const xmlChar** atts) {
if (!atts)
return;
int width = 0;
int height = 0;
std::string type;
for (; *atts; atts += 2) {
std::string name(XMLCharToString(*atts));
const xmlChar* value = atts[1];
if (name == kImageTypeAttribute) {
type = XMLCharToString(value);
} else if (name == kImageWidthAttribute) {
base::StringToInt(XMLCharToString(value), &width);
} else if (name == kImageHeightAttribute) {
base::StringToInt(XMLCharToString(value), &height);
}
}
image_is_valid_for_favicon_ = (width == gfx::kFaviconSize) &&
(height == gfx::kFaviconSize) &&
((type == "image/x-icon") || (type == "image/vnd.microsoft.icon"));
}
void TemplateURLParsingContext::ParseParam(const xmlChar** atts) {
if (!atts)
return;
std::string key, value;
for (; *atts; atts += 2) {
std::string name(XMLCharToString(*atts));
const xmlChar* val = atts[1];
if (name == kParamNameAttribute) {
key = XMLCharToString(val);
} else if (name == kParamValueAttribute) {
value = XMLCharToString(val);
}
} }
return result;
if (!key.empty() &&
(parameter_filter_.is_null() || parameter_filter_.Run(key, value)))
extra_params_.push_back(Param(key, value));
} }
void TemplateURLParsingContext::ProcessURLParams() { } // namespace
if (parameter_filter_.is_null() && extra_params_.empty())
return;
GURL url(is_suggest_url_ ? data_.suggestions_url : data_.url());
if (!url.is_valid())
return;
// If there is a parameter filter, parse the existing URL and remove any
// unwanted parameter.
std::string new_query;
bool modified = false;
if (!parameter_filter_.is_null()) {
url::Component query = url.parsed_for_possibly_invalid_spec().query;
url::Component key, value;
const char* url_spec = url.spec().c_str();
while (url::ExtractQueryKeyValue(url_spec, &query, &key, &value)) {
std::string key_str(url_spec, key.begin, key.len);
std::string value_str(url_spec, value.begin, value.len);
if (parameter_filter_.Run(key_str, value_str)) {
AppendParamToQuery(key_str, value_str, &new_query);
} else {
modified = true;
}
}
}
if (!modified)
new_query = url.query();
// Add the extra parameters if any.
if (!extra_params_.empty()) {
modified = true;
for (std::vector<Param>::const_iterator iter(extra_params_.begin());
iter != extra_params_.end(); ++iter)
AppendParamToQuery(iter->first, iter->second, &new_query);
}
if (modified) { // TemplateURLParser ----------------------------------------------------------
GURL::Replacements repl;
repl.SetQueryStr(new_query);
url = url.ReplaceComponents(repl);
if (is_suggest_url_)
data_.suggestions_url = url.spec();
else if (url.is_valid())
data_.SetURL(url.spec());
}
}
TemplateURLParsingContext::ElementType // static
TemplateURLParsingContext::GetKnownType() { void TemplateURLParser::Parse(const SearchTermsData* search_terms_data,
if (elements_.size() == 2 && elements_[0] == OPEN_SEARCH_DESCRIPTION) const std::string& data,
return elements_[1]; const ParameterFilter& parameter_filter,
// We only expect PARAM nodes under the URL node. ParseCallback completion_callback) {
return (elements_.size() == 3 && elements_[0] == OPEN_SEARCH_DESCRIPTION && auto safe_parser = std::make_unique<SafeTemplateURLParser>(
elements_[1] == URL && elements_[2] == PARAM) ? PARAM : UNKNOWN; search_terms_data, parameter_filter, std::move(completion_callback));
data_decoder::DataDecoder::ParseXmlIsolated(
data, base::BindOnce(&SafeTemplateURLParser::OnXmlParseComplete,
std::move(safe_parser)));
} }
// TemplateURLParser ----------------------------------------------------------
// static // static
std::unique_ptr<TemplateURL> TemplateURLParser::Parse( void TemplateURLParser::ParseWithDataDecoder(
const SearchTermsData& search_terms_data, data_decoder::DataDecoder* data_decoder,
const char* data, const SearchTermsData* search_terms_data,
size_t length, const std::string& data,
const TemplateURLParser::ParameterFilter& param_filter) { const ParameterFilter& parameter_filter,
// xmlSubstituteEntitiesDefault(1) makes it so that &amp; isn't mapped to ParseCallback completion_callback) {
// &#38; . Unfortunately xmlSubstituteEntitiesDefault affects global state. auto safe_parser = std::make_unique<SafeTemplateURLParser>(
// If this becomes problematic we'll need to provide our own entity search_terms_data, parameter_filter, std::move(completion_callback));
// type for &amp;, or strip out &#38; by hand after parsing. data_decoder->ParseXml(
int last_sub_entities_value = xmlSubstituteEntitiesDefault(1); data, base::BindOnce(&SafeTemplateURLParser::OnXmlParseComplete,
TemplateURLParsingContext context(param_filter); std::move(safe_parser)));
xmlSAXHandler sax_handler;
memset(&sax_handler, 0, sizeof(sax_handler));
sax_handler.startElement = &TemplateURLParsingContext::StartElementImpl;
sax_handler.endElement = &TemplateURLParsingContext::EndElementImpl;
sax_handler.characters = &TemplateURLParsingContext::CharactersImpl;
int error = xmlSAXUserParseMemory(&sax_handler, &context, data,
static_cast<int>(length));
xmlSubstituteEntitiesDefault(last_sub_entities_value);
return error ? nullptr : context.GetTemplateURL(search_terms_data);
} }
...@@ -16,6 +16,10 @@ ...@@ -16,6 +16,10 @@
class SearchTermsData; class SearchTermsData;
class TemplateURL; class TemplateURL;
namespace data_decoder {
class DataDecoder;
}
// TemplateURLParser, as the name implies, handling reading of TemplateURLs // TemplateURLParser, as the name implies, handling reading of TemplateURLs
// from OpenSearch description documents. // from OpenSearch description documents.
class TemplateURLParser { class TemplateURLParser {
...@@ -27,19 +31,30 @@ class TemplateURLParser { ...@@ -27,19 +31,30 @@ class TemplateURLParser {
using ParameterFilter = using ParameterFilter =
base::RepeatingCallback<bool(const std::string&, const std::string&)>; base::RepeatingCallback<bool(const std::string&, const std::string&)>;
using ParseCallback = base::OnceCallback<void(std::unique_ptr<TemplateURL>)>;
// Decodes the chunk of data representing a TemplateURL, creates the // Decodes the chunk of data representing a TemplateURL, creates the
// TemplateURL, and returns it. Returns null if the data does not describe a // TemplateURL, and calls the |completion_callback| with the result. A null
// valid TemplateURL, the URLs referenced do not point to valid http/https // value is provided if the data does not describe a valid TemplateURL, the
// resources, or for some other reason we do not support the described // URLs referenced do not point to valid http/https resources, or for some
// TemplateURL. |parameter_filter| can be used if you want to filter some // other reason we do not support the described TemplateURL.
// parameters out of the URL. For example, when importing from another // |parameter_filter| can be used if you want to filter some parameters out
// browser, we remove any parameter identifying that browser. If set to null, // of the URL. For example, when importing from another browser, we remove
// the URL is not modified. // any parameter identifying that browser. If set to null, the URL is not
static std::unique_ptr<TemplateURL> Parse( // modified.
const SearchTermsData& search_terms_data, static void Parse(const SearchTermsData* search_terms_data,
const char* data, const std::string& data,
size_t length, const ParameterFilter& parameter_filter,
const ParameterFilter& parameter_filter); ParseCallback completion_callback);
// The same as Parse(), but it allows the caller to manage the lifetime of
// the DataDecoder service. The |data_decoder| must be kept alive until the
// |completion_callback| is called.
static void ParseWithDataDecoder(data_decoder::DataDecoder* data_decoder,
const SearchTermsData* search_terms_data,
const std::string& data,
const ParameterFilter& parameter_filter,
ParseCallback completion_callback);
private: private:
// No one should create one of these. // No one should create one of these.
......
...@@ -31,11 +31,13 @@ source_set("cpp") { ...@@ -31,11 +31,13 @@ source_set("cpp") {
public = [ public = [
"data_decoder.h", "data_decoder.h",
"json_sanitizer.h", "json_sanitizer.h",
"safe_xml_parser.h",
] ]
sources = [ sources = [
"data_decoder.cc", "data_decoder.cc",
"json_sanitizer.cc", "json_sanitizer.cc",
"safe_xml_parser.cc",
] ]
configs += [ "//build/config/compiler:wexit_time_destructors" ] configs += [ "//build/config/compiler:wexit_time_destructors" ]
...@@ -68,12 +70,10 @@ source_set("cpp") { ...@@ -68,12 +70,10 @@ source_set("cpp") {
public += [ public += [
"decode_image.h", "decode_image.h",
"safe_bundled_exchanges_parser.h", "safe_bundled_exchanges_parser.h",
"safe_xml_parser.h",
] ]
sources += [ sources += [
"decode_image.cc", "decode_image.cc",
"safe_bundled_exchanges_parser.cc", "safe_bundled_exchanges_parser.cc",
"safe_xml_parser.cc",
] ]
} }
......
...@@ -67,6 +67,8 @@ fuzzer_test("template_url_parser_fuzzer") { ...@@ -67,6 +67,8 @@ fuzzer_test("template_url_parser_fuzzer") {
"//base", "//base",
"//base:i18n", "//base:i18n",
"//components/search_engines:search_engines", "//components/search_engines:search_engines",
"//services/data_decoder/public/cpp",
"//services/data_decoder/public/cpp:test_support",
"//third_party/libxml:libxml", "//third_party/libxml:libxml",
] ]
dict = "//third_party/libxml/fuzz/xml.dict" dict = "//third_party/libxml/fuzz/xml.dict"
......
...@@ -14,9 +14,12 @@ ...@@ -14,9 +14,12 @@
#include "base/bind.h" #include "base/bind.h"
#include "base/command_line.h" #include "base/command_line.h"
#include "base/i18n/icu_util.h" #include "base/i18n/icu_util.h"
#include "base/run_loop.h"
#include "base/task/single_thread_task_executor.h"
#include "components/search_engines/search_terms_data.h" #include "components/search_engines/search_terms_data.h"
#include "components/search_engines/template_url.h" #include "components/search_engines/template_url.h"
#include "components/search_engines/template_url_parser.h" #include "components/search_engines/template_url_parser.h"
#include "services/data_decoder/public/cpp/test_support/in_process_data_decoder.h"
#include "testing/libfuzzer/libfuzzer_exports.h" #include "testing/libfuzzer/libfuzzer_exports.h"
bool PseudoRandomFilter(std::mt19937* generator, bool PseudoRandomFilter(std::mt19937* generator,
...@@ -45,7 +48,11 @@ void ignore(void* ctx, const char* msg, ...) { ...@@ -45,7 +48,11 @@ void ignore(void* ctx, const char* msg, ...) {
class Env { class Env {
public: public:
Env() { xmlSetGenericErrorFunc(NULL, &ignore); } Env() { xmlSetGenericErrorFunc(nullptr, &ignore); }
private:
base::SingleThreadTaskExecutor executor_;
data_decoder::test::InProcessDataDecoder data_decoder_;
}; };
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
...@@ -63,11 +70,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { ...@@ -63,11 +70,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
// does not support 8 bit types on Windows. // does not support 8 bit types on Windows.
std::uniform_int_distribution<uint16_t> pool(0, 1); std::uniform_int_distribution<uint16_t> pool(0, 1);
base::RunLoop run_loop;
SearchTermsData search_terms_data;
std::string string_data(reinterpret_cast<const char*>(params + 1), size);
TemplateURLParser::ParameterFilter filter = TemplateURLParser::ParameterFilter filter =
base::BindRepeating(&PseudoRandomFilter, base::Unretained(&generator), base::BindRepeating(&PseudoRandomFilter, base::Unretained(&generator),
base::Unretained(&pool)); base::Unretained(&pool));
TemplateURLParser::Parse(&search_terms_data, string_data, filter,
base::BindOnce(
[](base::OnceClosure quit_closure,
std::unique_ptr<TemplateURL> ignored) {
std::move(quit_closure).Run();
},
run_loop.QuitClosure()));
run_loop.Run();
const char* char_data = reinterpret_cast<const char*>(params + 1);
TemplateURLParser::Parse(SearchTermsData(), char_data, size, filter);
return 0; return 0;
} }
...@@ -141,7 +141,6 @@ static_library("libxml") { ...@@ -141,7 +141,6 @@ static_library("libxml") {
":xml_reader", ":xml_reader",
":xml_writer", ":xml_writer",
":libxml_utils", ":libxml_utils",
"//components/search_engines",
"//testing/libfuzzer/*", "//testing/libfuzzer/*",
"//third_party/blink/renderer/*", "//third_party/blink/renderer/*",
"//third_party/fontconfig", "//third_party/fontconfig",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment