Commit 874b919d authored by Alexander Yashkin's avatar Alexander Yashkin Committed by Commit Bot

Fix search term encoding

Fix for incorrect handling of encodings used for search terms.
TemplateURL::EncodeSearchTerms function was always using first passed
codepage instead of trying to use passed codepages in the order provided,
as stated in online docs for DefaultSearchProviderEncodings policy.

Bug: 582756
Change-Id: I659f82f0e777c950a888e2ceff8726c4fdcf0378
Reviewed-on: https://chromium-review.googlesource.com/922621
Commit-Queue: Alexander Yashkin <a-v-y@yandex-team.ru>
Reviewed-by: default avatarPeter Kasting <pkasting@chromium.org>
Cr-Commit-Position: refs/heads/master@{#537547}
parent 6d87f986
...@@ -71,19 +71,23 @@ const char kOutputEncodingType[] = "UTF-8"; ...@@ -71,19 +71,23 @@ const char kOutputEncodingType[] = "UTF-8";
// Attempts to encode |terms| and |original_query| in |encoding| and escape // Attempts to encode |terms| and |original_query| in |encoding| and escape
// them. |terms| may be escaped as path or query depending on |is_in_query|; // them. |terms| may be escaped as path or query depending on |is_in_query|;
// |original_query| is always escaped as query. Returns whether the encoding // |original_query| is always escaped as query. If |force_encode| is true
// process succeeded. // encoding ignores errors and function always returns true. Otherwise function
// returns whether the encoding process succeeded.
bool TryEncoding(const base::string16& terms, bool TryEncoding(const base::string16& terms,
const base::string16& original_query, const base::string16& original_query,
const char* encoding, const char* encoding,
bool is_in_query, bool is_in_query,
bool force_encode,
base::string16* escaped_terms, base::string16* escaped_terms,
base::string16* escaped_original_query) { base::string16* escaped_original_query) {
DCHECK(escaped_terms); DCHECK(escaped_terms);
DCHECK(escaped_original_query); DCHECK(escaped_original_query);
base::OnStringConversionError::Type error_handling =
force_encode ? base::OnStringConversionError::SKIP
: base::OnStringConversionError::FAIL;
std::string encoded_terms; std::string encoded_terms;
if (!base::UTF16ToCodepage(terms, encoding, if (!base::UTF16ToCodepage(terms, encoding, error_handling, &encoded_terms))
base::OnStringConversionError::SKIP, &encoded_terms))
return false; return false;
*escaped_terms = base::UTF8ToUTF16(is_in_query ? *escaped_terms = base::UTF8ToUTF16(is_in_query ?
net::EscapeQueryParamValue(encoded_terms, true) : net::EscapeQueryParamValue(encoded_terms, true) :
...@@ -91,8 +95,8 @@ bool TryEncoding(const base::string16& terms, ...@@ -91,8 +95,8 @@ bool TryEncoding(const base::string16& terms,
if (original_query.empty()) if (original_query.empty())
return true; return true;
std::string encoded_original_query; std::string encoded_original_query;
if (!base::UTF16ToCodepage(original_query, encoding, if (!base::UTF16ToCodepage(original_query, encoding, error_handling,
base::OnStringConversionError::SKIP, &encoded_original_query)) &encoded_original_query))
return false; return false;
*escaped_original_query = base::UTF8ToUTF16( *escaped_original_query = base::UTF8ToUTF16(
net::EscapeQueryParamValue(encoded_original_query, true)); net::EscapeQueryParamValue(encoded_original_query, true));
...@@ -1425,11 +1429,11 @@ void TemplateURL::EncodeSearchTerms( ...@@ -1425,11 +1429,11 @@ void TemplateURL::EncodeSearchTerms(
std::vector<std::string> encodings(input_encodings()); std::vector<std::string> encodings(input_encodings());
if (std::find(encodings.begin(), encodings.end(), "UTF-8") == encodings.end()) if (std::find(encodings.begin(), encodings.end(), "UTF-8") == encodings.end())
encodings.push_back("UTF-8"); encodings.push_back("UTF-8");
for (std::vector<std::string>::const_iterator i(encodings.begin()); for (auto i = encodings.begin(); i != encodings.end(); ++i) {
i != encodings.end(); ++i) {
if (TryEncoding(search_terms_args.search_terms, if (TryEncoding(search_terms_args.search_terms,
search_terms_args.original_query, i->c_str(), search_terms_args.original_query, i->c_str(), is_in_query,
is_in_query, encoded_terms, encoded_original_query)) { std::next(i) == encodings.end(), encoded_terms,
encoded_original_query)) {
*input_encoding = *i; *input_encoding = *i;
return; return;
} }
......
...@@ -522,18 +522,14 @@ TEST_F(TemplateURLTest, ReplaceArbitrarySearchTerms) { ...@@ -522,18 +522,14 @@ TEST_F(TemplateURLTest, ReplaceArbitrarySearchTerms) {
const std::string url; const std::string url;
const std::string expected_result; const std::string expected_result;
} test_data[] = { } test_data[] = {
{ "BIG5", base::WideToUTF16(L"\x60BD"), {"BIG5", base::WideToUTF16(L"\x60BD"),
"http://foo/?{searchTerms}{inputEncoding}", "http://foo/?{searchTerms}{inputEncoding}", "http://foo/?%B1~BIG5"},
"http://foo/?%B1~BIG5" }, {"UTF-8", ASCIIToUTF16("blah"),
{ "UTF-8", ASCIIToUTF16("blah"), "http://foo/?{searchTerms}{inputEncoding}", "http://foo/?blahUTF-8"},
"http://foo/?{searchTerms}{inputEncoding}", {"Shift_JIS", base::UTF8ToUTF16("\xe3\x81\x82"),
"http://foo/?blahUTF-8" }, "http://foo/{searchTerms}/bar", "http://foo/%82%A0/bar"},
{ "Shift_JIS", base::UTF8ToUTF16("\xe3\x81\x82"), {"Shift_JIS", base::UTF8ToUTF16("\xe3\x81\x82 \xe3\x81\x84"),
"http://foo/{searchTerms}/bar", "http://foo/{searchTerms}/bar", "http://foo/%82%A0%20%82%A2/bar"},
"http://foo/%82%A0/bar"},
{ "Shift_JIS", base::UTF8ToUTF16("\xe3\x81\x82 \xe3\x81\x84"),
"http://foo/{searchTerms}/bar",
"http://foo/%82%A0%20%82%A2/bar"},
}; };
TemplateURLData data; TemplateURLData data;
for (size_t i = 0; i < arraysize(test_data); ++i) { for (size_t i = 0; i < arraysize(test_data); ++i) {
...@@ -551,6 +547,57 @@ TEST_F(TemplateURLTest, ReplaceArbitrarySearchTerms) { ...@@ -551,6 +547,57 @@ TEST_F(TemplateURLTest, ReplaceArbitrarySearchTerms) {
} }
} }
// Test that encoding with several optional codepages works as intended.
// Codepages are tried in order, fallback is UTF-8.
TEST_F(TemplateURLTest, ReplaceSearchTermsMultipleEncodings) {
struct TestData {
const std::vector<std::string> encodings;
const base::string16 search_term;
const std::string url;
const std::string expected_result;
} test_data[] = {
// First and third encodings are valid. First is used.
{{"windows-1251", "cp-866", "UTF-8"},
base::UTF8ToUTF16("\xD1\x8F"),
"http://foo/?{searchTerms}{inputEncoding}",
"http://foo/?%FFwindows-1251"},
// Second and third encodings are valid, second is used.
{{"cp-866", "GB2312", "UTF-8"},
base::UTF8ToUTF16("\xE7\x8B\x97"),
"http://foo/?{searchTerms}{inputEncoding}",
"http://foo/?%B9%B7GB2312"},
// Second and third encodings are valid in another order, second is used.
{{"cp-866", "UTF-8", "GB2312"},
base::UTF8ToUTF16("\xE7\x8B\x97"),
"http://foo/?{searchTerms}{inputEncoding}",
"http://foo/?%E7%8B%97UTF-8"},
// Both encodings are invalid, fallback to UTF-8.
{{"cp-866", "windows-1251"},
base::UTF8ToUTF16("\xE7\x8B\x97"),
"http://foo/?{searchTerms}{inputEncoding}",
"http://foo/?%E7%8B%97UTF-8"},
// No encodings are given, fallback to UTF-8.
{{},
base::UTF8ToUTF16("\xE7\x8B\x97"),
"http://foo/?{searchTerms}{inputEncoding}",
"http://foo/?%E7%8B%97UTF-8"},
};
TemplateURLData data;
for (size_t i = 0; i < arraysize(test_data); ++i) {
data.SetURL(test_data[i].url);
data.input_encodings = test_data[i].encodings;
TemplateURL url(data);
EXPECT_TRUE(url.url_ref().IsValid(search_terms_data_));
ASSERT_TRUE(url.url_ref().SupportsReplacement(search_terms_data_));
GURL result(url.url_ref().ReplaceSearchTerms(
TemplateURLRef::SearchTermsArgs(test_data[i].search_term),
search_terms_data_));
ASSERT_TRUE(result.is_valid());
EXPECT_EQ(test_data[i].expected_result, result.spec());
}
}
// Tests replacing assisted query stats (AQS) in various scenarios. // Tests replacing assisted query stats (AQS) in various scenarios.
TEST_F(TemplateURLTest, ReplaceAssistedQueryStats) { TEST_F(TemplateURLTest, ReplaceAssistedQueryStats) {
struct TestData { struct TestData {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment