Commit 32ff845f authored by Karan Bhatia's avatar Karan Bhatia Committed by Commit Bot

Declarative Net Request: Disallow non-ascii chars in url patterns and domains.

While comparing a rule against a url, url_pattern_index assumes that the
provided pattern and domains are in canonical form (internationalized domains in
punycode and non-ascii characters in percent-escaped utf8). Document this
expectation and disallow rules where the "urlFilter", "domains" and
"excludedDomains" properties consist of non-ascii characters.

BUG=878138

Change-Id: I0b23200349a1aee6ed13adb09f61621ab235917d
Reviewed-on: https://chromium-review.googlesource.com/1195095
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Reviewed-by: default avatarIstiaque Ahmed <lazyboy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#587363}
parent bf70c90e
......@@ -19,6 +19,7 @@
#include "base/path_service.h"
#include "base/rand_util.h"
#include "base/run_loop.h"
#include "base/strings/utf_string_conversions.h"
#include "base/synchronization/lock.h"
#include "base/test/metrics/histogram_tester.h"
#include "base/threading/thread_restrictions.h"
......@@ -482,7 +483,10 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
{"|http://*.us", 3},
{"pages_with_script/page2.html|", 4},
{"|http://msn*/pages_with_script/page.html|", 5},
{"%20", 6}, // Block any urls with space.
{"%20", 6}, // Block any urls with space.
{"%C3%A9", 7}, // Percent-encoded non-ascii character é.
// Internationalized domain "ⱴase.com" in punycode.
{"|http://xn--ase-7z0b.com", 8},
};
// Rule |i| is the rule with id |i|.
......@@ -506,6 +510,12 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
{"abc.com", "/pages_with_script/page.html?q=hi bye", false}, // Rule 6
{"abc.com", "/pages_with_script/page.html?q=hi%20bye", false}, // Rule 6
{"abc.com", "/pages_with_script/page.html?q=hibye", true},
{"abc.com",
"/pages_with_script/page.html?q=" + base::WideToUTF8(L"\u00E9"),
false}, // Rule 7
{base::WideToUTF8(L"\x2c74"
L"ase.com"),
"/pages_with_script/page.html", false}, // Rule 8
};
// Load the extension.
......@@ -655,7 +665,10 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
size_t id;
std::vector<std::string> domains;
std::vector<std::string> excluded_domains;
} rules_data[] = {{"child_frame.html?frame=1", 1, {"x.com"}, {"a.x.com"}},
} rules_data[] = {{"child_frame.html?frame=1",
1,
{"x.com", "xn--36c-tfa.com" /* punycode for 36°c.com */},
{"a.x.com"}},
{"child_frame.html?frame=2", 2, {}, {"a.y.com"}}};
std::vector<TestRule> rules;
......@@ -681,6 +694,9 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
bool expect_frame_2_loaded;
} test_cases[] = {
{"x.com", false /* Rule 1 */, false /* Rule 2 */},
{base::WideToUTF8(L"36\x00b0"
L"c.com" /* 36°c.com */),
false /*Rule 1*/, false /*Rule 2*/},
{"b.x.com", false /* Rule 1 */, false /* Rule 2 */},
{"a.x.com", true, false /* Rule 2 */},
{"b.a.x.com", true, false /* Rule 2 */},
......
......@@ -29,7 +29,8 @@ const char kErrorDuplicateIDs[] =
// detail.
const char kErrorPersisting[] = "*: Rules file could not be parsed.";
const char kErrorListNotPassed[] = "*: Rules file must contain a list.";
const char kErrorNonAscii[] =
"*: Rule at index * cannot have non-ascii characters as part of \"*\" key.";
const char kRulesNotParsedWarning[] =
"Declarative Net Request: Not all rules were successfully parsed.";
......
......@@ -27,6 +27,10 @@ enum class ParseResult {
ERROR_DUPLICATE_IDS,
ERROR_PERSISTING_RULESET,
ERROR_LIST_NOT_PASSED,
// Parse errors related to fields containing non-ascii characters.
ERROR_NON_ASCII_URL_FILTER,
ERROR_NON_ASCII_DOMAIN,
ERROR_NON_ASCII_EXCLUDED_DOMAIN,
};
// Rule parsing errors.
......@@ -41,6 +45,7 @@ extern const char kErrorListNotPassed[];
extern const char kErrorDuplicateIDs[];
extern const char kErrorPersisting[];
extern const char kErrorListNotPassed[];
extern const char kErrorNonAscii[];
// Rule parsing install warnings.
extern const char kRulesNotParsedWarning[];
......
......@@ -228,24 +228,31 @@ ParseResult ComputeElementTypes(const dnr_api::RuleCondition& condition,
return ParseResult::SUCCESS;
}
// Lower-cases and sorts domains, as required by the url_pattern_index
// component.
std::vector<std::string> CanonicalizeDomains(
std::unique_ptr<std::vector<std::string>> domains) {
// Lower-cases and sorts |domains|, as required by the url_pattern_index
// component and stores the result in |output|. Returns false in case of
// failure, when one of the input strings contains non-ascii characters.
bool CanonicalizeDomains(std::unique_ptr<std::vector<std::string>> domains,
std::vector<std::string>* output) {
DCHECK(output);
DCHECK(output->empty());
if (!domains)
return std::vector<std::string>();
return true;
// Convert to lower case as required by the url_pattern_index component.
for (size_t i = 0; i < domains->size(); i++)
(*domains)[i] = base::ToLowerASCII((*domains)[i]);
for (const std::string& domain : *domains) {
if (!base::IsStringASCII(domain))
return false;
output->push_back(base::ToLowerASCII(domain));
}
std::sort(domains->begin(), domains->end(),
std::sort(output->begin(), output->end(),
[](const std::string& left, const std::string& right) {
return url_pattern_index::CompareDomains(left, right) < 0;
});
// Move the vector, because it isn't eligible for return value optimization.
return std::move(*domains);
return true;
}
} // namespace
......@@ -288,9 +295,11 @@ ParseResult IndexedRule::CreateIndexedRule(
return ParseResult::ERROR_EMPTY_RESOURCE_TYPES_LIST;
}
if (parsed_rule->condition.url_filter &&
parsed_rule->condition.url_filter->empty()) {
return ParseResult::ERROR_EMPTY_URL_FILTER;
if (parsed_rule->condition.url_filter) {
if (parsed_rule->condition.url_filter->empty())
return ParseResult::ERROR_EMPTY_URL_FILTER;
if (!base::IsStringASCII(*parsed_rule->condition.url_filter))
return ParseResult::ERROR_NON_ASCII_URL_FILTER;
}
indexed_rule->id = base::checked_cast<uint32_t>(parsed_rule->id);
......@@ -306,10 +315,15 @@ ParseResult IndexedRule::CreateIndexedRule(
return result;
}
indexed_rule->domains =
CanonicalizeDomains(std::move(parsed_rule->condition.domains));
indexed_rule->excluded_domains =
CanonicalizeDomains(std::move(parsed_rule->condition.excluded_domains));
if (!CanonicalizeDomains(std::move(parsed_rule->condition.domains),
&indexed_rule->domains)) {
return ParseResult::ERROR_NON_ASCII_DOMAIN;
}
if (!CanonicalizeDomains(std::move(parsed_rule->condition.excluded_domains),
&indexed_rule->excluded_domains)) {
return ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN;
}
if (is_redirect_rule)
indexed_rule->redirect_url = std::move(*parsed_rule->action.redirect_url);
......
......@@ -11,6 +11,7 @@
#include "base/macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "components/version_info/version_info.h"
#include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/common/api/declarative_net_request.h"
......@@ -258,6 +259,11 @@ TEST_F(IndexedRuleTest, UrlFilterParsing) {
{std::make_unique<std::string>("||google.com"),
flat_rule::UrlPatternType_SUBSTRING, flat_rule::AnchorType_SUBDOMAIN,
flat_rule::AnchorType_NONE, "google.com", ParseResult::SUCCESS},
// Url pattern with non-ascii characters -ⱴase.com.
{std::make_unique<std::string>(base::WideToUTF8(L"\x2c74"
L"ase.com")),
flat_rule::UrlPatternType_SUBSTRING, flat_rule::AnchorType_NONE,
flat_rule::AnchorType_NONE, "", ParseResult::ERROR_NON_ASCII_URL_FILTER},
};
for (size_t i = 0; i < arraysize(cases); ++i) {
......@@ -306,7 +312,30 @@ TEST_F(IndexedRuleTest, DomainsParsing) {
DomainVec({"g.com", "XY.COM", "zzz.com", "a.com", "google.com"})),
ParseResult::SUCCESS,
{"a.com", "a.com", "b.com"},
{"google.com", "zzz.com", "xy.com", "a.com", "g.com"}}};
{"google.com", "zzz.com", "xy.com", "a.com", "g.com"}},
// Domain with non-ascii characters.
{std::make_unique<DomainVec>(
DomainVec({base::WideToUTF8(L"abc\x2010" /*hyphen*/ L"def.com")})),
nullptr,
ParseResult::ERROR_NON_ASCII_DOMAIN,
{},
{}},
// Excluded domain with non-ascii characters.
{nullptr,
std::make_unique<DomainVec>(
DomainVec({base::WideToUTF8(L"36\x00b0"
L"c.com" /*36°c.com*/)})),
ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN,
{},
{}},
// Internationalized domain in punycode.
{std::make_unique<DomainVec>(
DomainVec({"xn--36c-tfa.com" /* punycode for 36°c.com*/})),
nullptr,
ParseResult::SUCCESS,
{"xn--36c-tfa.com"},
{}},
};
for (size_t i = 0; i < arraysize(cases); ++i) {
SCOPED_TRACE(base::StringPrintf("Testing case[%" PRIuS "]", i));
......
......@@ -94,6 +94,21 @@ std::string ParseInfo::GetErrorDescription(
error = ErrorUtils::FormatErrorMessage(kErrorListNotPassed,
json_rules_filename);
break;
case ParseResult::ERROR_NON_ASCII_URL_FILTER:
error = ErrorUtils::FormatErrorMessage(
kErrorNonAscii, json_rules_filename, std::to_string(*rule_index_),
kUrlFilterKey);
break;
case ParseResult::ERROR_NON_ASCII_DOMAIN:
error = ErrorUtils::FormatErrorMessage(
kErrorNonAscii, json_rules_filename, std::to_string(*rule_index_),
kDomainsKey);
break;
case ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN:
error = ErrorUtils::FormatErrorMessage(
kErrorNonAscii, json_rules_filename, std::to_string(*rule_index_),
kExcludedDomainsKey);
break;
}
return error;
}
......
......@@ -58,6 +58,12 @@ namespace declarativeNetRequest {
// Therefore urlFilter is composed of the following parts:
// (optional Left/Domain name anchor) + pattern + (optional Right anchor)
// If omitted, all urls are matched. An empty string is not allowed.
// Note: The urlFilter must be composed of only ASCII characters. This is
// matched against a url where the host is encoded in the punycode format
// (in case of internationalized domains) and any other non-ascii characters
// are url encoded in utf-8.
// For example, when the request url is http://abc.рф?q=ф, the urlFilter
// will be matched against the url http://abc.xn--p1ai/?q=%D1%84.
DOMString? urlFilter;
// Whether the |urlFilter| is case sensitive. Default is false.
......@@ -67,12 +73,16 @@ namespace declarativeNetRequest {
// |domains|. If the list is omitted, the rule is applied to requests from
// all domains. An empty list is not allowed.
// Note: sub-domains like "a.example.com" are also allowed.
// The entries must consist of only ascii characters. Use punycode encoding
// for internationalized domains.
DOMString[]? domains;
// The rule will not match network requests originating from the list of
// |excludedDomains|. If the list is empty or omitted, no domains are
// excluded. This takes precedence over |domains|.
// Note: sub-domains like "a.example.com" are also allowed.
// The entries must consist of only ascii characters. Use punycode encoding
// for internationalized domains.
DOMString[]? excludedDomains;
// List of resource types which the rule can match. An empty list is not
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment