Declarative Net Request: Disallow non-ascii chars in url patterns and domains.

While comparing a rule against a url, url_pattern_index assumes that the provided pattern and domains are in canonical form (internationalized domains in punycode and non-ascii characters in percent-escaped utf8). Document this expectation and disallow rules where the "urlFilter", "domains" and "excludedDomains" properties consist of non-ascii characters. BUG=878138 Change-Id: I0b23200349a1aee6ed13adb09f61621ab235917d Reviewed-on: https://chromium-review.googlesource.com/1195095 Commit-Queue: Karan Bhatia <karandeepb@chromium.org> Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org> Cr-Commit-Position: refs/heads/master@{#587363}

Declarative Net Request: Disallow non-ascii chars in url patterns and domains.
While comparing a rule against a url, url_pattern_index assumes that the provided pattern and domains are in canonical form (internationalized domains in punycode and non-ascii characters in percent-escaped utf8). Document this expectation and disallow rules where the "urlFilter", "domains" and "excludedDomains" properties consist of non-ascii characters. BUG=878138 Change-Id: I0b23200349a1aee6ed13adb09f61621ab235917d Reviewed-on: https://chromium-review.googlesource.com/1195095 Commit-Queue: Karan Bhatia <karandeepb@chromium.org> Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org> Cr-Commit-Position: refs/heads/master@{#587363}
32ff845f · Karan Bhatia · Commit Bot · bf70c90e · 32ff845f · 32ff845f
Commit 32ff845f authored Aug 30, 2018 by Karan Bhatia Committed by Commit Bot Aug 30, 2018
7 changed files
--- a/chrome/browser/extensions/api/declarative_net_request/declarative_net_request_browsertest.cc
+++ b/chrome/browser/extensions/api/declarative_net_request/declarative_net_request_browsertest.cc
@@ -19,6 +19,7 @@
 #include "base/path_service.h"
 #include "base/rand_util.h"
 #include "base/run_loop.h"
+#include "base/strings/utf_string_conversions.h"
 #include "base/synchronization/lock.h"
 #include "base/test/metrics/histogram_tester.h"
 #include "base/threading/thread_restrictions.h"
@@ -482,7 +483,10 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
      {"|http://*.us", 3},
      {"pages_with_script/page2.html|", 4},
      {"|http://msn*/pages_with_script/page.html|", 5},
-      {"%20", 6},  // Block any urls with space.
+      {"%20", 6},     // Block any urls with space.
+      {"%C3%A9", 7},  // Percent-encoded non-ascii character é.
+      // Internationalized domain "ⱴase.com" in punycode.
+      {"|http://xn--ase-7z0b.com", 8},
  };

  // Rule |i| is the rule with id |i|.
@@ -506,6 +510,12 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
      {"abc.com", "/pages_with_script/page.html?q=hi bye", false},    // Rule 6
      {"abc.com", "/pages_with_script/page.html?q=hi%20bye", false},  // Rule 6
      {"abc.com", "/pages_with_script/page.html?q=hibye", true},
+      {"abc.com",
+       "/pages_with_script/page.html?q=" + base::WideToUTF8(L"\u00E9"),
+       false},  // Rule 7
+      {base::WideToUTF8(L"\x2c74"
+                        L"ase.com"),
+       "/pages_with_script/page.html", false},  // Rule 8
  };

  // Load the extension.
@@ -655,7 +665,10 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
    size_t id;
    std::vector<std::string> domains;
    std::vector<std::string> excluded_domains;
-  } rules_data[] = {{"child_frame.html?frame=1", 1, {"x.com"}, {"a.x.com"}},
+  } rules_data[] = {{"child_frame.html?frame=1",
+                     1,
+                     {"x.com", "xn--36c-tfa.com" /* punycode for 36°c.com */},
+                     {"a.x.com"}},
                    {"child_frame.html?frame=2", 2, {}, {"a.y.com"}}};

  std::vector<TestRule> rules;
@@ -681,6 +694,9 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
    bool expect_frame_2_loaded;
  } test_cases[] = {
      {"x.com", false /* Rule 1 */, false /* Rule 2 */},
+      {base::WideToUTF8(L"36\x00b0"
+                        L"c.com" /* 36°c.com */),
+       false /*Rule 1*/, false /*Rule 2*/},
      {"b.x.com", false /* Rule 1 */, false /* Rule 2 */},
      {"a.x.com", true, false /* Rule 2 */},
      {"b.a.x.com", true, false /* Rule 2 */},

--- a/extensions/browser/api/declarative_net_request/constants.cc
+++ b/extensions/browser/api/declarative_net_request/constants.cc
@@ -29,7 +29,8 @@ const char kErrorDuplicateIDs[] =
 // detail.
 const char kErrorPersisting[] = "*: Rules file could not be parsed.";
 const char kErrorListNotPassed[] = "*: Rules file must contain a list.";
-
+const char kErrorNonAscii[] =
+    "*: Rule at index * cannot have non-ascii characters as part of \"*\" key.";
 const char kRulesNotParsedWarning[] =
    "Declarative Net Request: Not all rules were successfully parsed.";


--- a/extensions/browser/api/declarative_net_request/constants.h
+++ b/extensions/browser/api/declarative_net_request/constants.h
@@ -27,6 +27,10 @@ enum class ParseResult {
  ERROR_DUPLICATE_IDS,
  ERROR_PERSISTING_RULESET,
  ERROR_LIST_NOT_PASSED,
+  // Parse errors related to fields containing non-ascii characters.
+  ERROR_NON_ASCII_URL_FILTER,
+  ERROR_NON_ASCII_DOMAIN,
+  ERROR_NON_ASCII_EXCLUDED_DOMAIN,
 };

 // Rule parsing errors.
@@ -41,6 +45,7 @@ extern const char kErrorListNotPassed[];
 extern const char kErrorDuplicateIDs[];
 extern const char kErrorPersisting[];
 extern const char kErrorListNotPassed[];
+extern const char kErrorNonAscii[];

 // Rule parsing install warnings.
 extern const char kRulesNotParsedWarning[];

--- a/extensions/browser/api/declarative_net_request/indexed_rule.cc
+++ b/extensions/browser/api/declarative_net_request/indexed_rule.cc
@@ -228,24 +228,31 @@ ParseResult ComputeElementTypes(const dnr_api::RuleCondition& condition,
  return ParseResult::SUCCESS;
 }

-// Lower-cases and sorts domains, as required by the url_pattern_index
-// component.
-std::vector<std::string> CanonicalizeDomains(
-    std::unique_ptr<std::vector<std::string>> domains) {
+// Lower-cases and sorts |domains|, as required by the url_pattern_index
+// component and stores the result in |output|. Returns false in case of
+// failure, when one of the input strings contains non-ascii characters.
+bool CanonicalizeDomains(std::unique_ptr<std::vector<std::string>> domains,
+                         std::vector<std::string>* output) {
+  DCHECK(output);
+  DCHECK(output->empty());
+
  if (!domains)
-    return std::vector<std::string>();
+    return true;

  // Convert to lower case as required by the url_pattern_index component.
-  for (size_t i = 0; i < domains->size(); i++)
-    (*domains)[i] = base::ToLowerASCII((*domains)[i]);
+  for (const std::string& domain : *domains) {
+    if (!base::IsStringASCII(domain))
+      return false;
+
+    output->push_back(base::ToLowerASCII(domain));
+  }

-  std::sort(domains->begin(), domains->end(),
+  std::sort(output->begin(), output->end(),
            [](const std::string& left, const std::string& right) {
              return url_pattern_index::CompareDomains(left, right) < 0;
            });

-  // Move the vector, because it isn't eligible for return value optimization.
-  return std::move(*domains);
+  return true;
 }

 }  // namespace
@@ -288,9 +295,11 @@ ParseResult IndexedRule::CreateIndexedRule(
    return ParseResult::ERROR_EMPTY_RESOURCE_TYPES_LIST;
  }

-  if (parsed_rule->condition.url_filter &&
-      parsed_rule->condition.url_filter->empty()) {
-    return ParseResult::ERROR_EMPTY_URL_FILTER;
+  if (parsed_rule->condition.url_filter) {
+    if (parsed_rule->condition.url_filter->empty())
+      return ParseResult::ERROR_EMPTY_URL_FILTER;
+    if (!base::IsStringASCII(*parsed_rule->condition.url_filter))
+      return ParseResult::ERROR_NON_ASCII_URL_FILTER;
  }

  indexed_rule->id = base::checked_cast<uint32_t>(parsed_rule->id);
@@ -306,10 +315,15 @@ ParseResult IndexedRule::CreateIndexedRule(
      return result;
  }

-  indexed_rule->domains =
-      CanonicalizeDomains(std::move(parsed_rule->condition.domains));
-  indexed_rule->excluded_domains =
-      CanonicalizeDomains(std::move(parsed_rule->condition.excluded_domains));
+  if (!CanonicalizeDomains(std::move(parsed_rule->condition.domains),
+                           &indexed_rule->domains)) {
+    return ParseResult::ERROR_NON_ASCII_DOMAIN;
+  }
+
+  if (!CanonicalizeDomains(std::move(parsed_rule->condition.excluded_domains),
+                           &indexed_rule->excluded_domains)) {
+    return ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN;
+  }

  if (is_redirect_rule)
    indexed_rule->redirect_url = std::move(*parsed_rule->action.redirect_url);

--- a/extensions/browser/api/declarative_net_request/indexed_rule_unittest.cc
+++ b/extensions/browser/api/declarative_net_request/indexed_rule_unittest.cc
@@ -11,6 +11,7 @@
 #include "base/macros.h"
 #include "base/numerics/safe_conversions.h"
 #include "base/strings/stringprintf.h"
+#include "base/strings/utf_string_conversions.h"
 #include "components/version_info/version_info.h"
 #include "extensions/browser/api/declarative_net_request/constants.h"
 #include "extensions/common/api/declarative_net_request.h"
@@ -258,6 +259,11 @@ TEST_F(IndexedRuleTest, UrlFilterParsing) {
      {std::make_unique<std::string>("||google.com"),
       flat_rule::UrlPatternType_SUBSTRING, flat_rule::AnchorType_SUBDOMAIN,
       flat_rule::AnchorType_NONE, "google.com", ParseResult::SUCCESS},
+      // Url pattern with non-ascii characters -ⱴase.com.
+      {std::make_unique<std::string>(base::WideToUTF8(L"\x2c74"
+                                                      L"ase.com")),
+       flat_rule::UrlPatternType_SUBSTRING, flat_rule::AnchorType_NONE,
+       flat_rule::AnchorType_NONE, "", ParseResult::ERROR_NON_ASCII_URL_FILTER},
  };

  for (size_t i = 0; i < arraysize(cases); ++i) {
@@ -306,7 +312,30 @@ TEST_F(IndexedRuleTest, DomainsParsing) {
           DomainVec({"g.com", "XY.COM", "zzz.com", "a.com", "google.com"})),
       ParseResult::SUCCESS,
       {"a.com", "a.com", "b.com"},
-       {"google.com", "zzz.com", "xy.com", "a.com", "g.com"}}};
+       {"google.com", "zzz.com", "xy.com", "a.com", "g.com"}},
+      // Domain with non-ascii characters.
+      {std::make_unique<DomainVec>(
+           DomainVec({base::WideToUTF8(L"abc\x2010" /*hyphen*/ L"def.com")})),
+       nullptr,
+       ParseResult::ERROR_NON_ASCII_DOMAIN,
+       {},
+       {}},
+      // Excluded domain with non-ascii characters.
+      {nullptr,
+       std::make_unique<DomainVec>(
+           DomainVec({base::WideToUTF8(L"36\x00b0"
+                                       L"c.com" /*36°c.com*/)})),
+       ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN,
+       {},
+       {}},
+      // Internationalized domain in punycode.
+      {std::make_unique<DomainVec>(
+           DomainVec({"xn--36c-tfa.com" /* punycode for 36°c.com*/})),
+       nullptr,
+       ParseResult::SUCCESS,
+       {"xn--36c-tfa.com"},
+       {}},
+  };

  for (size_t i = 0; i < arraysize(cases); ++i) {
    SCOPED_TRACE(base::StringPrintf("Testing case[%" PRIuS "]", i));

--- a/extensions/browser/api/declarative_net_request/parse_info.cc
+++ b/extensions/browser/api/declarative_net_request/parse_info.cc
@@ -94,6 +94,21 @@ std::string ParseInfo::GetErrorDescription(
      error = ErrorUtils::FormatErrorMessage(kErrorListNotPassed,
                                             json_rules_filename);
      break;
+    case ParseResult::ERROR_NON_ASCII_URL_FILTER:
+      error = ErrorUtils::FormatErrorMessage(
+          kErrorNonAscii, json_rules_filename, std::to_string(*rule_index_),
+          kUrlFilterKey);
+      break;
+    case ParseResult::ERROR_NON_ASCII_DOMAIN:
+      error = ErrorUtils::FormatErrorMessage(
+          kErrorNonAscii, json_rules_filename, std::to_string(*rule_index_),
+          kDomainsKey);
+      break;
+    case ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN:
+      error = ErrorUtils::FormatErrorMessage(
+          kErrorNonAscii, json_rules_filename, std::to_string(*rule_index_),
+          kExcludedDomainsKey);
+      break;
  }
  return error;
 }

--- a/extensions/common/api/declarative_net_request.idl
+++ b/extensions/common/api/declarative_net_request.idl
@@ -58,6 +58,12 @@ namespace declarativeNetRequest {
    // Therefore urlFilter is composed of the following parts:
    // (optional Left/Domain name anchor) + pattern + (optional Right anchor)
    // If omitted, all urls are matched. An empty string is not allowed.
+    // Note: The urlFilter must be composed of only ASCII characters. This is
+    // matched against a url where the host is encoded in the punycode format
+    // (in case of internationalized domains) and any other non-ascii characters
+    // are url encoded in utf-8.
+    // For example, when the request url is http://abc.рф?q=ф, the urlFilter
+    // will be matched against the url http://abc.xn--p1ai/?q=%D1%84.
    DOMString? urlFilter;

    // Whether the |urlFilter| is case sensitive. Default is false.
@@ -67,12 +73,16 @@ namespace declarativeNetRequest {
    // |domains|. If the list is omitted, the rule is applied to requests from
    // all domains. An empty list is not allowed.
    // Note: sub-domains like "a.example.com" are also allowed.
+    // The entries must consist of only ascii characters. Use punycode encoding
+    // for internationalized domains.
    DOMString[]? domains;

    // The rule will not match network requests originating from the list of
    // |excludedDomains|. If the list is empty or omitted, no domains are
    // excluded. This takes precedence over |domains|.
    // Note: sub-domains like "a.example.com" are also allowed.
+    // The entries must consist of only ascii characters. Use punycode encoding
+    // for internationalized domains.
    DOMString[]? excludedDomains;

    // List of resource types which the rule can match. An empty list is not