DNR: Implement evaluation of regex rules.

Introduce RegexRulesMatcher, which evaluates all regex rules within an extension ruleset. RulesetMatcher combines the results from ExtensionUrlPatternIndexMatcher and RegexRulesMatcher and is an abstraction over the complete ruleset. RegexRulesMatcher uses the FilteredRE2 class from the re2 library to achieve fast matching of a set of declarative regex rules against a request.

BUG=974391
Doc=https://docs.google.com/document/d/1mRErUMII_gSSPaHmxyn31UOYWUaZLj0xOaezekxD2-Y/edit?usp=sharing (Internal only)

Change-Id: I82d70fcf381aeec1d5a93af33a36db41e4704608
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1892132
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org>
Reviewed-by: Dominic Battré <battre@chromium.org>
Cr-Commit-Position: refs/heads/master@{#718869}

DNR: Implement evaluation of regex rules.
Introduce RegexRulesMatcher which evaluates all regex rules within an extension ruleset. RulesetMatcher combines the results from ExtensionUrlPatternIndexMatcher and RegexRulesMatcher and is an abstraction over the complete ruleset. RegexRulesMatcher uses the FilteredRE2 class from the re2 library to achieve fast matching of a set of declarative regex rules against a request. BUG=974391 Doc=https://docs.google.com/document/d/1mRErUMII_gSSPaHmxyn31UOYWUaZLj0xOaezekxD2-Y/edit?usp=sharing (Internal only) Change-Id: I82d70fcf381aeec1d5a93af33a36db41e4704608 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1892132 Commit-Queue: Karan Bhatia <karandeepb@chromium.org> Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org> Reviewed-by: Dominic Battré <battre@chromium.org> Cr-Commit-Position: refs/heads/master@{#718869}
1539dcc8 · Karan Bhatia · Commit Bot · b695ad2b · 1539dcc8 · 1539dcc8
Commit 1539dcc8 authored Nov 25, 2019 by Karan Bhatia Committed by Commit Bot Nov 25, 2019
19 changed files
--- a/chrome/browser/extensions/api/declarative_net_request/declarative_net_request_browsertest.cc
+++ b/chrome/browser/extensions/api/declarative_net_request/declarative_net_request_browsertest.cc
@@ -588,22 +588,26 @@ using DeclarativeNetRequestBrowserTest_Unpacked =
 #else
 #define MAYBE_BlockRequests_UrlFilter BlockRequests_UrlFilter
 #endif
-// Tests the "urlFilter" property of a declarative rule condition.
+// Tests the "urlFilter" and "regexFilter" properties of a declarative rule
+// condition.
 IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
                       MAYBE_BlockRequests_UrlFilter) {
  struct {
-    std::string url_filter;
+    std::string filter;
    int id;
+    bool is_regex_rule;
  } rules_data[] = {
-      {"pages_with_script/*ex", 1},
+      {"pages_with_script/*ex", 1, false},
-      {"||a.b.com", 2},
+      {"||a.b.com", 2, false},
-      {"|http://*.us", 3},
+      {"|http://*.us", 3, false},
-      {"pages_with_script/page2.html|", 4},
+      {"pages_with_script/page2.html|", 4, false},
-      {"|http://msn*/pages_with_script/page.html|", 5},
+      {"|http://msn*/pages_with_script/page.html|", 5, false},
-      {"%20", 6},     // Block any urls with space.
+      {"%20", 6, false},     // Block any urls with space.
-      {"%C3%A9", 7},  // Percent-encoded non-ascii character é.
+      {"%C3%A9", 7, false},  // Percent-encoded non-ascii character é.
      // Internationalized domain "ⱴase.com" in punycode.
-      {"|http://xn--ase-7z0b.com", 8},
+      {"|http://xn--ase-7z0b.com", 8, false},
+      {R"((http|https)://(\w+\.){1,2}com.*reg$)", 9, true},
+      {R"(\d+\.google\.com)", 10, true},
  };
  // Rule |i| is the rule with id |i|.
@@ -632,14 +636,26 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
       false},  // Rule 7
      {base::WideToUTF8(L"\x2c74"
                        L"ase.com"),
-       "/pages_with_script/page.html", false},  // Rule 8
+       "/pages_with_script/page.html", false},                  // Rule 8
+      {"abc.com", "/pages_with_script/page2.html?reg", false},  // Rule 9
+      {"abc.com", "/pages_with_script/page2.html?reg1", true},
+      {"w1.w2.com", "/pages_with_script/page2.html?reg", false},  // Rule 9
+      {"w1.w2.w3.com", "/pages_with_script/page2.html?reg", true},
+      {"24.google.com", "/pages_with_script/page.html", false},  // Rule 10
+      {"xyz.google.com", "/pages_with_script/page.html", true},
  };
  // Load the extension.
  std::vector<TestRule> rules;
  for (const auto& rule_data : rules_data) {
    TestRule rule = CreateGenericRule();
-    rule.condition->url_filter = rule_data.url_filter;
+    rule.condition->url_filter.reset();
+    if (rule_data.is_regex_rule)
+      rule.condition->regex_filter = rule_data.filter;
+    else
+      rule.condition->url_filter = rule_data.filter;
    rule.condition->resource_types = std::vector<std::string>({"main_frame"});
    rule.id = rule_data.id;
    rules.push_back(rule);

--- a/components/url_matcher/string_pattern.cc
+++ b/components/url_matcher/string_pattern.cc
@@ -5,15 +5,18 @@
 #include "components/url_matcher/string_pattern.h"
 #include <tuple>
+#include <utility>
 namespace url_matcher {
-StringPattern::StringPattern(const std::string& pattern,
+StringPattern::StringPattern(std::string pattern, StringPattern::ID id)
-                             StringPattern::ID id)
+    : pattern_(std::move(pattern)), id_(id) {}
-    : pattern_(pattern), id_(id) {}
 StringPattern::~StringPattern() {}
+StringPattern::StringPattern(StringPattern&&) = default;
+StringPattern& StringPattern::operator=(StringPattern&&) = default;
 bool StringPattern::operator<(const StringPattern& rhs) const {
  return std::tie(id_, pattern_) < std::tie(rhs.id_, rhs.pattern_);
 }

--- a/components/url_matcher/string_pattern.h
+++ b/components/url_matcher/string_pattern.h
@@ -24,8 +24,10 @@ class URL_MATCHER_EXPORT StringPattern {
 public:
  typedef int ID;
-  StringPattern(const std::string& pattern, ID id);
+  StringPattern(std::string pattern, ID id);
  ~StringPattern();
+  StringPattern(StringPattern&&);
+  StringPattern& operator=(StringPattern&&);
  const std::string& pattern() const { return pattern_; }
  ID id() const { return id_; }

--- a/components/url_pattern_index/url_pattern_index.cc
+++ b/components/url_pattern_index/url_pattern_index.cc
@@ -572,73 +572,6 @@ size_t GetLongestMatchingSubdomain(const url::Origin& origin,
  return 0;
 }
-// Returns whether the |origin| matches the domain list of the |rule|. A match
-// means that the longest domain in |domains| that |origin| is a sub-domain of
-// is not an exception OR all the |domains| are exceptions and neither matches
-// the |origin|. Thus, domain filters with more domain components trump filters
-// with fewer domain components, i.e. the more specific a filter is, the higher
-// the priority.
-//
-// A rule whose domain list is empty or contains only negative domains is still
-// considered a "generic" rule. Therefore, if |disable_generic_rules| is set,
-// this function will always return false for such rules.
-bool DoesOriginMatchDomainList(const url::Origin& origin,
-                               const flat::UrlRule& rule,
-                               bool disable_generic_rules) {
-  const bool is_generic = !rule.domains_included();
-  DCHECK(is_generic || rule.domains_included()->size());
-  if (disable_generic_rules && is_generic)
-    return false;
-  // Unique |origin| matches lists of exception domains only.
-  if (origin.opaque())
-    return is_generic;
-  size_t longest_matching_included_domain_length = 1;
-  if (!is_generic) {
-    longest_matching_included_domain_length =
-        GetLongestMatchingSubdomain(origin, *rule.domains_included());
-  }
-  if (longest_matching_included_domain_length && rule.domains_excluded()) {
-    return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) <
-           longest_matching_included_domain_length;
-  }
-  return !!longest_matching_included_domain_length;
-}
-// Returns whether the request matches flags of the specified URL |rule|. Takes
-// into account:
-//  - |element_type| of the requested resource, if not *_NONE.
-//  - |activation_type| for a subdocument request, if not *_NONE.
-//  - Whether the resource |is_third_party| w.r.t. its embedding document.
-bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
-                        flat::ElementType element_type,
-                        flat::ActivationType activation_type,
-                        bool is_third_party) {
-  DCHECK((element_type == flat::ElementType_NONE) !=
-         (activation_type == flat::ActivationType_NONE));
-  if (element_type != flat::ElementType_NONE &&
-      !(rule.element_types() & element_type)) {
-    return false;
-  }
-  if (activation_type != flat::ActivationType_NONE &&
-      !(rule.activation_types() & activation_type)) {
-    return false;
-  }
-  if (is_third_party &&
-      !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) {
-    return false;
-  }
-  if (!is_third_party &&
-      !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) {
-    return false;
-  }
-  return true;
-}
 // |sorted_candidates| is sorted in descending order by priority. This returns
 // the first matching rule i.e. the rule with the highest priority in
 // |sorted_candidates| or null if no rule matches.
@@ -758,6 +691,58 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
 }  // namespace
+bool DoesOriginMatchDomainList(const url::Origin& origin,
+                               const flat::UrlRule& rule,
+                               bool disable_generic_rules) {
+  const bool is_generic = !rule.domains_included();
+  DCHECK(is_generic || rule.domains_included()->size());
+  if (disable_generic_rules && is_generic)
+    return false;
+  // An opaque (unique) |origin| can only match generic rules, i.e. rules that
+  // have no included domains.
+  if (origin.opaque())
+    return is_generic;
+  size_t longest_matching_included_domain_length = 1;
+  if (!is_generic) {
+    longest_matching_included_domain_length =
+        GetLongestMatchingSubdomain(origin, *rule.domains_included());
+  }
+  if (longest_matching_included_domain_length && rule.domains_excluded()) {
+    return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) <
+           longest_matching_included_domain_length;
+  }
+  return !!longest_matching_included_domain_length;
+}
+bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
+                        flat::ElementType element_type,
+                        flat::ActivationType activation_type,
+                        bool is_third_party) {
+  DCHECK((element_type == flat::ElementType_NONE) !=
+         (activation_type == flat::ActivationType_NONE));
+  if (element_type != flat::ElementType_NONE &&
+      !(rule.element_types() & element_type)) {
+    return false;
+  }
+  if (activation_type != flat::ActivationType_NONE &&
+      !(rule.activation_types() & activation_type)) {
+    return false;
+  }
+  if (is_third_party &&
+      !(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) {
+    return false;
+  }
+  if (!is_third_party &&
+      !(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) {
+    return false;
+  }
+  return true;
+}
 UrlPatternIndexMatcher::UrlPatternIndexMatcher(
    const flat::UrlPatternIndex* flat_index)
    : flat_index_(flat_index) {

--- a/components/url_pattern_index/url_pattern_index.h
+++ b/components/url_pattern_index/url_pattern_index.h
@@ -128,6 +128,30 @@ class UrlPatternIndexBuilder {
  DISALLOW_COPY_AND_ASSIGN(UrlPatternIndexBuilder);
 };
+// Returns whether the |origin| matches the domain list of the |rule|. A match
+// means that the longest domain in |domains| that |origin| is a sub-domain of
+// is not an exception OR all the |domains| are exceptions and neither matches
+// the |origin|. Thus, domain filters with more domain components trump filters
+// with fewer domain components, i.e. the more specific a filter is, the higher
+// the priority.
+//
+// A rule whose domain list is empty or contains only negative domains is still
+// considered a "generic" rule. Therefore, if |disable_generic_rules| is set,
+// this function will always return false for such rules.
+bool DoesOriginMatchDomainList(const url::Origin& origin,
+                               const flat::UrlRule& rule,
+                               bool disable_generic_rules);
+// Returns whether the request matches flags of the specified |rule|. Takes into
+// account:
+//  - |element_type| of the requested resource, if not *_NONE.
+//  - |activation_type| for a subdocument request, if not *_NONE.
+//  - Whether the resource |is_third_party| w.r.t. its embedding document.
+bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
+                        flat::ElementType element_type,
+                        flat::ActivationType activation_type,
+                        bool is_third_party);
 // Encapsulates a read-only index built over the URL patterns of a set of URL
 // rules, and provides fast matching of network requests against these rules.
 class UrlPatternIndexMatcher {

--- a/extensions/browser/api/declarative_net_request/BUILD.gn
+++ b/extensions/browser/api/declarative_net_request/BUILD.gn
@@ -22,6 +22,8 @@ source_set("declarative_net_request") {
    "indexed_rule.h",
    "parse_info.cc",
    "parse_info.h",
+    "regex_rules_matcher.cc",
+    "regex_rules_matcher.h",
    "request_action.cc",
    "request_action.h",
    "request_params.cc",
@@ -41,8 +43,10 @@ source_set("declarative_net_request") {
  ]
  public_deps = [
+    "//components/url_matcher",
    "//components/url_pattern_index",
    "//extensions/browser/api/declarative_net_request/flat:extension_ruleset",
+    "//third_party/re2",
  ]
  deps = [
@@ -53,7 +57,6 @@ source_set("declarative_net_request") {
    "//extensions/common",
    "//extensions/common/api",
    "//net",
-    "//third_party/re2",
    "//tools/json_schema_compiler:generated_api_util",
    "//url",
  ]

--- a/extensions/browser/api/declarative_net_request/extension_url_pattern_index_matcher.cc
+++ b/extensions/browser/api/declarative_net_request/extension_url_pattern_index_matcher.cc
@@ -37,7 +37,7 @@ std::vector<url_pattern_index::UrlPatternIndexMatcher> GetMatchers(
  return matchers;
 }
-bool HasAnyRules(const url_pattern_index::flat::UrlPatternIndex* index) {
+bool HasAnyRules(const flat_rule::UrlPatternIndex* index) {
  DCHECK(index);
  if (index->fallback_rules()->size() > 0)

--- a/extensions/browser/api/declarative_net_request/indexed_rule.cc
+++ b/extensions/browser/api/declarative_net_request/indexed_rule.cc
@@ -13,6 +13,7 @@
 #include "base/strings/string_util.h"
 #include "components/url_pattern_index/url_pattern_index.h"
 #include "extensions/browser/api/declarative_net_request/constants.h"
+#include "extensions/browser/api/declarative_net_request/utils.h"
 #include "extensions/common/api/declarative_net_request.h"
 #include "extensions/common/api/declarative_net_request/utils.h"
 #include "third_party/re2/src/re2/re2.h"
@@ -362,24 +363,11 @@ ParseResult ParseRedirect(dnr_api::Redirect redirect,
 bool IsValidRegex(const dnr_api::Rule& parsed_rule) {
  DCHECK(parsed_rule.condition.regex_filter);
-  re2::RE2::Options options;
+  // TODO(karandeepb): Regex compilation can be expensive. Also, these need to
+  // be compiled again once the ruleset is loaded, which means duplicate work.
-  // RE2 supports UTF-8 and Latin1 encoding. We only need to support ASCII, so
+  // We should maintain a global cache of compiled regexes.
-  // use Latin1 encoding. This should also be more efficient than UTF-8.
+  re2::RE2 regex(*parsed_rule.condition.regex_filter,
-  // Note: Latin1 is an 8 bit extension to ASCII.
+                 CreateRE2Options(IsCaseSensitive(parsed_rule)));
-  options.set_encoding(re2::RE2::Options::EncodingLatin1);
-  options.set_case_sensitive(IsCaseSensitive(parsed_rule));
-  // Don't capture unless needed, for efficiency.
-  // TODO(crbug.com/974391): Capturing should be supported for regex based
-  // substitutions which are not implemented yet.
-  options.set_never_capture(true);
-  // TODO(crbug.com/974391): Regex compilation can be expensive. Also, these
-  // need to be compiled again once the ruleset is loaded, which means duplicate
-  // work. We should maintain a global cache of compiled regexes.
-  re2::RE2 regex(*parsed_rule.condition.regex_filter, options);
  return regex.ok();
 }

--- a/extensions/browser/api/declarative_net_request/regex_rules_matcher.cc
+++ b/extensions/browser/api/declarative_net_request/regex_rules_matcher.cc
--- a/extensions/browser/api/declarative_net_request/regex_rules_matcher.h
+++ b/extensions/browser/api/declarative_net_request/regex_rules_matcher.h
+// Copyright 2019 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#ifndef EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
+#define EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
+#include "base/macros.h"
+#include "components/url_matcher/substring_set_matcher.h"
+#include "extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h"
+#include "third_party/re2/src/re2/filtered_re2.h"
+namespace extensions {
+namespace declarative_net_request {
+// Structure to hold a RegexRule together with its corresponding compiled
+// re2::RE2 object.
+struct RegexRuleInfo {
+  RegexRuleInfo(const flat::RegexRule* regex_rule, const re2::RE2* regex);
+  RegexRuleInfo(const RegexRuleInfo& info);
+  RegexRuleInfo& operator=(const RegexRuleInfo& info);
+  const flat::RegexRule* regex_rule;
+  const re2::RE2* regex;
+};
+// RegexRulesMatcher deals with matching of regular expression rules. It is an
+// implementation detail of RulesetMatcher. This uses the FilteredRE2 class from
+// the re2 library to achieve fast matching of a set of declarative regex rules
+// against a request. How this works:
+//
+// Initialization:
+// 1. During initialization, we add each regex to the FilteredRE2 class.
+// 2. We compile the FilteredRE2 object which returns us a set of substrings.
+//    These are stored in |filtered_re2_strings_to_match_| below. These are also
+//    added to |substring_matcher_| for use in #3 below.
+//
+// Matching
+// 3. Given a request url, we find the set of strings from #2 that are
+//    substrings of the request url. This uses the
+//    url_matcher::SubstringSetMatcher class which internally uses the
+//    Aho-Corasick algorithm.
+// 4. Given the list of matched strings from #3, FilteredRE2 returns the list
+//    of regexes (rules) that might potentially match. To reduce the number of
+//    regexes that need to be matched (since it's expensive), we prune the list
+//    even further by checking if the rule metadata matches the request.
+// 5. Given the list of potentially matching rules, we finally match the actual
+//    regexes against the request url, as required.
+class RegexRulesMatcher final : public RulesetMatcherInterface {
+ public:
+  using RegexRulesList =
+      ::flatbuffers::Vector<flatbuffers::Offset<flat::RegexRule>>;
+  RegexRulesMatcher(const ExtensionId& extension_id,
+                    api::declarative_net_request::SourceType source_type,
+                    const RegexRulesList* regex_list,
+                    const ExtensionMetadataList* metadata_list);
+  // RulesetMatcherInterface override:
+  ~RegexRulesMatcher() override;
+  base::Optional<RequestAction> GetBlockOrCollapseAction(
+      const RequestParams& params) const override;
+  base::Optional<RequestAction> GetAllowAction(
+      const RequestParams& params) const override;
+  base::Optional<RequestAction> GetRedirectAction(
+      const RequestParams& params) const override;
+  base::Optional<RequestAction> GetUpgradeAction(
+      const RequestParams& params) const override;
+  uint8_t GetRemoveHeadersMask(
+      const RequestParams& params,
+      uint8_t ignored_mask,
+      std::vector<RequestAction>* remove_headers_actions) const override;
+  bool IsExtraHeadersMatcher() const override {
+    return is_extra_headers_matcher_;
+  }
+ private:
+  // Helper to build the necessary data structures for matching.
+  void InitializeMatcher();
+  // Returns the highest priority matching rule for the given request |params|
+  // and action |type|, or null if no rules match.
+  const url_pattern_index::flat::UrlRule* GetHighestPriorityMatchingRule(
+      const RequestParams& params,
+      flat::ActionType type) const;
+  // Returns the potentially matching rules for the given request. A potentially
+  // matching rule is one whose metadata matches the given request |params| and
+  // which is not ruled out as a potential match by the |filtered_re2_| object.
+  // Note: The returned vector is sorted in descending order of rule priority.
+  const std::vector<RegexRuleInfo>& GetPotentialMatches(
+      const RequestParams& params) const;
+  // Pointers to flatbuffer indexed data. Guaranteed to be valid through the
+  // lifetime of the object.
+  const RegexRulesList* const regex_list_;
+  const ExtensionMetadataList* const metadata_list_;
+  const bool is_extra_headers_matcher_;
+  // Data structures used for matching. Initialized during construction in
+  // InitializeMatcher() and immutable for the rest of the object lifetime.
+  // This provides a pre-filtering mechanism, to reduce the number of regular
+  // expressions that are actually matched against a request.
+  re2::FilteredRE2 filtered_re2_;
+  // Map from re2 ID (as used by |filtered_re2_|) to the flat::RegexRule in
+  // |regex_list_|.
+  std::map<int, const flat::RegexRule*> re2_id_to_rules_map_;
+  // Candidate strings to match for each request, for pre-filtering. The ID of
+  // each url_matcher::StringPattern is its index within the vector. All the
+  // strings are lower-cased.
+  std::vector<url_matcher::StringPattern> filtered_re2_strings_to_match_;
+  // Structure for fast substring matching. Given a string S and a set of
+  // candidate strings, returns the sub-set of candidate strings that are a
+  // substring of S. Uses the Aho-Corasick algorithm internally.
+  url_matcher::SubstringSetMatcher substring_matcher_;
+  DISALLOW_COPY_AND_ASSIGN(RegexRulesMatcher);
+};
+}  // namespace declarative_net_request
+}  // namespace extensions
+#endif  // EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
--- a/extensions/browser/api/declarative_net_request/request_params.h
+++ b/extensions/browser/api/declarative_net_request/request_params.h
@@ -7,11 +7,12 @@
 #include "base/containers/flat_map.h"
 #include "base/macros.h"
+#include "base/optional.h"
 #include "components/url_pattern_index/url_pattern_index.h"
+#include "extensions/browser/api/declarative_net_request/regex_rules_matcher.h"
+#include "url/gurl.h"
 #include "url/origin.h"
-class GURL;
 namespace extensions {
 struct WebRequestInfo;
@@ -36,6 +37,14 @@ struct RequestParams {
  // a cache to prevent additional calls to GetAllowAction.
  mutable base::flat_map<const RulesetMatcher*, bool> allow_rule_cache;
+  // Lower cased url, used for regex matching. Cached for performance.
+  mutable base::Optional<std::string> lower_cased_url_spec;
+  // Map from RegexRulesMatcher to a vector of potential matches for this
+  // request. Cached for performance.
+  mutable base::flat_map<const RegexRulesMatcher*, std::vector<RegexRuleInfo>>
+      potential_regex_matches;
  // Pointer to the corresponding WebRequestInfo object. Outlives this struct.
  // Can be null for some unit tests.
  const WebRequestInfo* request_info = nullptr;

--- a/extensions/browser/api/declarative_net_request/ruleset_matcher.cc
+++ b/extensions/browser/api/declarative_net_request/ruleset_matcher.cc
@@ -21,6 +21,21 @@
 namespace extensions {
 namespace declarative_net_request {
+namespace {
+base::Optional<RequestAction> GetMaxPriorityAction(
+    base::Optional<RequestAction> lhs,
+    base::Optional<RequestAction> rhs) {
+  if (!lhs)
+    return rhs;
+  if (!rhs)
+    return lhs;
+  return lhs->rule_priority > rhs->rule_priority ? std::move(lhs)
+                                                 : std::move(rhs);
+}
+}  // namespace
 // static
 RulesetMatcher::LoadRulesetResult RulesetMatcher::CreateVerifiedMatcher(
    const RulesetSource& source,
@@ -65,17 +80,27 @@ RulesetMatcher::~RulesetMatcher() = default;
 base::Optional<RequestAction> RulesetMatcher::GetBlockOrCollapseAction(
    const RequestParams& params) const {
-  return url_pattern_index_matcher_.GetBlockOrCollapseAction(params);
+  base::Optional<RequestAction> action =
+      url_pattern_index_matcher_.GetBlockOrCollapseAction(params);
+  if (!action)
+    action = regex_matcher_.GetBlockOrCollapseAction(params);
+  return action;
 }
 base::Optional<RequestAction> RulesetMatcher::GetAllowAction(
    const RequestParams& params) const {
-  return url_pattern_index_matcher_.GetAllowAction(params);
+  base::Optional<RequestAction> action =
+      url_pattern_index_matcher_.GetAllowAction(params);
+  if (!action)
+    action = regex_matcher_.GetAllowAction(params);
+  return action;
 }
 base::Optional<RequestAction> RulesetMatcher::GetRedirectAction(
    const RequestParams& params) const {
-  return url_pattern_index_matcher_.GetRedirectAction(params);
+  return GetMaxPriorityAction(
+      url_pattern_index_matcher_.GetRedirectAction(params),
+      regex_matcher_.GetRedirectAction(params));
 }
 base::Optional<RequestAction> RulesetMatcher::GetUpgradeAction(
@@ -83,34 +108,36 @@ base::Optional<RequestAction> RulesetMatcher::GetUpgradeAction(
  if (!IsUpgradeableRequest(params))
    return base::nullopt;
-  return url_pattern_index_matcher_.GetUpgradeAction(params);
+  return GetMaxPriorityAction(
+      url_pattern_index_matcher_.GetUpgradeAction(params),
+      regex_matcher_.GetUpgradeAction(params));
 }
 uint8_t RulesetMatcher::GetRemoveHeadersMask(
    const RequestParams& params,
    uint8_t ignored_mask,
    std::vector<RequestAction>* remove_headers_actions) const {
-  return url_pattern_index_matcher_.GetRemoveHeadersMask(
+  DCHECK(remove_headers_actions);
+  static_assert(
+      flat::RemoveHeaderType_ANY <= std::numeric_limits<uint8_t>::max(),
+      "flat::RemoveHeaderType can't fit in a uint8_t");
+  uint8_t mask = url_pattern_index_matcher_.GetRemoveHeadersMask(
      params, ignored_mask, remove_headers_actions);
+  return mask | regex_matcher_.GetRemoveHeadersMask(params, ignored_mask | mask,
+                                                    remove_headers_actions);
 }
 bool RulesetMatcher::IsExtraHeadersMatcher() const {
-  return url_pattern_index_matcher_.IsExtraHeadersMatcher();
+  return url_pattern_index_matcher_.IsExtraHeadersMatcher() ||
+         regex_matcher_.IsExtraHeadersMatcher();
 }
 base::Optional<RequestAction>
 RulesetMatcher::GetRedirectOrUpgradeActionByPriority(
    const RequestParams& params) const {
-  base::Optional<RequestAction> redirect_action = GetRedirectAction(params);
+  return GetMaxPriorityAction(GetRedirectAction(params),
-  base::Optional<RequestAction> upgrade_action = GetUpgradeAction(params);
+                              GetUpgradeAction(params));
-  if (!redirect_action)
-    return upgrade_action;
-  if (!upgrade_action)
-    return redirect_action;
-  if (upgrade_action->rule_priority >= redirect_action->rule_priority)
-    return upgrade_action;
-  return redirect_action;
 }
 RulesetMatcher::RulesetMatcher(
@@ -127,7 +154,11 @@ RulesetMatcher::RulesetMatcher(
      url_pattern_index_matcher_(extension_id,
                                 source_type,
                                 root_->index_list(),
-                                 root_->extension_metadata()) {}
+                                 root_->extension_metadata()),
+      regex_matcher_(extension_id,
+                     source_type,
+                     root_->regex_rules(),
+                     root_->extension_metadata()) {}
 }  // namespace declarative_net_request
 }  // namespace extensions
--- a/extensions/browser/api/declarative_net_request/ruleset_matcher.h
+++ b/extensions/browser/api/declarative_net_request/ruleset_matcher.h
@@ -11,6 +11,7 @@
 #include "extensions/browser/api/declarative_net_request/extension_url_pattern_index_matcher.h"
 #include "extensions/browser/api/declarative_net_request/flat/extension_ruleset_generated.h"
+#include "extensions/browser/api/declarative_net_request/regex_rules_matcher.h"
 #include "extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h"
 namespace extensions {
@@ -26,7 +27,7 @@ struct UrlRuleMetadata;
 // RulesetMatcher encapsulates the Declarative Net Request API ruleset
 // corresponding to a single RulesetSource. Since this class is immutable, it is
 // thread-safe.
-class RulesetMatcher : public RulesetMatcherInterface {
+class RulesetMatcher final : public RulesetMatcherInterface {
 public:
  // Describes the result of creating a RulesetMatcher instance.
  // This is logged as part of UMA. Hence existing values should not be re-
@@ -65,7 +66,6 @@ class RulesetMatcher : public RulesetMatcherInterface {
  // RulesetMatcherInterface overrides:
  ~RulesetMatcher() override;
  base::Optional<RequestAction> GetBlockOrCollapseAction(
      const RequestParams& params) const override;
  base::Optional<RequestAction> GetAllowAction(
@@ -112,6 +112,9 @@ class RulesetMatcher : public RulesetMatcherInterface {
  // |url_pattern_index| component.
  const ExtensionUrlPatternIndexMatcher url_pattern_index_matcher_;
+  // Underlying matcher for regex rules.
+  const RegexRulesMatcher regex_matcher_;
  DISALLOW_COPY_AND_ASSIGN(RulesetMatcher);
 };

--- a/extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h
+++ b/extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h
@@ -28,13 +28,13 @@ class RulesetMatcherInterface {
  virtual ~RulesetMatcherInterface();
-  // Returns the ruleset's matching RequestAction with type |BLOCK| or
+  // Returns any matching RequestAction with type |BLOCK| or |COLLAPSE|, or
-  // |COLLAPSE|, or base::nullopt if the ruleset has no matching blocking rule.
+  // base::nullopt if the ruleset has no matching blocking rule.
  virtual base::Optional<RequestAction> GetBlockOrCollapseAction(
      const RequestParams& params) const = 0;
-  // Returns the ruleset's matching RequestAction with type |ALLOW| or
+  // Returns any matching RequestAction with type |ALLOW| or base::nullopt if
-  // base::nullopt if the ruleset has no matching allow rule.
+  // the ruleset has no matching allow rule.
  virtual base::Optional<RequestAction> GetAllowAction(
      const RequestParams& params) const = 0;
@@ -53,6 +53,7 @@ class RulesetMatcherInterface {
  // Returns the bitmask of headers to remove from the request. The bitmask
  // corresponds to flat::RemoveHeaderType. |ignored_mask| denotes the mask of
  // headers to be skipped for evaluation and is excluded in the return value.
+  // TODO(karandeepb): Rename |ignored_mask| to |excluded_remove_headers_mask|.
  virtual uint8_t GetRemoveHeadersMask(
      const RequestParams& params,
      uint8_t ignored_mask,

--- a/extensions/browser/api/declarative_net_request/ruleset_matcher_unittest.cc
+++ b/extensions/browser/api/declarative_net_request/ruleset_matcher_unittest.cc
--- a/extensions/browser/api/declarative_net_request/utils.cc
+++ b/extensions/browser/api/declarative_net_request/utils.cc
@@ -221,5 +221,23 @@ dnr_api::RequestDetails CreateRequestDetails(const WebRequestInfo& request) {
  return details;
 }
+re2::RE2::Options CreateRE2Options(bool is_case_sensitive) {
+  re2::RE2::Options options;
+  // RE2 supports UTF-8 and Latin1 encoding. We only need to support ASCII, so
+  // use Latin1 encoding. This should also be more efficient than UTF-8.
+  // Note: Latin1 is an 8 bit extension to ASCII.
+  options.set_encoding(re2::RE2::Options::EncodingLatin1);
+  options.set_case_sensitive(is_case_sensitive);
+  // Don't capture unless needed, for efficiency.
+  // TODO(crbug.com/974391): Capturing should be supported for regex based
+  // substitutions which are not implemented yet.
+  options.set_never_capture(true);
+  return options;
+}
 }  // namespace declarative_net_request
 }  // namespace extensions
--- a/extensions/browser/api/declarative_net_request/utils.h
+++ b/extensions/browser/api/declarative_net_request/utils.h
@@ -16,6 +16,7 @@
 #include "base/optional.h"
 #include "extensions/browser/api/declarative_net_request/ruleset_source.h"
 #include "extensions/common/api/declarative_net_request.h"
+#include "third_party/re2/src/re2/re2.h"
 namespace base {
 class FilePath;
@@ -64,6 +65,9 @@ void LogReadDynamicRulesStatus(ReadJSONRulesResult::Status status);
 api::declarative_net_request::RequestDetails CreateRequestDetails(
    const WebRequestInfo& request);
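+// Creates the RE2::Options used to compile regex filters: Latin-1 encoding,
+// capturing disabled, and case sensitivity as given by |is_case_sensitive|.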
+re2::RE2::Options CreateRE2Options(bool is_case_sensitive);
 }  // namespace declarative_net_request
 }  // namespace extensions

--- a/extensions/common/api/declarative_net_request/test_utils.cc
+++ b/extensions/common/api/declarative_net_request/test_utils.cc
@@ -66,6 +66,7 @@ TestRuleCondition& TestRuleCondition::operator=(const TestRuleCondition&) =
 std::unique_ptr<base::DictionaryValue> TestRuleCondition::ToValue() const {
  auto dict = std::make_unique<base::DictionaryValue>();
  SetValue(dict.get(), kUrlFilterKey, url_filter);
+  SetValue(dict.get(), kRegexFilterKey, regex_filter);
  SetValue(dict.get(), kIsUrlFilterCaseSensitiveKey,
           is_url_filter_case_sensitive);
  SetValue(dict.get(), kDomainsKey, domains);

--- a/extensions/common/api/declarative_net_request/test_utils.h
+++ b/extensions/common/api/declarative_net_request/test_utils.h
@@ -39,6 +39,7 @@ struct TestRuleCondition : public DictionarySource {
  TestRuleCondition& operator=(const TestRuleCondition&);
  base::Optional<std::string> url_filter;
+  base::Optional<std::string> regex_filter;
  base::Optional<bool> is_url_filter_case_sensitive;
  base::Optional<std::vector<std::string>> domains;
  base::Optional<std::vector<std::string>> excluded_domains;