Commit 1539dcc8 authored by Karan Bhatia, committed by Commit Bot

DNR: Implement evaluation of regex rules.

Introduce RegexRulesMatcher which evaluates all regex rules within an extension
ruleset. RulesetMatcher combines the results from
ExtensionUrlPatternIndexMatcher and RegexRulesMatcher and is an abstraction over
the complete ruleset.

RegexRulesMatcher uses the FilteredRE2 class from the re2 library to achieve
fast matching of a set of declarative regex rules against a request.

BUG=974391
Doc=https://docs.google.com/document/d/1mRErUMII_gSSPaHmxyn31UOYWUaZLj0xOaezekxD2-Y/edit?usp=sharing (Internal only)

Change-Id: I82d70fcf381aeec1d5a93af33a36db41e4704608
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1892132
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org>
Reviewed-by: Dominic Battré <battre@chromium.org>
Cr-Commit-Position: refs/heads/master@{#718869}
parent b695ad2b
...@@ -588,22 +588,26 @@ using DeclarativeNetRequestBrowserTest_Unpacked = ...@@ -588,22 +588,26 @@ using DeclarativeNetRequestBrowserTest_Unpacked =
#else #else
#define MAYBE_BlockRequests_UrlFilter BlockRequests_UrlFilter #define MAYBE_BlockRequests_UrlFilter BlockRequests_UrlFilter
#endif #endif
// Tests the "urlFilter" property of a declarative rule condition. // Tests the "urlFilter" and "regexFilter" property of a declarative rule
// condition.
IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest, IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
MAYBE_BlockRequests_UrlFilter) { MAYBE_BlockRequests_UrlFilter) {
struct { struct {
std::string url_filter; std::string filter;
int id; int id;
bool is_regex_rule;
} rules_data[] = { } rules_data[] = {
{"pages_with_script/*ex", 1}, {"pages_with_script/*ex", 1, false},
{"||a.b.com", 2}, {"||a.b.com", 2, false},
{"|http://*.us", 3}, {"|http://*.us", 3, false},
{"pages_with_script/page2.html|", 4}, {"pages_with_script/page2.html|", 4, false},
{"|http://msn*/pages_with_script/page.html|", 5}, {"|http://msn*/pages_with_script/page.html|", 5, false},
{"%20", 6}, // Block any urls with space. {"%20", 6, false}, // Block any urls with space.
{"%C3%A9", 7}, // Percent-encoded non-ascii character é. {"%C3%A9", 7, false}, // Percent-encoded non-ascii character é.
// Internationalized domain "ⱴase.com" in punycode. // Internationalized domain "ⱴase.com" in punycode.
{"|http://xn--ase-7z0b.com", 8}, {"|http://xn--ase-7z0b.com", 8, false},
{R"((http|https)://(\w+\.){1,2}com.*reg$)", 9, true},
{R"(\d+\.google\.com)", 10, true},
}; };
// Rule |i| is the rule with id |i|. // Rule |i| is the rule with id |i|.
...@@ -632,14 +636,26 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest, ...@@ -632,14 +636,26 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
false}, // Rule 7 false}, // Rule 7
{base::WideToUTF8(L"\x2c74" {base::WideToUTF8(L"\x2c74"
L"ase.com"), L"ase.com"),
"/pages_with_script/page.html", false}, // Rule 8 "/pages_with_script/page.html", false}, // Rule 8
{"abc.com", "/pages_with_script/page2.html?reg", false}, // Rule 9
{"abc.com", "/pages_with_script/page2.html?reg1", true},
{"w1.w2.com", "/pages_with_script/page2.html?reg", false}, // Rule 9
{"w1.w2.w3.com", "/pages_with_script/page2.html?reg", true},
{"24.google.com", "/pages_with_script/page.html", false}, // Rule 10
{"xyz.google.com", "/pages_with_script/page.html", true},
}; };
// Load the extension. // Load the extension.
std::vector<TestRule> rules; std::vector<TestRule> rules;
for (const auto& rule_data : rules_data) { for (const auto& rule_data : rules_data) {
TestRule rule = CreateGenericRule(); TestRule rule = CreateGenericRule();
rule.condition->url_filter = rule_data.url_filter; rule.condition->url_filter.reset();
if (rule_data.is_regex_rule)
rule.condition->regex_filter = rule_data.filter;
else
rule.condition->url_filter = rule_data.filter;
rule.condition->resource_types = std::vector<std::string>({"main_frame"}); rule.condition->resource_types = std::vector<std::string>({"main_frame"});
rule.id = rule_data.id; rule.id = rule_data.id;
rules.push_back(rule); rules.push_back(rule);
......
...@@ -5,15 +5,18 @@ ...@@ -5,15 +5,18 @@
#include "components/url_matcher/string_pattern.h" #include "components/url_matcher/string_pattern.h"
#include <tuple> #include <tuple>
#include <utility>
namespace url_matcher { namespace url_matcher {
StringPattern::StringPattern(const std::string& pattern, StringPattern::StringPattern(std::string pattern, StringPattern::ID id)
StringPattern::ID id) : pattern_(std::move(pattern)), id_(id) {}
: pattern_(pattern), id_(id) {}
StringPattern::~StringPattern() {} StringPattern::~StringPattern() {}
StringPattern::StringPattern(StringPattern&&) = default;
StringPattern& StringPattern::operator=(StringPattern&&) = default;
bool StringPattern::operator<(const StringPattern& rhs) const { bool StringPattern::operator<(const StringPattern& rhs) const {
return std::tie(id_, pattern_) < std::tie(rhs.id_, rhs.pattern_); return std::tie(id_, pattern_) < std::tie(rhs.id_, rhs.pattern_);
} }
......
...@@ -24,8 +24,10 @@ class URL_MATCHER_EXPORT StringPattern { ...@@ -24,8 +24,10 @@ class URL_MATCHER_EXPORT StringPattern {
public: public:
typedef int ID; typedef int ID;
StringPattern(const std::string& pattern, ID id); StringPattern(std::string pattern, ID id);
~StringPattern(); ~StringPattern();
StringPattern(StringPattern&&);
StringPattern& operator=(StringPattern&&);
const std::string& pattern() const { return pattern_; } const std::string& pattern() const { return pattern_; }
ID id() const { return id_; } ID id() const { return id_; }
......
...@@ -572,73 +572,6 @@ size_t GetLongestMatchingSubdomain(const url::Origin& origin, ...@@ -572,73 +572,6 @@ size_t GetLongestMatchingSubdomain(const url::Origin& origin,
return 0; return 0;
} }
// Decides whether |origin| satisfies the domain restrictions of |rule|.
//
// A rule without an included-domain list is "generic" (this covers rules that
// only list excluded domains). Generic rules never match when
// |disable_generic_rules| is set, and an opaque |origin| matches generic rules
// only.
//
// Otherwise the longest included domain that |origin| is a sub-domain of is
// compared with the longest such excluded domain: the rule matches only when
// some included domain matches (always true for generic rules) and every
// matching excluded domain is strictly shorter. More specific domain filters
// therefore take precedence over less specific ones.
bool DoesOriginMatchDomainList(const url::Origin& origin,
                               const flat::UrlRule& rule,
                               bool disable_generic_rules) {
  const auto* included_domains = rule.domains_included();
  const bool has_included_domains = !!included_domains;
  // A present included-domain list is expected to be non-empty.
  DCHECK(!has_included_domains || included_domains->size());

  if (!has_included_domains && disable_generic_rules)
    return false;

  // Opaque (unique) origins can only be matched by generic rules.
  if (origin.opaque())
    return !has_included_domains;

  // Generic rules behave as if an included domain of length 1 had matched.
  const size_t included_match_length =
      has_included_domains
          ? GetLongestMatchingSubdomain(origin, *included_domains)
          : 1;
  if (!included_match_length)
    return false;

  const auto* excluded_domains = rule.domains_excluded();
  if (!excluded_domains)
    return true;

  // An exclusion only wins if it is at least as specific as the inclusion.
  return GetLongestMatchingSubdomain(origin, *excluded_domains) <
         included_match_length;
}
// Checks the flag-based conditions of |rule| against a request:
// - |element_type|, when it is not *_NONE, must be among the rule's element
//   types.
// - |activation_type|, when it is not *_NONE, must be among the rule's
//   activation types.
// - The rule must apply to third-party requests if |is_third_party| is set,
//   and to first-party requests otherwise.
// Exactly one of |element_type| and |activation_type| is expected to be
// *_NONE.
bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
                        flat::ElementType element_type,
                        flat::ActivationType activation_type,
                        bool is_third_party) {
  const bool has_element_type = element_type != flat::ElementType_NONE;
  const bool has_activation_type =
      activation_type != flat::ActivationType_NONE;
  DCHECK(has_element_type != has_activation_type);

  if (has_element_type && !(rule.element_types() & element_type))
    return false;

  if (has_activation_type && !(rule.activation_types() & activation_type))
    return false;

  // Select the single party option the rule must carry for this request.
  const auto required_party_option =
      is_third_party ? flat::OptionFlag_APPLIES_TO_THIRD_PARTY
                     : flat::OptionFlag_APPLIES_TO_FIRST_PARTY;
  if (!(rule.options() & required_party_option))
    return false;

  return true;
}
// |sorted_candidates| is sorted in descending order by priority. This returns // |sorted_candidates| is sorted in descending order by priority. This returns
// the first matching rule i.e. the rule with the highest priority in // the first matching rule i.e. the rule with the highest priority in
// |sorted_candidates| or null if no rule matches. // |sorted_candidates| or null if no rule matches.
...@@ -758,6 +691,58 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex( ...@@ -758,6 +691,58 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
} // namespace } // namespace
// Decides whether |origin| satisfies the domain restrictions of |rule|.
// Rules without an included-domain list are "generic": they never match when
// |disable_generic_rules| is set, and they are the only rules an opaque
// |origin| can match. Otherwise the longest matching included domain must be
// strictly longer than the longest matching excluded domain (if any).
bool DoesOriginMatchDomainList(const url::Origin& origin,
                               const flat::UrlRule& rule,
                               bool disable_generic_rules) {
  const auto* included_domains = rule.domains_included();
  const bool has_included_domains = !!included_domains;
  // A present included-domain list is expected to be non-empty.
  DCHECK(!has_included_domains || included_domains->size());

  if (!has_included_domains && disable_generic_rules)
    return false;

  // Opaque (unique) origins can only be matched by generic rules.
  if (origin.opaque())
    return !has_included_domains;

  // Generic rules behave as if an included domain of length 1 had matched.
  const size_t included_match_length =
      has_included_domains
          ? GetLongestMatchingSubdomain(origin, *included_domains)
          : 1;
  if (!included_match_length)
    return false;

  const auto* excluded_domains = rule.domains_excluded();
  if (!excluded_domains)
    return true;

  // An exclusion only wins if it is at least as specific as the inclusion.
  return GetLongestMatchingSubdomain(origin, *excluded_domains) <
         included_match_length;
}
// Checks the flag-based conditions of |rule| against a request: the element
// type or activation type (whichever is not *_NONE) must be enabled on the
// rule, and the rule must apply to the request's party (third-party when
// |is_third_party| is set, first-party otherwise).
bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
                        flat::ElementType element_type,
                        flat::ActivationType activation_type,
                        bool is_third_party) {
  const bool has_element_type = element_type != flat::ElementType_NONE;
  const bool has_activation_type =
      activation_type != flat::ActivationType_NONE;
  // Exactly one of the two type arguments is expected to be specified.
  DCHECK(has_element_type != has_activation_type);

  if (has_element_type && !(rule.element_types() & element_type))
    return false;

  if (has_activation_type && !(rule.activation_types() & activation_type))
    return false;

  // Select the single party option the rule must carry for this request.
  const auto required_party_option =
      is_third_party ? flat::OptionFlag_APPLIES_TO_THIRD_PARTY
                     : flat::OptionFlag_APPLIES_TO_FIRST_PARTY;
  if (!(rule.options() & required_party_option))
    return false;

  return true;
}
UrlPatternIndexMatcher::UrlPatternIndexMatcher( UrlPatternIndexMatcher::UrlPatternIndexMatcher(
const flat::UrlPatternIndex* flat_index) const flat::UrlPatternIndex* flat_index)
: flat_index_(flat_index) { : flat_index_(flat_index) {
......
...@@ -128,6 +128,30 @@ class UrlPatternIndexBuilder { ...@@ -128,6 +128,30 @@ class UrlPatternIndexBuilder {
DISALLOW_COPY_AND_ASSIGN(UrlPatternIndexBuilder); DISALLOW_COPY_AND_ASSIGN(UrlPatternIndexBuilder);
}; };
// Returns whether the |origin| matches the domain list of the |rule|. A match
// means that the longest domain in the rule's domain list that |origin| is a
// sub-domain of is not an exception OR all the listed domains are exceptions
// and none of them matches the |origin|. Thus, domain filters with more domain
// components trump filters with fewer domain components, i.e. the more
// specific a filter is, the higher the priority.
//
// A rule whose domain list is empty or contains only negative domains is still
// considered a "generic" rule. Therefore, if |disable_generic_rules| is set,
// this function will always return false for such rules. An opaque |origin|
// matches generic rules only.
bool DoesOriginMatchDomainList(const url::Origin& origin,
const flat::UrlRule& rule,
bool disable_generic_rules);
// Returns whether the request matches the flags of the specified |rule|. Takes
// into account:
// - |element_type| of the requested resource, if not *_NONE.
// - |activation_type| for a subdocument request, if not *_NONE.
// - Whether the resource |is_third_party| w.r.t. its embedding document: the
//   rule must apply to third-party requests if so, and to first-party
//   requests otherwise.
// Exactly one of |element_type| and |activation_type| is expected to be
// *_NONE (this is DCHECKed by the definition).
bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
flat::ElementType element_type,
flat::ActivationType activation_type,
bool is_third_party);
// Encapsulates a read-only index built over the URL patterns of a set of URL // Encapsulates a read-only index built over the URL patterns of a set of URL
// rules, and provides fast matching of network requests against these rules. // rules, and provides fast matching of network requests against these rules.
class UrlPatternIndexMatcher { class UrlPatternIndexMatcher {
......
...@@ -22,6 +22,8 @@ source_set("declarative_net_request") { ...@@ -22,6 +22,8 @@ source_set("declarative_net_request") {
"indexed_rule.h", "indexed_rule.h",
"parse_info.cc", "parse_info.cc",
"parse_info.h", "parse_info.h",
"regex_rules_matcher.cc",
"regex_rules_matcher.h",
"request_action.cc", "request_action.cc",
"request_action.h", "request_action.h",
"request_params.cc", "request_params.cc",
...@@ -41,8 +43,10 @@ source_set("declarative_net_request") { ...@@ -41,8 +43,10 @@ source_set("declarative_net_request") {
] ]
public_deps = [ public_deps = [
"//components/url_matcher",
"//components/url_pattern_index", "//components/url_pattern_index",
"//extensions/browser/api/declarative_net_request/flat:extension_ruleset", "//extensions/browser/api/declarative_net_request/flat:extension_ruleset",
"//third_party/re2",
] ]
deps = [ deps = [
...@@ -53,7 +57,6 @@ source_set("declarative_net_request") { ...@@ -53,7 +57,6 @@ source_set("declarative_net_request") {
"//extensions/common", "//extensions/common",
"//extensions/common/api", "//extensions/common/api",
"//net", "//net",
"//third_party/re2",
"//tools/json_schema_compiler:generated_api_util", "//tools/json_schema_compiler:generated_api_util",
"//url", "//url",
] ]
......
...@@ -37,7 +37,7 @@ std::vector<url_pattern_index::UrlPatternIndexMatcher> GetMatchers( ...@@ -37,7 +37,7 @@ std::vector<url_pattern_index::UrlPatternIndexMatcher> GetMatchers(
return matchers; return matchers;
} }
bool HasAnyRules(const url_pattern_index::flat::UrlPatternIndex* index) { bool HasAnyRules(const flat_rule::UrlPatternIndex* index) {
DCHECK(index); DCHECK(index);
if (index->fallback_rules()->size() > 0) if (index->fallback_rules()->size() > 0)
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "components/url_pattern_index/url_pattern_index.h" #include "components/url_pattern_index/url_pattern_index.h"
#include "extensions/browser/api/declarative_net_request/constants.h" #include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/browser/api/declarative_net_request/utils.h"
#include "extensions/common/api/declarative_net_request.h" #include "extensions/common/api/declarative_net_request.h"
#include "extensions/common/api/declarative_net_request/utils.h" #include "extensions/common/api/declarative_net_request/utils.h"
#include "third_party/re2/src/re2/re2.h" #include "third_party/re2/src/re2/re2.h"
...@@ -362,24 +363,11 @@ ParseResult ParseRedirect(dnr_api::Redirect redirect, ...@@ -362,24 +363,11 @@ ParseResult ParseRedirect(dnr_api::Redirect redirect,
bool IsValidRegex(const dnr_api::Rule& parsed_rule) { bool IsValidRegex(const dnr_api::Rule& parsed_rule) {
DCHECK(parsed_rule.condition.regex_filter); DCHECK(parsed_rule.condition.regex_filter);
re2::RE2::Options options; // TODO(karandeepb): Regex compilation can be expensive. Also, these need to
// be compiled again once the ruleset is loaded, which means duplicate work.
// RE2 supports UTF-8 and Latin1 encoding. We only need to support ASCII, so // We should maintain a global cache of compiled regexes.
// use Latin1 encoding. This should also be more efficient than UTF-8. re2::RE2 regex(*parsed_rule.condition.regex_filter,
// Note: Latin1 is an 8 bit extension to ASCII. CreateRE2Options(IsCaseSensitive(parsed_rule)));
options.set_encoding(re2::RE2::Options::EncodingLatin1);
options.set_case_sensitive(IsCaseSensitive(parsed_rule));
// Don't capture unless needed, for efficiency.
// TODO(crbug.com/974391): Capturing should be supported for regex based
// substitutions which are not implemented yet.
options.set_never_capture(true);
// TODO(crbug.com/974391): Regex compilation can be expensive. Also, these
// need to be compiled again once the ruleset is loaded, which means duplicate
// work. We should maintain a global cache of compiled regexes.
re2::RE2 regex(*parsed_rule.condition.regex_filter, options);
return regex.ok(); return regex.ok();
} }
......
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
#define EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
#include "base/macros.h"
#include "components/url_matcher/substring_set_matcher.h"
#include "extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h"
#include "third_party/re2/src/re2/filtered_re2.h"
namespace extensions {
namespace declarative_net_request {
// Structure to hold a flat::RegexRule together with its corresponding compiled
// re2::RE2 object. Copyable; copies share the same two pointers.
// NOTE(review): both pointers look non-owning (raw const pointers in a
// copyable struct) - confirm against the definitions in the .cc file.
struct RegexRuleInfo {
RegexRuleInfo(const flat::RegexRule* regex_rule, const re2::RE2* regex);
RegexRuleInfo(const RegexRuleInfo& info);
RegexRuleInfo& operator=(const RegexRuleInfo& info);
// The indexed (flatbuffer) regex rule.
const flat::RegexRule* regex_rule;
// The compiled regular expression corresponding to |regex_rule|.
const re2::RE2* regex;
};
// RegexRulesMatcher deals with matching of regular expression rules. It is an
// implementation detail of RulesetMatcher. This uses the FilteredRE2 class from
// the re2 library to achieve fast matching of a set of declarative regex rules
// against a request. How this works:
//
// Initialization:
// 1. During initialization, we add each regex to the FilteredRE2 class.
// 2. We compile the FilteredRE2 object which returns us a set of substrings.
// These are stored in |filtered_re2_strings_to_match_| below. These are also
// added to |substring_matcher_| for use in #3 below.
//
// Matching:
// 3. Given a request url, we find the set of strings from #2 that are
// substrings of the request url. This uses the
// url_matcher::SubstringSetMatcher class which internally uses the
// Aho-Corasick algorithm.
// 4. Given the list of matched strings from #3, FilteredRE2 returns the list
// of regexes (rules) that might potentially match. To reduce the number of
// regexes that need to be matched (since it's expensive), we prune the list
// even further by checking if the rule metadata matches the request.
// 5. Given the list of potentially matching rules, we finally match the actual
// regexes against the request url, as required.
class RegexRulesMatcher final : public RulesetMatcherInterface {
public:
// Flatbuffer vector type holding the indexed regex rules.
using RegexRulesList =
::flatbuffers::Vector<flatbuffers::Offset<flat::RegexRule>>;
// |regex_list| and |metadata_list| point to flatbuffer indexed data; see
// |regex_list_| and |metadata_list_| below for the lifetime expectations.
RegexRulesMatcher(const ExtensionId& extension_id,
api::declarative_net_request::SourceType source_type,
const RegexRulesList* regex_list,
const ExtensionMetadataList* metadata_list);
// RulesetMatcherInterface overrides:
~RegexRulesMatcher() override;
base::Optional<RequestAction> GetBlockOrCollapseAction(
const RequestParams& params) const override;
base::Optional<RequestAction> GetAllowAction(
const RequestParams& params) const override;
base::Optional<RequestAction> GetRedirectAction(
const RequestParams& params) const override;
base::Optional<RequestAction> GetUpgradeAction(
const RequestParams& params) const override;
uint8_t GetRemoveHeadersMask(
const RequestParams& params,
uint8_t ignored_mask,
std::vector<RequestAction>* remove_headers_actions) const override;
// Returns the value of |is_extra_headers_matcher_|.
bool IsExtraHeadersMatcher() const override {
return is_extra_headers_matcher_;
}
private:
// Helper to build the necessary data structures for matching.
void InitializeMatcher();
// Returns the highest priority matching rule for the given request |params|
// and action |type|, or null if no rules match.
const url_pattern_index::flat::UrlRule* GetHighestPriorityMatchingRule(
const RequestParams& params,
flat::ActionType type) const;
// Returns the potentially matching rules for the given request. A potentially
// matching rule is one whose metadata matches the given request |params| and
// which is not ruled out as a potential match by the |filtered_re2_| object.
// Note: The returned vector is sorted in descending order of rule priority.
const std::vector<RegexRuleInfo>& GetPotentialMatches(
const RequestParams& params) const;
// Pointers to flatbuffer indexed data. Guaranteed to be valid through the
// lifetime of the object.
const RegexRulesList* const regex_list_;
const ExtensionMetadataList* const metadata_list_;
// Value reported by IsExtraHeadersMatcher(). Fixed at construction.
const bool is_extra_headers_matcher_;
// Data structures used for matching. Initialized during construction in
// InitializeMatcher() and immutable for the rest of the object lifetime.
// |filtered_re2_| provides a pre-filtering mechanism, to reduce the number of
// regular expressions that are actually matched against a request.
re2::FilteredRE2 filtered_re2_;
// Map from re2 ID (as used by |filtered_re2_|) to the flat::RegexRule in
// |regex_list_|.
std::map<int, const flat::RegexRule*> re2_id_to_rules_map_;
// Candidate strings to match for each request, for pre-filtering. The ID of
// each url_matcher::StringPattern is its index within the vector. All the
// strings are lower-cased.
std::vector<url_matcher::StringPattern> filtered_re2_strings_to_match_;
// Structure for fast substring matching. Given a string S and a set of
// candidate strings, returns the sub-set of candidate strings that are a
// substring of S. Uses the Aho-Corasick algorithm internally.
url_matcher::SubstringSetMatcher substring_matcher_;
DISALLOW_COPY_AND_ASSIGN(RegexRulesMatcher);
};
} // namespace declarative_net_request
} // namespace extensions
#endif // EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
...@@ -7,11 +7,12 @@ ...@@ -7,11 +7,12 @@
#include "base/containers/flat_map.h" #include "base/containers/flat_map.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/optional.h"
#include "components/url_pattern_index/url_pattern_index.h" #include "components/url_pattern_index/url_pattern_index.h"
#include "extensions/browser/api/declarative_net_request/regex_rules_matcher.h"
#include "url/gurl.h"
#include "url/origin.h" #include "url/origin.h"
class GURL;
namespace extensions { namespace extensions {
struct WebRequestInfo; struct WebRequestInfo;
...@@ -36,6 +37,14 @@ struct RequestParams { ...@@ -36,6 +37,14 @@ struct RequestParams {
// a cache to prevent additional calls to GetAllowAction. // a cache to prevent additional calls to GetAllowAction.
mutable base::flat_map<const RulesetMatcher*, bool> allow_rule_cache; mutable base::flat_map<const RulesetMatcher*, bool> allow_rule_cache;
// Lower cased url, used for regex matching. Cached for performance.
mutable base::Optional<std::string> lower_cased_url_spec;
// Map from RegexRulesMatcher to a vector of potential matches for this
// request. Cached for performance.
mutable base::flat_map<const RegexRulesMatcher*, std::vector<RegexRuleInfo>>
potential_regex_matches;
// Pointer to the corresponding WebRequestInfo object. Outlives this struct. // Pointer to the corresponding WebRequestInfo object. Outlives this struct.
// Can be null for some unit tests. // Can be null for some unit tests.
const WebRequestInfo* request_info = nullptr; const WebRequestInfo* request_info = nullptr;
......
...@@ -21,6 +21,21 @@ ...@@ -21,6 +21,21 @@
namespace extensions { namespace extensions {
namespace declarative_net_request { namespace declarative_net_request {
namespace {
// Picks the action with the greater |rule_priority| out of |lhs| and |rhs|.
// If only one of them holds an action, that one is returned; if neither does,
// the result is empty. When both priorities are equal, |rhs| is returned.
base::Optional<RequestAction> GetMaxPriorityAction(
    base::Optional<RequestAction> lhs,
    base::Optional<RequestAction> rhs) {
  // With at most one action present there is nothing to compare.
  if (!lhs || !rhs)
    return lhs ? std::move(lhs) : std::move(rhs);

  const bool lhs_has_higher_priority = lhs->rule_priority > rhs->rule_priority;
  if (lhs_has_higher_priority)
    return lhs;
  return rhs;
}
} // namespace
// static // static
RulesetMatcher::LoadRulesetResult RulesetMatcher::CreateVerifiedMatcher( RulesetMatcher::LoadRulesetResult RulesetMatcher::CreateVerifiedMatcher(
const RulesetSource& source, const RulesetSource& source,
...@@ -65,17 +80,27 @@ RulesetMatcher::~RulesetMatcher() = default; ...@@ -65,17 +80,27 @@ RulesetMatcher::~RulesetMatcher() = default;
base::Optional<RequestAction> RulesetMatcher::GetBlockOrCollapseAction( base::Optional<RequestAction> RulesetMatcher::GetBlockOrCollapseAction(
const RequestParams& params) const { const RequestParams& params) const {
return url_pattern_index_matcher_.GetBlockOrCollapseAction(params); base::Optional<RequestAction> action =
url_pattern_index_matcher_.GetBlockOrCollapseAction(params);
if (!action)
action = regex_matcher_.GetBlockOrCollapseAction(params);
return action;
} }
base::Optional<RequestAction> RulesetMatcher::GetAllowAction( base::Optional<RequestAction> RulesetMatcher::GetAllowAction(
const RequestParams& params) const { const RequestParams& params) const {
return url_pattern_index_matcher_.GetAllowAction(params); base::Optional<RequestAction> action =
url_pattern_index_matcher_.GetAllowAction(params);
if (!action)
action = regex_matcher_.GetAllowAction(params);
return action;
} }
base::Optional<RequestAction> RulesetMatcher::GetRedirectAction( base::Optional<RequestAction> RulesetMatcher::GetRedirectAction(
const RequestParams& params) const { const RequestParams& params) const {
return url_pattern_index_matcher_.GetRedirectAction(params); return GetMaxPriorityAction(
url_pattern_index_matcher_.GetRedirectAction(params),
regex_matcher_.GetRedirectAction(params));
} }
base::Optional<RequestAction> RulesetMatcher::GetUpgradeAction( base::Optional<RequestAction> RulesetMatcher::GetUpgradeAction(
...@@ -83,34 +108,36 @@ base::Optional<RequestAction> RulesetMatcher::GetUpgradeAction( ...@@ -83,34 +108,36 @@ base::Optional<RequestAction> RulesetMatcher::GetUpgradeAction(
if (!IsUpgradeableRequest(params)) if (!IsUpgradeableRequest(params))
return base::nullopt; return base::nullopt;
return url_pattern_index_matcher_.GetUpgradeAction(params); return GetMaxPriorityAction(
url_pattern_index_matcher_.GetUpgradeAction(params),
regex_matcher_.GetUpgradeAction(params));
} }
uint8_t RulesetMatcher::GetRemoveHeadersMask( uint8_t RulesetMatcher::GetRemoveHeadersMask(
const RequestParams& params, const RequestParams& params,
uint8_t ignored_mask, uint8_t ignored_mask,
std::vector<RequestAction>* remove_headers_actions) const { std::vector<RequestAction>* remove_headers_actions) const {
return url_pattern_index_matcher_.GetRemoveHeadersMask( DCHECK(remove_headers_actions);
static_assert(
flat::RemoveHeaderType_ANY <= std::numeric_limits<uint8_t>::max(),
"flat::RemoveHeaderType can't fit in a uint8_t");
uint8_t mask = url_pattern_index_matcher_.GetRemoveHeadersMask(
params, ignored_mask, remove_headers_actions); params, ignored_mask, remove_headers_actions);
return mask | regex_matcher_.GetRemoveHeadersMask(params, ignored_mask | mask,
remove_headers_actions);
} }
bool RulesetMatcher::IsExtraHeadersMatcher() const { bool RulesetMatcher::IsExtraHeadersMatcher() const {
return url_pattern_index_matcher_.IsExtraHeadersMatcher(); return url_pattern_index_matcher_.IsExtraHeadersMatcher() ||
regex_matcher_.IsExtraHeadersMatcher();
} }
base::Optional<RequestAction> base::Optional<RequestAction>
RulesetMatcher::GetRedirectOrUpgradeActionByPriority( RulesetMatcher::GetRedirectOrUpgradeActionByPriority(
const RequestParams& params) const { const RequestParams& params) const {
base::Optional<RequestAction> redirect_action = GetRedirectAction(params); return GetMaxPriorityAction(GetRedirectAction(params),
base::Optional<RequestAction> upgrade_action = GetUpgradeAction(params); GetUpgradeAction(params));
if (!redirect_action)
return upgrade_action;
if (!upgrade_action)
return redirect_action;
if (upgrade_action->rule_priority >= redirect_action->rule_priority)
return upgrade_action;
return redirect_action;
} }
RulesetMatcher::RulesetMatcher( RulesetMatcher::RulesetMatcher(
...@@ -127,7 +154,11 @@ RulesetMatcher::RulesetMatcher( ...@@ -127,7 +154,11 @@ RulesetMatcher::RulesetMatcher(
url_pattern_index_matcher_(extension_id, url_pattern_index_matcher_(extension_id,
source_type, source_type,
root_->index_list(), root_->index_list(),
root_->extension_metadata()) {} root_->extension_metadata()),
regex_matcher_(extension_id,
source_type,
root_->regex_rules(),
root_->extension_metadata()) {}
} // namespace declarative_net_request } // namespace declarative_net_request
} // namespace extensions } // namespace extensions
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "extensions/browser/api/declarative_net_request/extension_url_pattern_index_matcher.h" #include "extensions/browser/api/declarative_net_request/extension_url_pattern_index_matcher.h"
#include "extensions/browser/api/declarative_net_request/flat/extension_ruleset_generated.h" #include "extensions/browser/api/declarative_net_request/flat/extension_ruleset_generated.h"
#include "extensions/browser/api/declarative_net_request/regex_rules_matcher.h"
#include "extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h" #include "extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h"
namespace extensions { namespace extensions {
...@@ -26,7 +27,7 @@ struct UrlRuleMetadata; ...@@ -26,7 +27,7 @@ struct UrlRuleMetadata;
// RulesetMatcher encapsulates the Declarative Net Request API ruleset // RulesetMatcher encapsulates the Declarative Net Request API ruleset
// corresponding to a single RulesetSource. Since this class is immutable, it is // corresponding to a single RulesetSource. Since this class is immutable, it is
// thread-safe. // thread-safe.
class RulesetMatcher : public RulesetMatcherInterface { class RulesetMatcher final : public RulesetMatcherInterface {
public: public:
// Describes the result of creating a RulesetMatcher instance. // Describes the result of creating a RulesetMatcher instance.
// This is logged as part of UMA. Hence existing values should not be re- // This is logged as part of UMA. Hence existing values should not be re-
...@@ -65,7 +66,6 @@ class RulesetMatcher : public RulesetMatcherInterface { ...@@ -65,7 +66,6 @@ class RulesetMatcher : public RulesetMatcherInterface {
// RulesetMatcherInterface overrides: // RulesetMatcherInterface overrides:
~RulesetMatcher() override; ~RulesetMatcher() override;
base::Optional<RequestAction> GetBlockOrCollapseAction( base::Optional<RequestAction> GetBlockOrCollapseAction(
const RequestParams& params) const override; const RequestParams& params) const override;
base::Optional<RequestAction> GetAllowAction( base::Optional<RequestAction> GetAllowAction(
...@@ -112,6 +112,9 @@ class RulesetMatcher : public RulesetMatcherInterface { ...@@ -112,6 +112,9 @@ class RulesetMatcher : public RulesetMatcherInterface {
// |url_pattern_index| component. // |url_pattern_index| component.
const ExtensionUrlPatternIndexMatcher url_pattern_index_matcher_; const ExtensionUrlPatternIndexMatcher url_pattern_index_matcher_;
// Underlying matcher for regex rules.
const RegexRulesMatcher regex_matcher_;
DISALLOW_COPY_AND_ASSIGN(RulesetMatcher); DISALLOW_COPY_AND_ASSIGN(RulesetMatcher);
}; };
......
...@@ -28,13 +28,13 @@ class RulesetMatcherInterface { ...@@ -28,13 +28,13 @@ class RulesetMatcherInterface {
virtual ~RulesetMatcherInterface(); virtual ~RulesetMatcherInterface();
// Returns the ruleset's matching RequestAction with type |BLOCK| or // Returns any matching RequestAction with type |BLOCK| or |COLLAPSE|, or
// |COLLAPSE|, or base::nullopt if the ruleset has no matching blocking rule. // base::nullopt if the ruleset has no matching blocking rule.
virtual base::Optional<RequestAction> GetBlockOrCollapseAction( virtual base::Optional<RequestAction> GetBlockOrCollapseAction(
const RequestParams& params) const = 0; const RequestParams& params) const = 0;
// Returns the ruleset's matching RequestAction with type |ALLOW| or // Returns any matching RequestAction with type |ALLOW| or base::nullopt if
// base::nullopt if the ruleset has no matching allow rule. // the ruleset has no matching allow rule.
virtual base::Optional<RequestAction> GetAllowAction( virtual base::Optional<RequestAction> GetAllowAction(
const RequestParams& params) const = 0; const RequestParams& params) const = 0;
...@@ -53,6 +53,7 @@ class RulesetMatcherInterface { ...@@ -53,6 +53,7 @@ class RulesetMatcherInterface {
// Returns the bitmask of headers to remove from the request. The bitmask // Returns the bitmask of headers to remove from the request. The bitmask
// corresponds to flat::RemoveHeaderType. |ignored_mask| denotes the mask of // corresponds to flat::RemoveHeaderType. |ignored_mask| denotes the mask of
// headers to be skipped for evaluation and is excluded in the return value. // headers to be skipped for evaluation and is excluded in the return value.
// TODO(karandeepb): Rename |ignored_mask| to |excluded_remove_headers_mask|.
virtual uint8_t GetRemoveHeadersMask( virtual uint8_t GetRemoveHeadersMask(
const RequestParams& params, const RequestParams& params,
uint8_t ignored_mask, uint8_t ignored_mask,
......
...@@ -221,5 +221,23 @@ dnr_api::RequestDetails CreateRequestDetails(const WebRequestInfo& request) { ...@@ -221,5 +221,23 @@ dnr_api::RequestDetails CreateRequestDetails(const WebRequestInfo& request) {
return details; return details;
} }
// Creates the default RE2::Options, with case sensitivity taken from
// |is_case_sensitive|.
re2::RE2::Options CreateRE2Options(bool is_case_sensitive) {
  re2::RE2::Options re2_options;

  re2_options.set_case_sensitive(is_case_sensitive);

  // Of the two encodings RE2 offers (UTF-8 and Latin1), Latin1 is chosen: only
  // ASCII needs to be supported, and Latin1 is an 8 bit extension of ASCII
  // that should also be more efficient than UTF-8.
  re2_options.set_encoding(re2::RE2::Options::EncodingLatin1);

  // Capturing is switched off for efficiency since nothing needs it yet.
  // TODO(crbug.com/974391): Capturing should be supported for regex based
  // substitutions which are not implemented yet.
  re2_options.set_never_capture(true);

  return re2_options;
}
} // namespace declarative_net_request } // namespace declarative_net_request
} // namespace extensions } // namespace extensions
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "base/optional.h" #include "base/optional.h"
#include "extensions/browser/api/declarative_net_request/ruleset_source.h" #include "extensions/browser/api/declarative_net_request/ruleset_source.h"
#include "extensions/common/api/declarative_net_request.h" #include "extensions/common/api/declarative_net_request.h"
#include "third_party/re2/src/re2/re2.h"
namespace base { namespace base {
class FilePath; class FilePath;
...@@ -64,6 +65,9 @@ void LogReadDynamicRulesStatus(ReadJSONRulesResult::Status status); ...@@ -64,6 +65,9 @@ void LogReadDynamicRulesStatus(ReadJSONRulesResult::Status status);
api::declarative_net_request::RequestDetails CreateRequestDetails( api::declarative_net_request::RequestDetails CreateRequestDetails(
const WebRequestInfo& request); const WebRequestInfo& request);
// Creates default RE2::Options.
re2::RE2::Options CreateRE2Options(bool is_case_sensitive);
} // namespace declarative_net_request } // namespace declarative_net_request
} // namespace extensions } // namespace extensions
......
...@@ -66,6 +66,7 @@ TestRuleCondition& TestRuleCondition::operator=(const TestRuleCondition&) = ...@@ -66,6 +66,7 @@ TestRuleCondition& TestRuleCondition::operator=(const TestRuleCondition&) =
std::unique_ptr<base::DictionaryValue> TestRuleCondition::ToValue() const { std::unique_ptr<base::DictionaryValue> TestRuleCondition::ToValue() const {
auto dict = std::make_unique<base::DictionaryValue>(); auto dict = std::make_unique<base::DictionaryValue>();
SetValue(dict.get(), kUrlFilterKey, url_filter); SetValue(dict.get(), kUrlFilterKey, url_filter);
SetValue(dict.get(), kRegexFilterKey, regex_filter);
SetValue(dict.get(), kIsUrlFilterCaseSensitiveKey, SetValue(dict.get(), kIsUrlFilterCaseSensitiveKey,
is_url_filter_case_sensitive); is_url_filter_case_sensitive);
SetValue(dict.get(), kDomainsKey, domains); SetValue(dict.get(), kDomainsKey, domains);
......
...@@ -39,6 +39,7 @@ struct TestRuleCondition : public DictionarySource { ...@@ -39,6 +39,7 @@ struct TestRuleCondition : public DictionarySource {
TestRuleCondition& operator=(const TestRuleCondition&); TestRuleCondition& operator=(const TestRuleCondition&);
base::Optional<std::string> url_filter; base::Optional<std::string> url_filter;
base::Optional<std::string> regex_filter;
base::Optional<bool> is_url_filter_case_sensitive; base::Optional<bool> is_url_filter_case_sensitive;
base::Optional<std::vector<std::string>> domains; base::Optional<std::vector<std::string>> domains;
base::Optional<std::vector<std::string>> excluded_domains; base::Optional<std::vector<std::string>> excluded_domains;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment