Commit 1539dcc8 authored by Karan Bhatia, committed by Commit Bot

DNR: Implement evaluation of regex rules.

Introduce RegexRulesMatcher which evaluates all regex rules within an extension
ruleset. RulesetMatcher combines the results from
ExtensionUrlPatternIndexMatcher and RegexRulesMatcher and is an abstraction over
the complete ruleset.

RegexRulesMatcher uses the FilteredRE2 class from the re2 library to achieve
fast matching of a set of declarative regex rules against a request.

BUG=974391
Doc=https://docs.google.com/document/d/1mRErUMII_gSSPaHmxyn31UOYWUaZLj0xOaezekxD2-Y/edit?usp=sharing (Internal only)

Change-Id: I82d70fcf381aeec1d5a93af33a36db41e4704608
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1892132
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org>
Reviewed-by: Dominic Battré <battre@chromium.org>
Cr-Commit-Position: refs/heads/master@{#718869}
parent b695ad2b
......@@ -588,22 +588,26 @@ using DeclarativeNetRequestBrowserTest_Unpacked =
#else
#define MAYBE_BlockRequests_UrlFilter BlockRequests_UrlFilter
#endif
// Tests the "urlFilter" property of a declarative rule condition.
// Tests the "urlFilter" and "regexFilter" properties of a declarative rule
// condition.
IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
MAYBE_BlockRequests_UrlFilter) {
struct {
std::string url_filter;
std::string filter;
int id;
bool is_regex_rule;
} rules_data[] = {
{"pages_with_script/*ex", 1},
{"||a.b.com", 2},
{"|http://*.us", 3},
{"pages_with_script/page2.html|", 4},
{"|http://msn*/pages_with_script/page.html|", 5},
{"%20", 6}, // Block any urls with space.
{"%C3%A9", 7}, // Percent-encoded non-ascii character é.
{"pages_with_script/*ex", 1, false},
{"||a.b.com", 2, false},
{"|http://*.us", 3, false},
{"pages_with_script/page2.html|", 4, false},
{"|http://msn*/pages_with_script/page.html|", 5, false},
{"%20", 6, false}, // Block any urls with space.
{"%C3%A9", 7, false}, // Percent-encoded non-ascii character é.
// Internationalized domain "ⱴase.com" in punycode.
{"|http://xn--ase-7z0b.com", 8},
{"|http://xn--ase-7z0b.com", 8, false},
{R"((http|https)://(\w+\.){1,2}com.*reg$)", 9, true},
{R"(\d+\.google\.com)", 10, true},
};
// Rule |i| is the rule with id |i|.
......@@ -632,14 +636,26 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
false}, // Rule 7
{base::WideToUTF8(L"\x2c74"
L"ase.com"),
"/pages_with_script/page.html", false}, // Rule 8
"/pages_with_script/page.html", false}, // Rule 8
{"abc.com", "/pages_with_script/page2.html?reg", false}, // Rule 9
{"abc.com", "/pages_with_script/page2.html?reg1", true},
{"w1.w2.com", "/pages_with_script/page2.html?reg", false}, // Rule 9
{"w1.w2.w3.com", "/pages_with_script/page2.html?reg", true},
{"24.google.com", "/pages_with_script/page.html", false}, // Rule 10
{"xyz.google.com", "/pages_with_script/page.html", true},
};
// Load the extension.
std::vector<TestRule> rules;
for (const auto& rule_data : rules_data) {
TestRule rule = CreateGenericRule();
rule.condition->url_filter = rule_data.url_filter;
rule.condition->url_filter.reset();
if (rule_data.is_regex_rule)
rule.condition->regex_filter = rule_data.filter;
else
rule.condition->url_filter = rule_data.filter;
rule.condition->resource_types = std::vector<std::string>({"main_frame"});
rule.id = rule_data.id;
rules.push_back(rule);
......
......@@ -5,15 +5,18 @@
#include "components/url_matcher/string_pattern.h"
#include <tuple>
#include <utility>
namespace url_matcher {
StringPattern::StringPattern(const std::string& pattern,
StringPattern::ID id)
: pattern_(pattern), id_(id) {}
StringPattern::StringPattern(std::string pattern, StringPattern::ID id)
: pattern_(std::move(pattern)), id_(id) {}
StringPattern::~StringPattern() {}
StringPattern::StringPattern(StringPattern&&) = default;
StringPattern& StringPattern::operator=(StringPattern&&) = default;
// Strict weak ordering over patterns: compares |id_| first and falls back to
// |pattern_| when the ids are equal (lexicographic comparison via std::tie).
bool StringPattern::operator<(const StringPattern& rhs) const {
return std::tie(id_, pattern_) < std::tie(rhs.id_, rhs.pattern_);
}
......
......@@ -24,8 +24,10 @@ class URL_MATCHER_EXPORT StringPattern {
public:
typedef int ID;
StringPattern(const std::string& pattern, ID id);
StringPattern(std::string pattern, ID id);
~StringPattern();
StringPattern(StringPattern&&);
StringPattern& operator=(StringPattern&&);
const std::string& pattern() const { return pattern_; }
ID id() const { return id_; }
......
......@@ -572,73 +572,6 @@ size_t GetLongestMatchingSubdomain(const url::Origin& origin,
return 0;
}
// Returns whether the |origin| matches the domain list of the |rule|. A match
// means that the longest domain in |domains| that |origin| is a sub-domain of
// is not an exception OR all the |domains| are exceptions and neither matches
// the |origin|. Thus, domain filters with more domain components trump filters
// with fewer domain components, i.e. the more specific a filter is, the higher
// the priority.
//
// A rule whose domain list is empty or contains only negative domains is still
// considered a "generic" rule. Therefore, if |disable_generic_rules| is set,
// this function will always return false for such rules.
bool DoesOriginMatchDomainList(const url::Origin& origin,
const flat::UrlRule& rule,
bool disable_generic_rules) {
const bool is_generic = !rule.domains_included();
DCHECK(is_generic || rule.domains_included()->size());
if (disable_generic_rules && is_generic)
return false;
// Unique |origin| matches lists of exception domains only.
if (origin.opaque())
return is_generic;
size_t longest_matching_included_domain_length = 1;
if (!is_generic) {
longest_matching_included_domain_length =
GetLongestMatchingSubdomain(origin, *rule.domains_included());
}
if (longest_matching_included_domain_length && rule.domains_excluded()) {
return GetLongestMatchingSubdomain(origin, *rule.domains_excluded()) <
longest_matching_included_domain_length;
}
return !!longest_matching_included_domain_length;
}
// Returns whether the request matches flags of the specified URL |rule|. Takes
// into account:
// - |element_type| of the requested resource, if not *_NONE.
// - |activation_type| for a subdocument request, if not *_NONE.
// - Whether the resource |is_third_party| w.r.t. its embedding document.
bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
flat::ElementType element_type,
flat::ActivationType activation_type,
bool is_third_party) {
DCHECK((element_type == flat::ElementType_NONE) !=
(activation_type == flat::ActivationType_NONE));
if (element_type != flat::ElementType_NONE &&
!(rule.element_types() & element_type)) {
return false;
}
if (activation_type != flat::ActivationType_NONE &&
!(rule.activation_types() & activation_type)) {
return false;
}
if (is_third_party &&
!(rule.options() & flat::OptionFlag_APPLIES_TO_THIRD_PARTY)) {
return false;
}
if (!is_third_party &&
!(rule.options() & flat::OptionFlag_APPLIES_TO_FIRST_PARTY)) {
return false;
}
return true;
}
// |sorted_candidates| is sorted in descending order by priority. This returns
// the first matching rule i.e. the rule with the highest priority in
// |sorted_candidates| or null if no rule matches.
......@@ -758,6 +691,58 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
} // namespace
// Decides whether |origin| satisfies the included/excluded domain lists of
// |rule|. A rule without an included-domains list is "generic"; generic rules
// never match when |disable_generic_rules| is set. Otherwise the longest
// matching included domain must be strictly longer than the longest matching
// excluded domain (if any) for the rule to match.
bool DoesOriginMatchDomainList(const url::Origin& origin,
                               const flat::UrlRule& rule,
                               bool disable_generic_rules) {
  const auto* included_domains = rule.domains_included();
  const bool is_generic = !included_domains;
  DCHECK(is_generic || included_domains->size());

  if (is_generic && disable_generic_rules)
    return false;

  // An opaque (unique) |origin| can only be matched by generic rules.
  if (origin.opaque())
    return is_generic;

  // Generic rules behave as if some included domain of length 1 matched.
  const size_t included_match_length =
      is_generic ? 1
                 : GetLongestMatchingSubdomain(origin, *included_domains);
  if (!included_match_length)
    return false;

  const auto* excluded_domains = rule.domains_excluded();
  if (!excluded_domains)
    return true;

  // The more specific (longer) domain wins; exclusions win ties.
  return GetLongestMatchingSubdomain(origin, *excluded_domains) <
         included_match_length;
}
// Checks the request against the flag-like fields of |rule|: the element type
// mask, the activation type mask and the first-/third-party option flags.
// Exactly one of |element_type| and |activation_type| must be *_NONE.
bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
                        flat::ElementType element_type,
                        flat::ActivationType activation_type,
                        bool is_third_party) {
  DCHECK((element_type == flat::ElementType_NONE) !=
         (activation_type == flat::ActivationType_NONE));

  const bool checks_element_type = element_type != flat::ElementType_NONE;
  if (checks_element_type && !(rule.element_types() & element_type))
    return false;

  const bool checks_activation_type =
      activation_type != flat::ActivationType_NONE;
  if (checks_activation_type && !(rule.activation_types() & activation_type))
    return false;

  // The rule must opt in to the party-ness of this particular request.
  const auto required_party_flag =
      is_third_party ? flat::OptionFlag_APPLIES_TO_THIRD_PARTY
                     : flat::OptionFlag_APPLIES_TO_FIRST_PARTY;
  if (!(rule.options() & required_party_flag))
    return false;

  return true;
}
UrlPatternIndexMatcher::UrlPatternIndexMatcher(
const flat::UrlPatternIndex* flat_index)
: flat_index_(flat_index) {
......
......@@ -128,6 +128,30 @@ class UrlPatternIndexBuilder {
DISALLOW_COPY_AND_ASSIGN(UrlPatternIndexBuilder);
};
// Returns whether the |origin| matches the domain list of the |rule|. A match
// means that the longest domain in |domains| that |origin| is a sub-domain of
// is not an exception OR all the |domains| are exceptions and neither matches
// the |origin|. Thus, domain filters with more domain components trump filters
// with fewer domain components, i.e. the more specific a filter is, the higher
// the priority.
//
// A rule whose domain list is empty or contains only negative domains is still
// considered a "generic" rule. Therefore, if |disable_generic_rules| is set,
// this function will always return false for such rules.
bool DoesOriginMatchDomainList(const url::Origin& origin,
const flat::UrlRule& rule,
bool disable_generic_rules);
// Returns whether the request matches flags of the specified |rule|. Takes into
// account:
// - |element_type| of the requested resource, if not *_NONE.
// - |activation_type| for a subdocument request, if not *_NONE.
// - Whether the resource |is_third_party| w.r.t. its embedding document.
bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
flat::ElementType element_type,
flat::ActivationType activation_type,
bool is_third_party);
// Encapsulates a read-only index built over the URL patterns of a set of URL
// rules, and provides fast matching of network requests against these rules.
class UrlPatternIndexMatcher {
......
......@@ -22,6 +22,8 @@ source_set("declarative_net_request") {
"indexed_rule.h",
"parse_info.cc",
"parse_info.h",
"regex_rules_matcher.cc",
"regex_rules_matcher.h",
"request_action.cc",
"request_action.h",
"request_params.cc",
......@@ -41,8 +43,10 @@ source_set("declarative_net_request") {
]
public_deps = [
"//components/url_matcher",
"//components/url_pattern_index",
"//extensions/browser/api/declarative_net_request/flat:extension_ruleset",
"//third_party/re2",
]
deps = [
......@@ -53,7 +57,6 @@ source_set("declarative_net_request") {
"//extensions/common",
"//extensions/common/api",
"//net",
"//third_party/re2",
"//tools/json_schema_compiler:generated_api_util",
"//url",
]
......
......@@ -37,7 +37,7 @@ std::vector<url_pattern_index::UrlPatternIndexMatcher> GetMatchers(
return matchers;
}
bool HasAnyRules(const url_pattern_index::flat::UrlPatternIndex* index) {
bool HasAnyRules(const flat_rule::UrlPatternIndex* index) {
DCHECK(index);
if (index->fallback_rules()->size() > 0)
......
......@@ -13,6 +13,7 @@
#include "base/strings/string_util.h"
#include "components/url_pattern_index/url_pattern_index.h"
#include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/browser/api/declarative_net_request/utils.h"
#include "extensions/common/api/declarative_net_request.h"
#include "extensions/common/api/declarative_net_request/utils.h"
#include "third_party/re2/src/re2/re2.h"
......@@ -362,24 +363,11 @@ ParseResult ParseRedirect(dnr_api::Redirect redirect,
bool IsValidRegex(const dnr_api::Rule& parsed_rule) {
DCHECK(parsed_rule.condition.regex_filter);
re2::RE2::Options options;
// RE2 supports UTF-8 and Latin1 encoding. We only need to support ASCII, so
// use Latin1 encoding. This should also be more efficient than UTF-8.
// Note: Latin1 is an 8 bit extension to ASCII.
options.set_encoding(re2::RE2::Options::EncodingLatin1);
options.set_case_sensitive(IsCaseSensitive(parsed_rule));
// Don't capture unless needed, for efficiency.
// TODO(crbug.com/974391): Capturing should be supported for regex based
// substitutions which are not implemented yet.
options.set_never_capture(true);
// TODO(crbug.com/974391): Regex compilation can be expensive. Also, these
// need to be compiled again once the ruleset is loaded, which means duplicate
// work. We should maintain a global cache of compiled regexes.
re2::RE2 regex(*parsed_rule.condition.regex_filter, options);
// TODO(karandeepb): Regex compilation can be expensive. Also, these need to
// be compiled again once the ruleset is loaded, which means duplicate work.
// We should maintain a global cache of compiled regexes.
re2::RE2 regex(*parsed_rule.condition.regex_filter,
CreateRE2Options(IsCaseSensitive(parsed_rule)));
return regex.ok();
}
......
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "extensions/browser/api/declarative_net_request/regex_rules_matcher.h"
#include <algorithm>
#include "base/strings/string_util.h"
#include "components/url_pattern_index/url_pattern_index.h"
#include "extensions/browser/api/declarative_net_request/request_action.h"
#include "extensions/browser/api/declarative_net_request/request_params.h"
#include "extensions/browser/api/declarative_net_request/utils.h"
namespace extensions {
namespace declarative_net_request {
namespace flat_rule = url_pattern_index::flat;
namespace {
// Returns true if at least one rule in |regex_list| has the remove-headers
// action type. |regex_list| must be non-null.
bool IsExtraHeadersMatcherInternal(
    const RegexRulesMatcher::RegexRulesList* regex_list) {
  DCHECK(regex_list);

  // We only support removing a subset of extra headers currently. If that
  // changes, the implementation here should change as well.
  static_assert(flat::ActionIndex_count == 7,
                "Modify this method to ensure IsExtraHeadersMatcherInternal is "
                "updated as new actions are added.");

  for (const flat::RegexRule* regex_rule : *regex_list) {
    if (regex_rule->action_type() == flat::ActionType_remove_headers)
      return true;
  }
  return false;
}
// Wraps the bytes of the flatbuffer string |str| in a re2::StringPiece built
// from its c_str() pointer and size().
re2::StringPiece ToRE2StringPiece(const ::flatbuffers::String& str) {
  const char* const bytes = str.c_str();
  const auto length = str.size();
  return re2::StringPiece(bytes, length);
}
// Returns whether the non-regex parts of |rule| (element type, party-ness and
// included/excluded domains) are compatible with the request |params|.
bool DoesRuleMetadataMatchRequest(const flat_rule::UrlRule& rule,
                                  const RequestParams& params) {
  // Element type and first-/third-party flags.
  const bool flags_match = url_pattern_index::DoesRuleFlagsMatch(
      rule, params.element_type, flat_rule::ActivationType_NONE,
      params.is_third_party);

  // Included and excluded domains; only evaluated when the flags matched.
  return flags_match &&
         url_pattern_index::DoesOriginMatchDomainList(
             params.first_party_origin, rule,
             false /* disable_generic_rules */);
}
} // namespace
// Pairs an indexed regex rule with its compiled regular expression. Both
// pointers are stored as-is and must be non-null (enforced by DCHECK in debug
// builds).
RegexRuleInfo::RegexRuleInfo(const flat::RegexRule* regex_rule,
const re2::RE2* regex)
: regex_rule(regex_rule), regex(regex) {
DCHECK(regex_rule);
DCHECK(regex);
}
// Copying is member-wise: only the two pointers are copied.
RegexRuleInfo::RegexRuleInfo(const RegexRuleInfo& info) = default;
RegexRuleInfo& RegexRuleInfo::operator=(const RegexRuleInfo& info) = default;
// Stores the flatbuffer pointers, precomputes whether any rule in |regex_list|
// removes headers, and then builds the matching data structures via
// InitializeMatcher(). |regex_list| must be non-null (it is dereferenced
// during construction).
RegexRulesMatcher::RegexRulesMatcher(
const ExtensionId& extension_id,
api::declarative_net_request::SourceType source_type,
const RegexRulesList* regex_list,
const ExtensionMetadataList* metadata_list)
: RulesetMatcherInterface(extension_id, source_type),
regex_list_(regex_list),
metadata_list_(metadata_list),
is_extra_headers_matcher_(IsExtraHeadersMatcherInternal(regex_list)) {
InitializeMatcher();
}
RegexRulesMatcher::~RegexRulesMatcher() = default;
// Returns the block/collapse action for the highest priority matching "block"
// regex rule, or base::nullopt if no such rule matches |params|.
base::Optional<RequestAction> RegexRulesMatcher::GetBlockOrCollapseAction(
    const RequestParams& params) const {
  if (const flat_rule::UrlRule* matching_rule =
          GetHighestPriorityMatchingRule(params, flat::ActionType_block)) {
    return CreateBlockOrCollapseRequestAction(params, *matching_rule);
  }
  return base::nullopt;
}
// Returns the allow action for the highest priority matching "allow" regex
// rule, or base::nullopt if no such rule matches |params|.
base::Optional<RequestAction> RegexRulesMatcher::GetAllowAction(
    const RequestParams& params) const {
  if (const flat_rule::UrlRule* matching_rule =
          GetHighestPriorityMatchingRule(params, flat::ActionType_allow)) {
    return CreateAllowAction(params, *matching_rule);
  }
  return base::nullopt;
}
// Returns the redirect action for the highest priority matching "redirect"
// regex rule, or base::nullopt if no such rule matches |params|. The redirect
// target is resolved using |metadata_list_|.
base::Optional<RequestAction> RegexRulesMatcher::GetRedirectAction(
    const RequestParams& params) const {
  if (const flat_rule::UrlRule* matching_rule =
          GetHighestPriorityMatchingRule(params, flat::ActionType_redirect)) {
    return CreateRedirectAction(params, *matching_rule, *metadata_list_);
  }
  return base::nullopt;
}
// Returns the upgrade action for the highest priority matching
// "upgrade scheme" regex rule, or base::nullopt if no such rule matches
// |params|. Callers must only pass upgradeable requests.
base::Optional<RequestAction> RegexRulesMatcher::GetUpgradeAction(
    const RequestParams& params) const {
  DCHECK(IsUpgradeableRequest(params));

  if (const flat_rule::UrlRule* matching_rule = GetHighestPriorityMatchingRule(
          params, flat::ActionType_upgrade_scheme)) {
    return CreateUpgradeAction(params, *matching_rule);
  }
  return base::nullopt;
}
// Computes the mask of headers removed by matching remove-headers regex rules,
// skipping any header bit already present in |ignored_mask|. One RequestAction
// is appended to |remove_headers_actions| for every rule that contributes at
// least one new header bit. The returned mask never overlaps |ignored_mask|.
uint8_t RegexRulesMatcher::GetRemoveHeadersMask(
    const RequestParams& params,
    uint8_t ignored_mask,
    std::vector<RequestAction>* remove_headers_actions) const {
  DCHECK(remove_headers_actions);

  uint8_t removed_mask = 0;
  for (const RegexRuleInfo& candidate : GetPotentialMatches(params)) {
    const flat::RegexRule& regex_rule = *candidate.regex_rule;
    if (regex_rule.action_type() != flat::ActionType_remove_headers)
      continue;

    // Bits this rule can no longer claim: headers already removed by an
    // earlier rule in this loop plus the headers the caller asked to ignore.
    const uint8_t unavailable_mask = ignored_mask | removed_mask;
    const uint8_t rule_mask =
        regex_rule.remove_headers_mask() & ~unavailable_mask;
    if (rule_mask == 0)
      continue;

    // Only run the (expensive) regex once the rule could actually contribute.
    const bool url_matches =
        re2::RE2::PartialMatch(params.url->spec(), *candidate.regex);
    if (!url_matches)
      continue;

    removed_mask |= rule_mask;
    remove_headers_actions->push_back(
        GetRemoveHeadersActionForMask(*regex_rule.url_rule(), rule_mask));
  }

  DCHECK(!(removed_mask & ignored_mask));
  return removed_mask;
}
void RegexRulesMatcher::InitializeMatcher() {
for (const auto* regex_rule : *regex_list_) {
const flat_rule::UrlRule* rule = regex_rule->url_rule();
const bool is_case_sensitive =
!(rule->options() & flat_rule::OptionFlag_IS_CASE_INSENSITIVE);
// TODO(karandeepb): Regex compilation can be expensive and sometimes we are
// compiling the same regex twice, once during rule indexing and now during
// ruleset loading. We should try maintaining a global cache of compiled
// regexes and modify FilteredRE2 to take a regex object directly.
int re2_id;
re2::RE2::ErrorCode error_code =
filtered_re2_.Add(ToRE2StringPiece(*rule->url_pattern()),
CreateRE2Options(is_case_sensitive), &re2_id);
// Ideally there shouldn't be any error, since we had already validated the
// regular expression while indexing the ruleset. That said, there are cases
// possible where this may happen, for example, the library's implementation
// may change etc.
if (error_code != re2::RE2::NoError)
continue;
const bool did_insert =
re2_id_to_rules_map_.insert({re2_id, regex_rule}).second;
DCHECK(did_insert) << "Duplicate |re2_id| seen.";
}
// FilteredRE2 on compilation yields a set of candidate strings. These aid in
// pre-filtering and obtaining the set of potential matches for a request.
std::vector<std::string> strings_to_match;
filtered_re2_.Compile(&strings_to_match);
// FilteredRE2 guarantees that the returned set of candidate strings is
// lower-cased.
DCHECK(std::all_of(strings_to_match.begin(), strings_to_match.end(),
[](const std::string& s) {
return std::all_of(s.begin(), s.end(), [](const char c) {
return !base::IsAsciiUpper(c);
});
}));
// Convert |strings_to_match| to |filtered_re2_strings_to_match_| which stores
// a vector of url_matcher::StringPattern(s). This is necessary to use
// url_matcher::SubstringSetMatcher.
for (size_t i = 0; i < strings_to_match.size(); ++i) {
filtered_re2_strings_to_match_.emplace_back(std::move(strings_to_match[i]),
i);
}
std::vector<const url_matcher::StringPattern*> patterns;
for (const auto& pattern : filtered_re2_strings_to_match_)
patterns.push_back(&pattern);
substring_matcher_.RegisterPatterns(patterns);
}
// Walks the potential matches for |params| (sorted in descending order of
// priority) and returns the url rule of the first one whose action type is
// |type| and whose regex matches the request url. Returns null if none does.
const flat_rule::UrlRule* RegexRulesMatcher::GetHighestPriorityMatchingRule(
    const RequestParams& params,
    flat::ActionType type) const {
  for (const RegexRuleInfo& candidate : GetPotentialMatches(params)) {
    // Cheap action type check first; the regex is evaluated only if needed.
    if (candidate.regex_rule->action_type() != type)
      continue;

    if (re2::RE2::PartialMatch(params.url->spec(), *candidate.regex))
      return candidate.regex_rule->url_rule();
  }
  return nullptr;
}
const std::vector<RegexRuleInfo>& RegexRulesMatcher::GetPotentialMatches(
const RequestParams& params) const {
auto iter = params.potential_regex_matches.find(this);
if (iter != params.potential_regex_matches.end())
return iter->second;
// Compute the potential matches. FilteredRE2 requires the text to be lower
// cased first.
if (!params.lower_cased_url_spec)
params.lower_cased_url_spec = base::ToLowerASCII(params.url->spec());
// To pre-filter the set of regexes to match against |params|, we first need
// to compute the set of candidate strings in |filtered_re2_strings_to_match_|
// within |params.lower_cased_url_spec|.
std::set<int> candidate_ids_set;
substring_matcher_.Match(*params.lower_cased_url_spec, &candidate_ids_set);
std::vector<int> candidate_ids_list(candidate_ids_set.begin(),
candidate_ids_set.end());
// FilteredRE2 then yields the set of potential regex matches.
std::vector<int> potential_re2_ids;
filtered_re2_.AllPotentials(candidate_ids_list, &potential_re2_ids);
// We prune the set of potential matches even further by matching request
// metadata.
std::vector<RegexRuleInfo> potential_matches;
for (int re2_id : potential_re2_ids) {
auto it = re2_id_to_rules_map_.find(re2_id);
DCHECK(it != re2_id_to_rules_map_.end());
const flat::RegexRule* rule = it->second;
if (!DoesRuleMetadataMatchRequest(*rule->url_rule(), params))
continue;
const RE2& regex = filtered_re2_.GetRE2(re2_id);
potential_matches.emplace_back(rule, &regex);
}
// Sort potential matches in descending order of priority.
std::sort(potential_matches.begin(), potential_matches.end(),
[](const RegexRuleInfo& lhs, const RegexRuleInfo& rhs) {
return lhs.regex_rule->url_rule()->priority() >
rhs.regex_rule->url_rule()->priority();
});
// Cache |potential_matches|.
auto result = params.potential_regex_matches.insert(
std::make_pair(this, std::move(potential_matches)));
return result.first->second;
}
} // namespace declarative_net_request
} // namespace extensions
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
#define EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
#include "base/macros.h"
#include "components/url_matcher/substring_set_matcher.h"
#include "extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h"
#include "third_party/re2/src/re2/filtered_re2.h"
namespace extensions {
namespace declarative_net_request {
// Structure to hold a RegexRule together with its corresponding compiled
// re2::RE2 object. Both members are raw, non-owning pointers; the struct is
// cheaply copyable.
struct RegexRuleInfo {
// |regex_rule| and |regex| are stored as given.
RegexRuleInfo(const flat::RegexRule* regex_rule, const re2::RE2* regex);
RegexRuleInfo(const RegexRuleInfo& info);
RegexRuleInfo& operator=(const RegexRuleInfo& info);
// The indexed (flatbuffer) regex rule.
const flat::RegexRule* regex_rule;
// The compiled regular expression for |regex_rule|.
const re2::RE2* regex;
};
// RegexRulesMatcher deals with matching of regular expression rules. It is an
// implementation detail of RulesetMatcher. This uses the FilteredRE2 class from
// the re2 library to achieve fast matching of a set of declarative regex rules
// against a request. How this works:
//
// Initialization:
// 1. During initialization, we add each regex to the FilteredRE2 class.
// 2. We compile the FilteredRE2 object which returns us a set of substrings.
// These are stored in |filtered_re2_strings_to_match_| below. These are also
// added to |substring_matcher_| for use in #3 below.
//
// Matching:
// 3. Given a request url, we find the set of strings from #2. that are
// substrings of the request url. This uses the
// url_matcher::SubstringSetMatcher class which internally uses the
// Aho-Corasick algorithm.
// 4. Given the list of matched strings from #3, FilteredRE2 returns the list
// of regexes (rules) that might potentially match. To reduce the number of
// regexes that need to be matched (since it's expensive), we prune the list
// even further by checking if the rule metadata matches the request.
// 5. Given the list of potentially matching rules, we finally match the actual
// regexes against the request url, as required.
class RegexRulesMatcher final : public RulesetMatcherInterface {
public:
using RegexRulesList =
::flatbuffers::Vector<flatbuffers::Offset<flat::RegexRule>>;
// |regex_list| and |metadata_list| point to flatbuffer indexed data and are
// not owned; they must stay valid for the lifetime of this object. All
// matching data structures are built during construction.
RegexRulesMatcher(const ExtensionId& extension_id,
api::declarative_net_request::SourceType source_type,
const RegexRulesList* regex_list,
const ExtensionMetadataList* metadata_list);
// RulesetMatcherInterface override:
~RegexRulesMatcher() override;
base::Optional<RequestAction> GetBlockOrCollapseAction(
const RequestParams& params) const override;
base::Optional<RequestAction> GetAllowAction(
const RequestParams& params) const override;
base::Optional<RequestAction> GetRedirectAction(
const RequestParams& params) const override;
base::Optional<RequestAction> GetUpgradeAction(
const RequestParams& params) const override;
uint8_t GetRemoveHeadersMask(
const RequestParams& params,
uint8_t ignored_mask,
std::vector<RequestAction>* remove_headers_actions) const override;
bool IsExtraHeadersMatcher() const override {
return is_extra_headers_matcher_;
}
private:
// Helper to build the necessary data structures for matching.
void InitializeMatcher();
// Returns the highest priority matching rule for the given request |params|
// and action |type|, or null if no rules match.
const url_pattern_index::flat::UrlRule* GetHighestPriorityMatchingRule(
const RequestParams& params,
flat::ActionType type) const;
// Returns the potentially matching rules for the given request. A potentially
// matching rule is one whose metadata matches the given request |params| and
// which is not ruled out as a potential match by the |filtered_re2_| object.
// Note: The returned vector is sorted in descending order of rule priority.
// The result is cached on |params|, which owns the returned vector.
const std::vector<RegexRuleInfo>& GetPotentialMatches(
const RequestParams& params) const;
// Pointers to flatbuffer indexed data. Guaranteed to be valid through the
// lifetime of the object.
const RegexRulesList* const regex_list_;
const ExtensionMetadataList* const metadata_list_;
// Whether |regex_list_| contains a remove-headers rule. Computed once at
// construction.
const bool is_extra_headers_matcher_;
// Data structures used for matching. Initialized during construction in
// InitializeMatcher() and immutable for the rest of the object lifetime.
// This provides a pre-filtering mechanism, to reduce the number of regular
// expressions that are actually matched against a request.
re2::FilteredRE2 filtered_re2_;
// Map from re2 ID (as used by |filtered_re2_|) to the flat::RegexRule in
// |regex_list_|.
std::map<int, const flat::RegexRule*> re2_id_to_rules_map_;
// Candidate strings to match for each request, for pre-filtering. The ID of
// each url_matcher::StringPattern is its index within the vector. All the
// strings are lower-cased.
std::vector<url_matcher::StringPattern> filtered_re2_strings_to_match_;
// Structure for fast substring matching. Given a string S and a set of
// candidate strings, returns the sub-set of candidate strings that are a
// substring of S. Uses the Aho-Corasick algorithm internally.
url_matcher::SubstringSetMatcher substring_matcher_;
DISALLOW_COPY_AND_ASSIGN(RegexRulesMatcher);
};
} // namespace declarative_net_request
} // namespace extensions
#endif // EXTENSIONS_BROWSER_API_DECLARATIVE_NET_REQUEST_REGEX_RULES_MATCHER_H_
......@@ -7,11 +7,12 @@
#include "base/containers/flat_map.h"
#include "base/macros.h"
#include "base/optional.h"
#include "components/url_pattern_index/url_pattern_index.h"
#include "extensions/browser/api/declarative_net_request/regex_rules_matcher.h"
#include "url/gurl.h"
#include "url/origin.h"
class GURL;
namespace extensions {
struct WebRequestInfo;
......@@ -36,6 +37,14 @@ struct RequestParams {
// a cache to prevent additional calls to GetAllowAction.
mutable base::flat_map<const RulesetMatcher*, bool> allow_rule_cache;
// Lower cased url, used for regex matching. Cached for performance.
mutable base::Optional<std::string> lower_cased_url_spec;
// Map from RegexRulesMatcher to a vector of potential matches for this
// request. Cached for performance.
mutable base::flat_map<const RegexRulesMatcher*, std::vector<RegexRuleInfo>>
potential_regex_matches;
// Pointer to the corresponding WebRequestInfo object. Outlives this struct.
// Can be null for some unit tests.
const WebRequestInfo* request_info = nullptr;
......
......@@ -21,6 +21,21 @@
namespace extensions {
namespace declarative_net_request {
namespace {
// Returns whichever of |lhs| and |rhs| has the greater |rule_priority|. If
// only one of them holds a value, that one is returned; if neither does, the
// result is empty. On equal priorities |rhs| wins.
base::Optional<RequestAction> GetMaxPriorityAction(
    base::Optional<RequestAction> lhs,
    base::Optional<RequestAction> rhs) {
  const bool has_lhs = lhs.has_value();
  const bool has_rhs = rhs.has_value();

  if (!has_lhs)
    return rhs;
  if (!has_rhs)
    return lhs;

  // Both are present: |lhs| is picked only when strictly higher in priority.
  if (lhs->rule_priority > rhs->rule_priority)
    return lhs;
  return rhs;
}
} // namespace
// static
RulesetMatcher::LoadRulesetResult RulesetMatcher::CreateVerifiedMatcher(
const RulesetSource& source,
......@@ -65,17 +80,27 @@ RulesetMatcher::~RulesetMatcher() = default;
base::Optional<RequestAction> RulesetMatcher::GetBlockOrCollapseAction(
const RequestParams& params) const {
return url_pattern_index_matcher_.GetBlockOrCollapseAction(params);
base::Optional<RequestAction> action =
url_pattern_index_matcher_.GetBlockOrCollapseAction(params);
if (!action)
action = regex_matcher_.GetBlockOrCollapseAction(params);
return action;
}
base::Optional<RequestAction> RulesetMatcher::GetAllowAction(
const RequestParams& params) const {
return url_pattern_index_matcher_.GetAllowAction(params);
base::Optional<RequestAction> action =
url_pattern_index_matcher_.GetAllowAction(params);
if (!action)
action = regex_matcher_.GetAllowAction(params);
return action;
}
base::Optional<RequestAction> RulesetMatcher::GetRedirectAction(
const RequestParams& params) const {
return url_pattern_index_matcher_.GetRedirectAction(params);
return GetMaxPriorityAction(
url_pattern_index_matcher_.GetRedirectAction(params),
regex_matcher_.GetRedirectAction(params));
}
base::Optional<RequestAction> RulesetMatcher::GetUpgradeAction(
......@@ -83,34 +108,36 @@ base::Optional<RequestAction> RulesetMatcher::GetUpgradeAction(
if (!IsUpgradeableRequest(params))
return base::nullopt;
return url_pattern_index_matcher_.GetUpgradeAction(params);
return GetMaxPriorityAction(
url_pattern_index_matcher_.GetUpgradeAction(params),
regex_matcher_.GetUpgradeAction(params));
}
uint8_t RulesetMatcher::GetRemoveHeadersMask(
const RequestParams& params,
uint8_t ignored_mask,
std::vector<RequestAction>* remove_headers_actions) const {
return url_pattern_index_matcher_.GetRemoveHeadersMask(
DCHECK(remove_headers_actions);
static_assert(
flat::RemoveHeaderType_ANY <= std::numeric_limits<uint8_t>::max(),
"flat::RemoveHeaderType can't fit in a uint8_t");
uint8_t mask = url_pattern_index_matcher_.GetRemoveHeadersMask(
params, ignored_mask, remove_headers_actions);
return mask | regex_matcher_.GetRemoveHeadersMask(params, ignored_mask | mask,
remove_headers_actions);
}
bool RulesetMatcher::IsExtraHeadersMatcher() const {
return url_pattern_index_matcher_.IsExtraHeadersMatcher();
return url_pattern_index_matcher_.IsExtraHeadersMatcher() ||
regex_matcher_.IsExtraHeadersMatcher();
}
// Returns whichever of the matching redirect and upgrade actions for |params|
// GetMaxPriorityAction() selects, or base::nullopt if neither kind of rule
// matches.
// NOTE(review): tie-breaking between equal priorities is delegated to
// GetMaxPriorityAction(); confirm it still prefers the upgrade action (the
// second argument) on a tie.
base::Optional<RequestAction>
RulesetMatcher::GetRedirectOrUpgradeActionByPriority(
    const RequestParams& params) const {
  return GetMaxPriorityAction(GetRedirectAction(params),
                              GetUpgradeAction(params));
}
RulesetMatcher::RulesetMatcher(
......@@ -127,7 +154,11 @@ RulesetMatcher::RulesetMatcher(
url_pattern_index_matcher_(extension_id,
source_type,
root_->index_list(),
root_->extension_metadata()) {}
root_->extension_metadata()),
regex_matcher_(extension_id,
source_type,
root_->regex_rules(),
root_->extension_metadata()) {}
} // namespace declarative_net_request
} // namespace extensions
......@@ -11,6 +11,7 @@
#include "extensions/browser/api/declarative_net_request/extension_url_pattern_index_matcher.h"
#include "extensions/browser/api/declarative_net_request/flat/extension_ruleset_generated.h"
#include "extensions/browser/api/declarative_net_request/regex_rules_matcher.h"
#include "extensions/browser/api/declarative_net_request/ruleset_matcher_interface.h"
namespace extensions {
......@@ -26,7 +27,7 @@ struct UrlRuleMetadata;
// RulesetMatcher encapsulates the Declarative Net Request API ruleset
// corresponding to a single RulesetSource. Since this class is immutable, it is
// thread-safe.
class RulesetMatcher : public RulesetMatcherInterface {
class RulesetMatcher final : public RulesetMatcherInterface {
public:
// Describes the result of creating a RulesetMatcher instance.
// This is logged as part of UMA. Hence existing values should not be re-
......@@ -65,7 +66,6 @@ class RulesetMatcher : public RulesetMatcherInterface {
// RulesetMatcherInterface overrides:
~RulesetMatcher() override;
base::Optional<RequestAction> GetBlockOrCollapseAction(
const RequestParams& params) const override;
base::Optional<RequestAction> GetAllowAction(
......@@ -112,6 +112,9 @@ class RulesetMatcher : public RulesetMatcherInterface {
// |url_pattern_index| component.
const ExtensionUrlPatternIndexMatcher url_pattern_index_matcher_;
// Underlying matcher for regex rules.
const RegexRulesMatcher regex_matcher_;
DISALLOW_COPY_AND_ASSIGN(RulesetMatcher);
};
......
......@@ -28,13 +28,13 @@ class RulesetMatcherInterface {
virtual ~RulesetMatcherInterface();
// Returns any matching RequestAction with type |BLOCK| or |COLLAPSE|, or
// base::nullopt if the ruleset has no matching blocking rule.
virtual base::Optional<RequestAction> GetBlockOrCollapseAction(
const RequestParams& params) const = 0;
// Returns any matching RequestAction with type |ALLOW| or base::nullopt if
// the ruleset has no matching allow rule.
virtual base::Optional<RequestAction> GetAllowAction(
const RequestParams& params) const = 0;
......@@ -53,6 +53,7 @@ class RulesetMatcherInterface {
// Returns the bitmask of headers to remove from the request. The bitmask
// corresponds to flat::RemoveHeaderType. |ignored_mask| denotes the mask of
// headers to be skipped for evaluation and is excluded in the return value.
// TODO(karandeepb): Rename |ignored_mask| to |excluded_remove_headers_mask|.
virtual uint8_t GetRemoveHeadersMask(
const RequestParams& params,
uint8_t ignored_mask,
......
......@@ -8,6 +8,7 @@
#include <vector>
#include "base/files/file_util.h"
#include "base/format_macros.h"
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
......@@ -498,6 +499,514 @@ TEST_F(RulesetMatcherTest, UrlTransform) {
}
}
// Tests regex rules are evaluated correctly for different action types. One
// regex rule is created per action type (block, allow, redirect, upgradeScheme
// and removeHeaders) and each matcher entry point is checked against a set of
// matching and non-matching urls.
TEST_F(RulesetMatcherTest, RegexRules) {
  // Returns a generic rule with id |id| whose url filter is replaced by the
  // regex filter |regex_filter|.
  auto create_regex_rule = [](size_t id, const std::string& regex_filter) {
    TestRule rule = CreateGenericRule();
    rule.id = id;
    rule.condition->url_filter.reset();
    rule.condition->regex_filter = regex_filter;
    return rule;
  };

  std::vector<TestRule> rules;

  // Add a blocking rule.
  TestRule block_rule = create_regex_rule(1, R"((?:block|collapse)\.com/path)");
  rules.push_back(block_rule);

  // Add an allowlist rule.
  TestRule allow_rule = create_regex_rule(2, R"(http://(\w+\.)+allow\.com)");
  allow_rule.action->type = "allow";
  rules.push_back(allow_rule);

  // Add a redirect rule.
  TestRule redirect_rule = create_regex_rule(3, R"(redirect\.com)");
  redirect_rule.action->type = "redirect";
  redirect_rule.action->redirect.emplace();
  redirect_rule.priority = kMinValidPriority;
  redirect_rule.action->redirect->url = "https://google.com";
  rules.push_back(redirect_rule);

  // Add an upgrade rule.
  TestRule upgrade_rule = create_regex_rule(4, "upgrade");
  upgrade_rule.action->type = "upgradeScheme";
  upgrade_rule.priority = kMinValidPriority;
  rules.push_back(upgrade_rule);

  // Add a remove headers rule.
  TestRule remove_headers_rule =
      create_regex_rule(5, R"(^(?:http|https)://[a-z\.]+\.in)");
  remove_headers_rule.action->type = "removeHeaders";
  remove_headers_rule.action->remove_headers_list =
      std::vector<std::string>({"referer", "cookie"});
  rules.push_back(remove_headers_rule);

  std::unique_ptr<RulesetMatcher> matcher;
  ASSERT_TRUE(CreateVerifiedMatcher(rules, CreateTemporarySource(), &matcher));

  // A request url together with the result expected from each matcher entry
  // point. Fields left at their defaults mean "no match".
  struct TestCase {
    const char* url = nullptr;
    base::Optional<RequestAction> expected_block_or_collapse_action;
    base::Optional<RequestAction> expected_allow_action;
    base::Optional<RequestAction> expected_redirect_action;
    base::Optional<RequestAction> expected_upgrade_action;
    uint8_t expected_remove_headers_mask = 0u;
    base::Optional<RequestAction> expected_remove_header_action;
  };

  std::vector<TestCase> test_cases;

  {
    TestCase test_case = {"http://www.block.com/path"};
    test_case.expected_block_or_collapse_action = CreateRequestActionForTesting(
        RequestAction::Type::BLOCK, *block_rule.id);
    test_cases.push_back(std::move(test_case));
  }

  {
    TestCase test_case = {"http://www.collapse.com/PATH"};
    // Filters are case sensitive by default, hence the request doesn't match.
    // The case is added exactly once; |test_case| must not be reused after
    // being moved from.
    test_cases.push_back(std::move(test_case));
  }

  {
    TestCase test_case = {"http://abc.xyz.allow.com/path"};
    test_case.expected_allow_action = CreateRequestActionForTesting(
        RequestAction::Type::ALLOW, *allow_rule.id);
    test_cases.push_back(std::move(test_case));
  }

  {
    // The allow regex requires at least one subdomain label, so the bare
    // domain doesn't match.
    TestCase test_case = {"http://allow.com/path"};
    test_cases.push_back(std::move(test_case));
  }

  {
    TestCase test_case = {"http://redirect.com?path=abc"};
    test_case.expected_redirect_action = CreateRequestActionForTesting(
        RequestAction::Type::REDIRECT, *redirect_rule.id);
    test_case.expected_redirect_action->redirect_url =
        GURL(*redirect_rule.action->redirect->url);
    test_cases.push_back(std::move(test_case));
  }

  {
    TestCase test_case = {"http://redirect.eu#query"};
    test_cases.push_back(std::move(test_case));
  }

  {
    // Upgrade rules are surfaced as REDIRECT actions to the https url.
    TestCase test_case = {"http://example.com/upgrade"};
    test_case.expected_upgrade_action = CreateRequestActionForTesting(
        RequestAction::Type::REDIRECT, *upgrade_rule.id);
    test_case.expected_upgrade_action->redirect_url.emplace(
        "https://example.com/upgrade");
    test_cases.push_back(std::move(test_case));
  }

  {
    TestCase test_case = {"http://abc.in/path"};
    test_case.expected_remove_headers_mask =
        flat::RemoveHeaderType_cookie | flat::RemoveHeaderType_referer;
    test_case.expected_remove_header_action = CreateRequestActionForTesting(
        RequestAction::Type::REMOVE_HEADERS, *remove_headers_rule.id);
    test_case.expected_remove_header_action->request_headers_to_remove = {
        net::HttpRequestHeaders::kCookie, net::HttpRequestHeaders::kReferer};
    test_cases.push_back(std::move(test_case));
  }

  {
    // Digits are not part of the [a-z\.] character class, hence no match.
    TestCase test_case = {"http://abc123.in/path"};
    test_cases.push_back(std::move(test_case));
  }

  {
    TestCase test_case = {"http://example.com"};
    test_cases.push_back(std::move(test_case));
  }

  for (const auto& test_case : test_cases) {
    SCOPED_TRACE(test_case.url);

    GURL url(test_case.url);
    RequestParams params;
    params.url = &url;

    EXPECT_EQ(test_case.expected_block_or_collapse_action,
              matcher->GetBlockOrCollapseAction(params));
    EXPECT_EQ(test_case.expected_allow_action, matcher->GetAllowAction(params));
    EXPECT_EQ(test_case.expected_redirect_action,
              matcher->GetRedirectAction(params));
    EXPECT_EQ(test_case.expected_upgrade_action,
              matcher->GetUpgradeAction(params));

    std::vector<RequestAction> remove_header_actions;
    EXPECT_EQ(test_case.expected_remove_headers_mask,
              matcher->GetRemoveHeadersMask(params, 0u /* ignored_mask */,
                                            &remove_header_actions));
    if (test_case.expected_remove_header_action) {
      EXPECT_THAT(remove_header_actions,
                  testing::ElementsAre(testing::Eq(testing::ByRef(
                      test_case.expected_remove_header_action))));
    } else {
      EXPECT_TRUE(remove_header_actions.empty());
    }
  }

  // The ruleset contains a remove headers rule.
  EXPECT_TRUE(matcher->IsExtraHeadersMatcher());
}
// Verifies that the non-filter parts of a regex rule's condition (case
// sensitivity, domains, resource types and domain type) are honoured when
// matching requests.
TEST_F(RulesetMatcherTest, RegexRules_Metadata) {
  const auto make_regex_rule = [](size_t rule_id, const std::string& pattern) {
    TestRule regex_rule = CreateGenericRule();
    regex_rule.id = rule_id;
    regex_rule.condition->url_filter.reset();
    regex_rule.condition->regex_filter = pattern;
    return regex_rule;
  };

  // Case sensitive rule (the default).
  TestRule path_rule = make_regex_rule(1, "/PATH");

  // Case insensitive rule.
  TestRule xyz_rule = make_regex_rule(2, "/XYZ");
  xyz_rule.condition->is_url_filter_case_sensitive = false;

  // Rule exercising |domains| and |excludedDomains|.
  TestRule google_rule = make_regex_rule(3, "google");
  google_rule.condition->domains = std::vector<std::string>({"example.com"});
  google_rule.condition->excluded_domains =
      std::vector<std::string>({"b.example.com"});

  // Rule exercising |resourceTypes|.
  TestRule sub_frame_rule = make_regex_rule(4, R"((abc|def)\.com)");
  sub_frame_rule.condition->resource_types =
      std::vector<std::string>({"sub_frame"});

  // Rule exercising |domainType|.
  TestRule third_party_rule = make_regex_rule(5, R"(http://(\d+)\.com)");
  third_party_rule.condition->domain_type = "thirdParty";

  const std::vector<TestRule> rules = {path_rule, xyz_rule, google_rule,
                                       sub_frame_rule, third_party_rule};

  std::unique_ptr<RulesetMatcher> matcher;
  ASSERT_TRUE(CreateVerifiedMatcher(rules, CreateTemporarySource(), &matcher));

  // Request parameters together with the expected blocking result.
  struct TestCase {
    const char* url = nullptr;
    url::Origin first_party_origin;
    url_pattern_index::flat::ElementType element_type =
        url_pattern_index::flat::ElementType_OTHER;
    bool is_third_party = false;
    base::Optional<RequestAction> expected_block_or_collapse_action;
  };
  std::vector<TestCase> test_cases;

  // Appends a fully specified test case to |test_cases|.
  const auto add_case = [&test_cases](
                            const char* url, url::Origin first_party_origin,
                            url_pattern_index::flat::ElementType element_type,
                            bool is_third_party,
                            base::Optional<RequestAction> expected_action) {
    TestCase entry;
    entry.url = url;
    entry.first_party_origin = std::move(first_party_origin);
    entry.element_type = element_type;
    entry.is_third_party = is_third_party;
    entry.expected_block_or_collapse_action = std::move(expected_action);
    test_cases.push_back(std::move(entry));
  };

  // Builds the expected action of the given |type| for |rule|.
  const auto action_for = [](RequestAction::Type type, const TestRule& rule) {
    return CreateRequestActionForTesting(type, *rule.id);
  };

  const auto kOther = url_pattern_index::flat::ElementType_OTHER;
  const auto kSubdocument = url_pattern_index::flat::ElementType_SUBDOCUMENT;

  // |path_rule| is case sensitive: lower-case "path" doesn't match.
  add_case("http://example.com/path/abc", url::Origin(), kOther, false,
           base::nullopt);
  add_case("http://example.com/PATH/abc", url::Origin(), kOther, false,
           action_for(RequestAction::Type::BLOCK, path_rule));

  // |xyz_rule| is case insensitive: both casings match.
  add_case("http://example.com/xyz/abc", url::Origin(), kOther, false,
           action_for(RequestAction::Type::BLOCK, xyz_rule));
  add_case("http://example.com/XYZ/abc", url::Origin(), kOther, false,
           action_for(RequestAction::Type::BLOCK, xyz_rule));

  // |google_rule| applies to example.com initiators except b.example.com.
  add_case("http://example.com/google",
           url::Origin::Create(GURL("http://a.example.com")), kOther, true,
           action_for(RequestAction::Type::BLOCK, google_rule));
  add_case("http://example.com/google",
           url::Origin::Create(GURL("http://b.example.com")), kOther, true,
           base::nullopt);

  // |sub_frame_rule| only applies to sub frame requests.
  add_case("http://abc.com", url::Origin(), kOther, false, base::nullopt);
  add_case("http://abc.com", url::Origin(), kSubdocument, false,
           action_for(RequestAction::Type::COLLAPSE, sub_frame_rule));

  // |third_party_rule| only applies to third party requests.
  add_case("http://243.com", url::Origin(), kOther, true,
           action_for(RequestAction::Type::BLOCK, third_party_rule));
  add_case("http://243.com", url::Origin::Create(GURL("http://243.com")),
           kOther, false, base::nullopt);

  size_t case_index = 0;
  for (const TestCase& test_case : test_cases) {
    SCOPED_TRACE(base::StringPrintf("Case number-%" PRIuS " url-%s",
                                    case_index, test_case.url));

    GURL request_url(test_case.url);
    RequestParams params;
    params.url = &request_url;
    params.first_party_origin = test_case.first_party_origin;
    params.element_type = test_case.element_type;
    params.is_third_party = test_case.is_third_party;

    EXPECT_EQ(test_case.expected_block_or_collapse_action,
              matcher->GetBlockOrCollapseAction(params));
    ++case_index;
  }
}
// Checks that, when both regex and filter-list style redirect/upgrade rules
// match a request, RulesetMatcher reports the action of the rule with the
// highest priority regardless of which underlying matcher produced it.
TEST_F(RulesetMatcherTest, RegexAndFilterListRules_RedirectPriority) {
  struct RuleInfo {
    size_t id;
    size_t priority;
    const char* action_type;
    const char* filter;
    bool is_regex_rule;
    base::Optional<std::string> redirect_url;
  };
  const RuleInfo rule_info[] = {
      {1, 1, "redirect", "filter.com", false, "http://redirect_filter.com"},
      {2, 1, "upgradeScheme", "regex\\.com", true, base::nullopt},
      {3, 9, "redirect", "common1.com", false, "http://common1_filter.com"},
      {4, 10, "redirect", "common1\\.com", true, "http://common1_regex.com"},
      {5, 10, "upgradeScheme", "common2.com", false, base::nullopt},
      {6, 9, "upgradeScheme", "common2\\.com", true, base::nullopt},
      {7, 10, "redirect", "abc\\.com", true, "http://example1.com"},
      {8, 9, "redirect", "abc", true, "http://example2.com"},
  };

  std::vector<TestRule> rules;
  for (const RuleInfo& info : rule_info) {
    TestRule rule = CreateGenericRule();
    rule.id = info.id;
    rule.priority = info.priority;
    rule.action->type = info.action_type;

    // Exactly one of the two filter kinds ends up set on the rule.
    rule.condition->url_filter.reset();
    if (!info.is_regex_rule)
      rule.condition->url_filter = info.filter;
    else
      rule.condition->regex_filter = info.filter;

    if (info.redirect_url) {
      rule.action->redirect.emplace();
      rule.action->redirect->url = info.redirect_url;
    }

    rules.push_back(std::move(rule));
  }

  std::unique_ptr<RulesetMatcher> matcher;
  ASSERT_TRUE(CreateVerifiedMatcher(rules, CreateTemporarySource(), &matcher));

  struct TestCase {
    const char* url = nullptr;
    base::Optional<RequestAction> expected_redirect_action;
    base::Optional<RequestAction> expected_upgrade_action;
  };
  std::vector<TestCase> test_cases;

  // Builds the REDIRECT action expected from |rule_info[info_index]| with the
  // given target url. Upgrade rules are also reported as REDIRECT actions.
  const auto action_from = [&rule_info](size_t info_index,
                                        const char* target_url) {
    RequestAction action = CreateRequestActionForTesting(
        RequestAction::Type::REDIRECT, rule_info[info_index].id,
        rule_info[info_index].priority);
    action.redirect_url.emplace(target_url);
    return action;
  };

  // Appends a test case with the given expectations to |test_cases|.
  const auto add_case = [&test_cases](
                            const char* url,
                            base::Optional<RequestAction> redirect_action,
                            base::Optional<RequestAction> upgrade_action) {
    TestCase entry;
    entry.url = url;
    entry.expected_redirect_action = std::move(redirect_action);
    entry.expected_upgrade_action = std::move(upgrade_action);
    test_cases.push_back(std::move(entry));
  };

  // Only the filter-list redirect rule matches.
  add_case("http://filter.com", action_from(0, "http://redirect_filter.com"),
           base::nullopt);

  // Only the regex upgrade rule matches.
  add_case("http://regex.com", base::nullopt,
           action_from(1, "https://regex.com"));

  // Both kinds of redirect rules match; the regex rule has higher priority.
  add_case("http://common1.com", action_from(3, "http://common1_regex.com"),
           base::nullopt);

  // Both kinds of upgrade rules match; the filter-list rule has higher
  // priority.
  add_case("http://common2.com", base::nullopt,
           action_from(4, "https://common2.com"));

  // No action since request is not upgradeable.
  add_case("https://common2.com", base::nullopt, base::nullopt);

  // No rule matches.
  add_case("http://example.com", base::nullopt, base::nullopt);

  // Two regex redirect rules match; the higher priority one is chosen.
  add_case("http://abc.com", action_from(6, "http://example1.com"),
           base::nullopt);

  // Only the lower priority regex redirect rule matches.
  add_case("http://xyz.com/abc", action_from(7, "http://example2.com"),
           base::nullopt);

  for (size_t i = 0; i < test_cases.size(); ++i) {
    const TestCase& current = test_cases[i];
    SCOPED_TRACE(current.url);

    GURL request_url(current.url);
    RequestParams params;
    params.url = &request_url;

    EXPECT_EQ(current.expected_redirect_action,
              matcher->GetRedirectAction(params));
    EXPECT_EQ(current.expected_upgrade_action,
              matcher->GetUpgradeAction(params));
  }
}
// Checks that RulesetMatcher merges the remove headers results of regex and
// filter-list style rules, both for the returned mask and for the actions the
// removals are attributed to.
TEST_F(RulesetMatcherTest, RegexAndFilterListRules_RemoveHeaders) {
  // Rule 1: filter-list style rule removing the cookie header.
  TestRule filter_list_rule = CreateGenericRule();
  filter_list_rule.id = 1;
  filter_list_rule.action->type = "removeHeaders";
  filter_list_rule.condition->url_filter = "abc";
  filter_list_rule.action->remove_headers_list =
      std::vector<std::string>({"cookie"});

  RequestAction action_1 =
      CreateRequestActionForTesting(RequestAction::Type::REMOVE_HEADERS, 1);
  action_1.request_headers_to_remove = {net::HttpRequestHeaders::kCookie};

  // Rule 2: regex rule removing both the cookie and set-cookie headers.
  TestRule regex_rule = CreateGenericRule();
  regex_rule.id = 2;
  regex_rule.condition->url_filter.reset();
  regex_rule.condition->regex_filter = "example";
  regex_rule.action->type = "removeHeaders";
  regex_rule.action->remove_headers_list =
      std::vector<std::string>({"cookie", "setCookie"});

  RequestAction action_2 =
      CreateRequestActionForTesting(RequestAction::Type::REMOVE_HEADERS, 2);
  action_2.request_headers_to_remove = {net::HttpRequestHeaders::kCookie};
  action_2.response_headers_to_remove = {"set-cookie"};

  const std::vector<TestRule> rules = {filter_list_rule, regex_rule};

  std::unique_ptr<RulesetMatcher> matcher;
  ASSERT_TRUE(CreateVerifiedMatcher(rules, CreateTemporarySource(), &matcher));

  // Evaluates the remove headers mask for |request_url| with nothing ignored,
  // collecting the matched actions into |matched_actions|.
  const auto remove_headers_mask =
      [&matcher](const GURL& request_url,
                 std::vector<RequestAction>* matched_actions) {
        RequestParams params;
        params.url = &request_url;
        return matcher->GetRemoveHeadersMask(params, 0 /* ignored_mask */,
                                             matched_actions);
      };

  {
    // Only the filter-list rule matches.
    const GURL url("http://abc.com");
    SCOPED_TRACE(url.spec());

    std::vector<RequestAction> actions;
    EXPECT_EQ(flat::RemoveHeaderType_cookie,
              remove_headers_mask(url, &actions));
    EXPECT_THAT(actions, testing::UnorderedElementsAre(
                             testing::Eq(testing::ByRef(action_1))));
  }

  {
    // Only the regex rule matches.
    const GURL url("http://example.com");
    SCOPED_TRACE(url.spec());

    std::vector<RequestAction> actions;
    EXPECT_EQ(flat::RemoveHeaderType_cookie | flat::RemoveHeaderType_set_cookie,
              remove_headers_mask(url, &actions));
    EXPECT_THAT(actions, testing::UnorderedElementsAre(
                             testing::Eq(testing::ByRef(action_2))));
  }

  {
    // Both rules match.
    const GURL url("http://abc.com/example");
    SCOPED_TRACE(url.spec());

    std::vector<RequestAction> actions;
    EXPECT_EQ(flat::RemoveHeaderType_cookie | flat::RemoveHeaderType_set_cookie,
              remove_headers_mask(url, &actions));

    // Removal of the cookie header will be attributed to rule 1 since filter
    // list style rules are evaluated first for efficiency reasons. (Note this
    // is an internal implementation detail). Hence only the set-cookie header
    // removal will be attributed to rule 2.
    action_2.request_headers_to_remove = {};
    EXPECT_THAT(actions, testing::UnorderedElementsAre(
                             testing::Eq(testing::ByRef(action_1)),
                             testing::Eq(testing::ByRef(action_2))));
  }
}
} // namespace
} // namespace declarative_net_request
} // namespace extensions
......@@ -221,5 +221,23 @@ dnr_api::RequestDetails CreateRequestDetails(const WebRequestInfo& request) {
return details;
}
// Builds the RE2 options used for declarative regex rules. |is_case_sensitive|
// controls whether patterns compiled with the returned options match case
// sensitively.
re2::RE2::Options CreateRE2Options(bool is_case_sensitive) {
  re2::RE2::Options re2_options;

  re2_options.set_case_sensitive(is_case_sensitive);

  // RE2 can operate on either UTF-8 or Latin1 input. Only ASCII needs to be
  // supported here and Latin1 is an 8 bit extension to ASCII, so prefer it;
  // this should also be more efficient than UTF-8.
  re2_options.set_encoding(re2::RE2::Options::EncodingLatin1);

  // For efficiency, don't capture unless needed.
  // TODO(crbug.com/974391): Capturing should be supported for regex based
  // substitutions which are not implemented yet.
  re2_options.set_never_capture(true);

  return re2_options;
}
} // namespace declarative_net_request
} // namespace extensions
......@@ -16,6 +16,7 @@
#include "base/optional.h"
#include "extensions/browser/api/declarative_net_request/ruleset_source.h"
#include "extensions/common/api/declarative_net_request.h"
#include "third_party/re2/src/re2/re2.h"
namespace base {
class FilePath;
......@@ -64,6 +65,9 @@ void LogReadDynamicRulesStatus(ReadJSONRulesResult::Status status);
api::declarative_net_request::RequestDetails CreateRequestDetails(
const WebRequestInfo& request);
// Creates default RE2::Options.
re2::RE2::Options CreateRE2Options(bool is_case_sensitive);
} // namespace declarative_net_request
} // namespace extensions
......
......@@ -66,6 +66,7 @@ TestRuleCondition& TestRuleCondition::operator=(const TestRuleCondition&) =
std::unique_ptr<base::DictionaryValue> TestRuleCondition::ToValue() const {
auto dict = std::make_unique<base::DictionaryValue>();
SetValue(dict.get(), kUrlFilterKey, url_filter);
SetValue(dict.get(), kRegexFilterKey, regex_filter);
SetValue(dict.get(), kIsUrlFilterCaseSensitiveKey,
is_url_filter_case_sensitive);
SetValue(dict.get(), kDomainsKey, domains);
......
......@@ -39,6 +39,7 @@ struct TestRuleCondition : public DictionarySource {
TestRuleCondition& operator=(const TestRuleCondition&);
base::Optional<std::string> url_filter;
base::Optional<std::string> regex_filter;
base::Optional<bool> is_url_filter_case_sensitive;
base::Optional<std::vector<std::string>> domains;
base::Optional<std::vector<std::string>> excluded_domains;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment