Commit 18b72c26 authored by Karan Bhatia, committed by Commit Bot

DNR: Support indexing of regex rules.

This CL adds "regexFilter" to the schema of declarative net request rules.
These rules are also serialized to the flatbuffer. Tests are added to check
the rule validation and serialization logic.

Matching for these rules will be implemented in subsequent CLs.

Skipping presubmits due to crbug.com/956368.

BUG=974391
Doc=https://docs.google.com/document/d/1mRErUMII_gSSPaHmxyn31UOYWUaZLj0xOaezekxD2-Y/edit?usp=sharing (Internal)
NOPRESUBMIT=true

Change-Id: I356e57ed25ce187f145306e50ca169369f09262e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1869556
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#709187}
parent 98c6d551
...@@ -47,6 +47,7 @@ source_set("declarative_net_request") { ...@@ -47,6 +47,7 @@ source_set("declarative_net_request") {
"//extensions/common", "//extensions/common",
"//extensions/common/api", "//extensions/common/api",
"//net", "//net",
"//third_party/re2",
"//tools/json_schema_compiler:generated_api_util", "//tools/json_schema_compiler:generated_api_util",
"//url", "//url",
] ]
......
...@@ -29,7 +29,7 @@ const char kErrorNoApplicableResourceTypes[] = ...@@ -29,7 +29,7 @@ const char kErrorNoApplicableResourceTypes[] =
"Rule with id * is not applicable to any resource type."; "Rule with id * is not applicable to any resource type.";
const char kErrorEmptyList[] = const char kErrorEmptyList[] =
"Rule with id * cannot have an empty list as the value for * key."; "Rule with id * cannot have an empty list as the value for * key.";
const char kErrorEmptyUrlFilter[] = const char kErrorEmptyKey[] =
"Rule with id * cannot have an empty value for * key."; "Rule with id * cannot have an empty value for * key.";
const char kErrorInvalidRedirectUrl[] = const char kErrorInvalidRedirectUrl[] =
"Rule with id * does not provide a valid URL for * key."; "Rule with id * does not provide a valid URL for * key.";
...@@ -52,6 +52,9 @@ const char kErrorQueryAndTransformBothSpecified[] = ...@@ -52,6 +52,9 @@ const char kErrorQueryAndTransformBothSpecified[] =
const char kErrorJavascriptRedirect[] = const char kErrorJavascriptRedirect[] =
"Rule with id * specifies an incorrect value for the \"*\" key. Redirects " "Rule with id * specifies an incorrect value for the \"*\" key. Redirects "
"to javascript urls are not supported."; "to javascript urls are not supported.";
const char kErrorMultipleFilters[] =
"Rule with id * can only specify one of \"*\" or \"*\" keys.";
const char kErrorListNotPassed[] = "Rules file must contain a list."; const char kErrorListNotPassed[] = "Rules file must contain a list.";
const char kRuleCountExceeded[] = const char kRuleCountExceeded[] =
......
...@@ -46,6 +46,10 @@ enum class ParseResult { ...@@ -46,6 +46,10 @@ enum class ParseResult {
ERROR_INVALID_TRANSFORM_FRAGMENT, ERROR_INVALID_TRANSFORM_FRAGMENT,
ERROR_QUERY_AND_TRANSFORM_BOTH_SPECIFIED, ERROR_QUERY_AND_TRANSFORM_BOTH_SPECIFIED,
ERROR_JAVASCRIPT_REDIRECT, ERROR_JAVASCRIPT_REDIRECT,
ERROR_EMPTY_REGEX_FILTER,
ERROR_NON_ASCII_REGEX_FILTER,
ERROR_INVALID_REGEX_FILTER,
ERROR_MULTIPLE_FILTERS_SPECIFIED,
}; };
// Describes the ways in which updating dynamic rules can fail. // Describes the ways in which updating dynamic rules can fail.
...@@ -100,7 +104,7 @@ extern const char kErrorEmptyUpgradeRulePriority[]; ...@@ -100,7 +104,7 @@ extern const char kErrorEmptyUpgradeRulePriority[];
extern const char kErrorInvalidRuleKey[]; extern const char kErrorInvalidRuleKey[];
extern const char kErrorNoApplicableResourceTypes[]; extern const char kErrorNoApplicableResourceTypes[];
extern const char kErrorEmptyList[]; extern const char kErrorEmptyList[];
extern const char kErrorEmptyUrlFilter[]; extern const char kErrorEmptyKey[];
extern const char kErrorInvalidRedirectUrl[]; extern const char kErrorInvalidRedirectUrl[];
extern const char kErrorDuplicateIDs[]; extern const char kErrorDuplicateIDs[];
extern const char kErrorPersisting[]; extern const char kErrorPersisting[];
...@@ -110,6 +114,7 @@ extern const char kErrorInvalidKey[]; ...@@ -110,6 +114,7 @@ extern const char kErrorInvalidKey[];
extern const char kErrorInvalidTransformScheme[]; extern const char kErrorInvalidTransformScheme[];
extern const char kErrorQueryAndTransformBothSpecified[]; extern const char kErrorQueryAndTransformBothSpecified[];
extern const char kErrorJavascriptRedirect[]; extern const char kErrorJavascriptRedirect[];
extern const char kErrorMultipleFilters[];
extern const char kErrorListNotPassed[]; extern const char kErrorListNotPassed[];
......
...@@ -62,6 +62,8 @@ table UrlRuleMetadata { ...@@ -62,6 +62,8 @@ table UrlRuleMetadata {
/// This provides a mapping from an action to its index within the |index_list| /// This provides a mapping from an action to its index within the |index_list|
/// vector. /// vector.
/// TODO(crbug.com/1017868): This should be unified with ActionType once generic
/// priorities are implemented.
enum ActionIndex : ubyte { enum ActionIndex : ubyte {
block = 0, block = 0,
allow, allow,
...@@ -74,6 +76,36 @@ enum ActionIndex : ubyte { ...@@ -74,6 +76,36 @@ enum ActionIndex : ubyte {
count count
} }
/// The type of an action. Corresponds to
/// extensions::api::declarative_net_request::RuleActionType.
enum ActionType : ubyte {
block,
allow,
redirect,
upgrade_scheme,
remove_headers
}
/// The type of header to remove. Corresponds to
/// extensions::api::declarative_net_request::RemoveHeaderType.
enum RemoveHeaderType : ubyte (bit_flags) {
cookie,
referer,
set_cookie
}
/// Completely represents a rule with a regex filter.
table RegexRule {
/// The underlying UrlRule.
url_rule: url_pattern_index.flat.UrlRule;
/// The action to take.
action_type: ActionType;
/// The headers to be removed. Mask of RemoveHeaderType.
remove_headers_mask: ubyte;
}
/// The top-level data structure used to store extensions URL rules for the /// The top-level data structure used to store extensions URL rules for the
/// Declarative Net Request API. /// Declarative Net Request API.
table ExtensionIndexedRuleset { table ExtensionIndexedRuleset {
...@@ -81,6 +113,10 @@ table ExtensionIndexedRuleset { ...@@ -81,6 +113,10 @@ table ExtensionIndexedRuleset {
/// of ActionIndex_count indices. /// of ActionIndex_count indices.
index_list : [url_pattern_index.flat.UrlPatternIndex]; index_list : [url_pattern_index.flat.UrlPatternIndex];
// Regex rules are not matched by UrlPatternIndex and so we don't build an
// index for them.
regex_rules: [RegexRule];
/// Extension related metadata. Sorted by id, to support fast lookup. /// Extension related metadata. Sorted by id, to support fast lookup.
/// Currently this is only used for redirect rules. /// Currently this is only used for redirect rules.
extension_metadata : [UrlRuleMetadata]; extension_metadata : [UrlRuleMetadata];
......
...@@ -177,10 +177,19 @@ void FlatRulesetIndexer::AddUrlRule(const IndexedRule& indexed_rule) { ...@@ -177,10 +177,19 @@ void FlatRulesetIndexer::AddUrlRule(const IndexedRule& indexed_rule) {
domains_included_offset, domains_excluded_offset, url_pattern_offset, domains_included_offset, domains_excluded_offset, url_pattern_offset,
indexed_rule.id, indexed_rule.priority); indexed_rule.id, indexed_rule.priority);
std::vector<UrlPatternIndexBuilder*> builders = GetBuilders(indexed_rule); if (indexed_rule.url_pattern_type !=
DCHECK(!builders.empty()); url_pattern_index::flat::UrlPatternType_REGEXP) {
for (UrlPatternIndexBuilder* builder : builders) std::vector<UrlPatternIndexBuilder*> builders = GetBuilders(indexed_rule);
builder->IndexUrlRule(offset); DCHECK(!builders.empty());
for (UrlPatternIndexBuilder* builder : builders)
builder->IndexUrlRule(offset);
} else {
// A UrlPatternIndex is not built for regex rules. These are stored
// separately.
regex_rules_.push_back(
flat::CreateRegexRule(builder_, offset, GetActionType(indexed_rule),
GetRemoveHeadersMask(indexed_rule)));
}
// Store additional metadata required for a redirect rule. // Store additional metadata required for a redirect rule.
if (indexed_rule.action_type == dnr_api::RULE_ACTION_TYPE_REDIRECT) { if (indexed_rule.action_type == dnr_api::RULE_ACTION_TYPE_REDIRECT) {
...@@ -220,8 +229,12 @@ void FlatRulesetIndexer::Finish() { ...@@ -220,8 +229,12 @@ void FlatRulesetIndexer::Finish() {
FlatVectorOffset<flat::UrlRuleMetadata> extension_metadata_offset = FlatVectorOffset<flat::UrlRuleMetadata> extension_metadata_offset =
builder_.CreateVectorOfSortedTables(&metadata_); builder_.CreateVectorOfSortedTables(&metadata_);
FlatVectorOffset<flat::RegexRule> regex_rules_offset =
builder_.CreateVector(regex_rules_);
FlatOffset<flat::ExtensionIndexedRuleset> root_offset = FlatOffset<flat::ExtensionIndexedRuleset> root_offset =
flat::CreateExtensionIndexedRuleset(builder_, index_vector_offset, flat::CreateExtensionIndexedRuleset(builder_, index_vector_offset,
regex_rules_offset,
extension_metadata_offset); extension_metadata_offset);
flat::FinishExtensionIndexedRulesetBuffer(builder_, root_offset); flat::FinishExtensionIndexedRulesetBuffer(builder_, root_offset);
} }
...@@ -231,6 +244,48 @@ base::span<const uint8_t> FlatRulesetIndexer::GetData() { ...@@ -231,6 +244,48 @@ base::span<const uint8_t> FlatRulesetIndexer::GetData() {
return base::make_span(builder_.GetBufferPointer(), builder_.GetSize()); return base::make_span(builder_.GetBufferPointer(), builder_.GetSize());
} }
// Translates the API-level action type of |indexed_rule| into the equivalent
// flat::ActionType value that is stored alongside a serialized regex rule.
flat::ActionType FlatRulesetIndexer::GetActionType(
    const IndexedRule& indexed_rule) const {
  const auto api_action = indexed_rule.action_type;
  switch (api_action) {
    case dnr_api::RULE_ACTION_TYPE_NONE:
      // Treated as unreachable below.
      break;
    case dnr_api::RULE_ACTION_TYPE_ALLOW:
      return flat::ActionType_allow;
    case dnr_api::RULE_ACTION_TYPE_BLOCK:
      return flat::ActionType_block;
    case dnr_api::RULE_ACTION_TYPE_UPGRADESCHEME:
      return flat::ActionType_upgrade_scheme;
    case dnr_api::RULE_ACTION_TYPE_REMOVEHEADERS:
      return flat::ActionType_remove_headers;
    case dnr_api::RULE_ACTION_TYPE_REDIRECT:
      return flat::ActionType_redirect;
  }

  // No handled action type reaches this point; the return value only exists to
  // satisfy the compiler.
  NOTREACHED();
  return flat::ActionType_block;
}
// Builds a bitmask of flat::RemoveHeaderType flags with one bit set for every
// entry in |indexed_rule.remove_headers_set|. Returns 0 if the set is empty.
uint8_t FlatRulesetIndexer::GetRemoveHeadersMask(
    const IndexedRule& indexed_rule) const {
  uint8_t headers_mask = 0;

  for (const dnr_api::RemoveHeaderType header :
       indexed_rule.remove_headers_set) {
    switch (header) {
      case dnr_api::REMOVE_HEADER_TYPE_SETCOOKIE:
        headers_mask |= flat::RemoveHeaderType_set_cookie;
        break;
      case dnr_api::REMOVE_HEADER_TYPE_REFERER:
        headers_mask |= flat::RemoveHeaderType_referer;
        break;
      case dnr_api::REMOVE_HEADER_TYPE_COOKIE:
        headers_mask |= flat::RemoveHeaderType_cookie;
        break;
      case dnr_api::REMOVE_HEADER_TYPE_NONE:
        // The set is not expected to contain the NONE sentinel.
        NOTREACHED();
        break;
    }
  }

  return headers_mask;
}
std::vector<FlatRulesetIndexer::UrlPatternIndexBuilder*> std::vector<FlatRulesetIndexer::UrlPatternIndexBuilder*>
FlatRulesetIndexer::GetBuilders(const IndexedRule& indexed_rule) { FlatRulesetIndexer::GetBuilders(const IndexedRule& indexed_rule) {
switch (indexed_rule.action_type) { switch (indexed_rule.action_type) {
......
...@@ -45,6 +45,8 @@ class FlatRulesetIndexer { ...@@ -45,6 +45,8 @@ class FlatRulesetIndexer {
private: private:
using UrlPatternIndexBuilder = url_pattern_index::UrlPatternIndexBuilder; using UrlPatternIndexBuilder = url_pattern_index::UrlPatternIndexBuilder;
flat::ActionType GetActionType(const IndexedRule& indexed_rule) const;
uint8_t GetRemoveHeadersMask(const IndexedRule& indexed_rule) const;
std::vector<UrlPatternIndexBuilder*> GetBuilders( std::vector<UrlPatternIndexBuilder*> GetBuilders(
const IndexedRule& indexed_rule); const IndexedRule& indexed_rule);
std::vector<UrlPatternIndexBuilder*> GetRemoveHeaderBuilders( std::vector<UrlPatternIndexBuilder*> GetRemoveHeaderBuilders(
...@@ -58,6 +60,8 @@ class FlatRulesetIndexer { ...@@ -58,6 +60,8 @@ class FlatRulesetIndexer {
std::vector<flatbuffers::Offset<flat::UrlRuleMetadata>> metadata_; std::vector<flatbuffers::Offset<flat::UrlRuleMetadata>> metadata_;
std::vector<flatbuffers::Offset<flat::RegexRule>> regex_rules_;
size_t indexed_rules_count_ = 0; // Number of rules indexed till now. size_t indexed_rules_count_ = 0; // Number of rules indexed till now.
bool finished_ = false; // Whether Finish() has been called. bool finished_ = false; // Whether Finish() has been called.
......
...@@ -270,25 +270,34 @@ void VerifyExtensionMetadata( ...@@ -270,25 +270,34 @@ void VerifyExtensionMetadata(
} }
} }
// Test helper: feeds every rule in |rules_to_index| to |indexer|, finishes the
// indexer and returns the root of the serialized ruleset. Returns nullptr if
// the serialized buffer fails flatbuffer verification. The returned pointer
// refers to the data handed out by |indexer|.
const flat::ExtensionIndexedRuleset* AddRuleAndGetRuleset(
    const std::vector<IndexedRule>& rules_to_index,
    FlatRulesetIndexer* indexer) {
  for (const IndexedRule& current_rule : rules_to_index)
    indexer->AddUrlRule(current_rule);
  indexer->Finish();

  base::span<const uint8_t> buffer = indexer->GetData();

  // Every rule passed in should have been counted by the indexer.
  EXPECT_EQ(rules_to_index.size(), indexer->indexed_rules_count());

  flatbuffers::Verifier buffer_verifier(buffer.data(), buffer.size());
  const bool is_buffer_valid =
      flat::VerifyExtensionIndexedRulesetBuffer(buffer_verifier);
  return is_buffer_valid ? flat::GetExtensionIndexedRuleset(buffer.data())
                         : nullptr;
}
// Helper which: // Helper which:
// - Constructs an ExtensionIndexedRuleset flatbuffer from the passed // - Constructs an ExtensionIndexedRuleset flatbuffer from the passed
// IndexedRule(s) using FlatRulesetIndexer. // IndexedRule(s) using FlatRulesetIndexer.
// - Verifies that the ExtensionIndexedRuleset created is valid. // - Verifies that the ExtensionIndexedRuleset created is valid.
// Note: this does not test regex rules which are part of the
// ExtensionIndexedRuleset.
void AddRulesAndVerifyIndex(const std::vector<IndexedRule>& rules_to_index, void AddRulesAndVerifyIndex(const std::vector<IndexedRule>& rules_to_index,
const std::vector<const IndexedRule*> const std::vector<const IndexedRule*>
expected_index_lists[flat::ActionIndex_count]) { expected_index_lists[flat::ActionIndex_count]) {
FlatRulesetIndexer indexer; FlatRulesetIndexer indexer;
for (const auto& rule : rules_to_index)
indexer.AddUrlRule(rule);
indexer.Finish();
base::span<const uint8_t> data = indexer.GetData();
EXPECT_EQ(rules_to_index.size(), indexer.indexed_rules_count());
flatbuffers::Verifier verifier(data.data(), data.size());
ASSERT_TRUE(flat::VerifyExtensionIndexedRulesetBuffer(verifier));
const flat::ExtensionIndexedRuleset* ruleset = const flat::ExtensionIndexedRuleset* ruleset =
flat::GetExtensionIndexedRuleset(data.data()); AddRuleAndGetRuleset(rules_to_index, &indexer);
ASSERT_TRUE(ruleset); ASSERT_TRUE(ruleset);
for (size_t i = 0; i < flat::ActionIndex_count; ++i) { for (size_t i = 0; i < flat::ActionIndex_count; ++i) {
...@@ -412,6 +421,82 @@ TEST_F(FlatRulesetIndexerTest, MultipleRules) { ...@@ -412,6 +421,82 @@ TEST_F(FlatRulesetIndexerTest, MultipleRules) {
AddRulesAndVerifyIndex(rules_to_index, expected_index_lists); AddRulesAndVerifyIndex(rules_to_index, expected_index_lists);
} }
// Verify that the serialized flatbuffer data is valid for regex rules.
// Three regex rules (block, redirect and remove-headers) are indexed; the test
// then checks that no UrlPatternIndex contains them, that redirect metadata is
// recorded, and that each rule round-trips through the |regex_rules| vector.
TEST_F(FlatRulesetIndexerTest, RegexRules) {
std::vector<IndexedRule> rules_to_index;
// Blocking rule.
rules_to_index.push_back(CreateIndexedRule(
7, kMinValidPriority, flat_rule::OptionFlag_NONE,
flat_rule::ElementType_OBJECT, flat_rule::ActivationType_NONE,
flat_rule::UrlPatternType_REGEXP, flat_rule::AnchorType_NONE,
flat_rule::AnchorType_NONE, R"(^https://(abc|def))", {"a.com"},
{"x.a.com"}, base::nullopt, dnr_api::RULE_ACTION_TYPE_BLOCK, {}));
// Redirect rule.
rules_to_index.push_back(CreateIndexedRule(
15, 2, flat_rule::OptionFlag_APPLIES_TO_FIRST_PARTY,
flat_rule::ElementType_IMAGE, flat_rule::ActivationType_NONE,
flat_rule::UrlPatternType_REGEXP, flat_rule::AnchorType_NONE,
flat_rule::AnchorType_NONE, R"(^(http|https))", {}, {},
"http://example1.com", dnr_api::RULE_ACTION_TYPE_REDIRECT, {}));
// Remove headers rule.
// NOTE(review): "*" on its own is presumably not a valid RE2 pattern. The
// indexer code exercised here does not appear to compile patterns, so this
// only tests serialization -- confirm this is intentional.
rules_to_index.push_back(CreateIndexedRule(
20, kMinValidPriority, flat_rule::OptionFlag_IS_CASE_INSENSITIVE,
flat_rule::ElementType_SUBDOCUMENT, flat_rule::ActivationType_NONE,
flat_rule::UrlPatternType_REGEXP, flat_rule::AnchorType_NONE,
flat_rule::AnchorType_NONE, "*", {}, {}, base::nullopt,
dnr_api::RULE_ACTION_TYPE_REMOVEHEADERS,
{dnr_api::REMOVE_HEADER_TYPE_COOKIE,
dnr_api::REMOVE_HEADER_TYPE_SETCOOKIE}));
FlatRulesetIndexer indexer;
const flat::ExtensionIndexedRuleset* ruleset =
AddRuleAndGetRuleset(rules_to_index, &indexer);
ASSERT_TRUE(ruleset);
// All the indices should be empty, since we only have regex rules.
for (size_t i = 0; i < flat::ActionIndex_count; ++i) {
SCOPED_TRACE(base::StringPrintf("Testing index %" PRIuS, i));
VerifyIndexEquality({}, ruleset->index_list()->Get(i));
}
// We should have metadata for the redirect rule.
{
SCOPED_TRACE("Testing extension metadata");
VerifyExtensionMetadata({&rules_to_index[1]},
ruleset->extension_metadata());
}
ASSERT_TRUE(ruleset->regex_rules());
ASSERT_EQ(3u, ruleset->regex_rules()->size());
// Locate each serialized regex rule by its action type. The three rules
// indexed above all use distinct action types, so this lookup does not rely
// on the order in which the rules were serialized.
const flat::RegexRule* blocking_rule = nullptr;
const flat::RegexRule* redirect_rule = nullptr;
const flat::RegexRule* remove_header_rule = nullptr;
for (const auto* regex_rule : *ruleset->regex_rules()) {
if (regex_rule->action_type() == flat::ActionType_block)
blocking_rule = regex_rule;
else if (regex_rule->action_type() == flat::ActionType_redirect)
redirect_rule = regex_rule;
else if (regex_rule->action_type() == flat::ActionType_remove_headers)
remove_header_rule = regex_rule;
}
// Each serialized rule must match the IndexedRule it was built from. Only the
// remove-headers rule is expected to carry a non-zero header mask.
ASSERT_TRUE(blocking_rule);
EXPECT_TRUE(AreRulesEqual(&rules_to_index[0], blocking_rule->url_rule()));
EXPECT_EQ(0u, blocking_rule->remove_headers_mask());
ASSERT_TRUE(redirect_rule);
EXPECT_TRUE(AreRulesEqual(&rules_to_index[1], redirect_rule->url_rule()));
EXPECT_EQ(0u, redirect_rule->remove_headers_mask());
ASSERT_TRUE(remove_header_rule);
EXPECT_TRUE(
AreRulesEqual(&rules_to_index[2], remove_header_rule->url_rule()));
EXPECT_EQ(flat::RemoveHeaderType_cookie | flat::RemoveHeaderType_set_cookie,
remove_header_rule->remove_headers_mask());
}
} // namespace } // namespace
} // namespace declarative_net_request } // namespace declarative_net_request
} // namespace extensions } // namespace extensions
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "extensions/browser/api/declarative_net_request/constants.h" #include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/common/api/declarative_net_request.h" #include "extensions/common/api/declarative_net_request.h"
#include "extensions/common/api/declarative_net_request/utils.h" #include "extensions/common/api/declarative_net_request/utils.h"
#include "third_party/re2/src/re2/re2.h"
#include "url/gurl.h" #include "url/gurl.h"
#include "url/url_constants.h" #include "url/url_constants.h"
...@@ -123,16 +124,24 @@ class UrlFilterParser { ...@@ -123,16 +124,24 @@ class UrlFilterParser {
DISALLOW_COPY_AND_ASSIGN(UrlFilterParser); DISALLOW_COPY_AND_ASSIGN(UrlFilterParser);
}; };
// Returns whether the filter of |parsed_rule| should be matched case
// sensitively. A rule which does not specify case sensitivity explicitly is
// considered case sensitive.
bool IsCaseSensitive(const dnr_api::Rule& parsed_rule) {
  const auto& case_sensitive_flag =
      parsed_rule.condition.is_url_filter_case_sensitive;
  return !case_sensitive_flag || *case_sensitive_flag;
}
// Returns a bitmask of flat_rule::OptionFlag corresponding to |parsed_rule|. // Returns a bitmask of flat_rule::OptionFlag corresponding to |parsed_rule|.
uint8_t GetOptionsMask(const dnr_api::Rule& parsed_rule) { uint8_t GetOptionsMask(const dnr_api::Rule& parsed_rule) {
uint8_t mask = flat_rule::OptionFlag_NONE; uint8_t mask = flat_rule::OptionFlag_NONE;
if (parsed_rule.action.type == dnr_api::RULE_ACTION_TYPE_ALLOW) if (parsed_rule.action.type == dnr_api::RULE_ACTION_TYPE_ALLOW)
mask |= flat_rule::OptionFlag_IS_WHITELIST; mask |= flat_rule::OptionFlag_IS_WHITELIST;
if (parsed_rule.condition.is_url_filter_case_sensitive &&
!*parsed_rule.condition.is_url_filter_case_sensitive) { if (!IsCaseSensitive(parsed_rule))
mask |= flat_rule::OptionFlag_IS_CASE_INSENSITIVE; mask |= flat_rule::OptionFlag_IS_CASE_INSENSITIVE;
}
switch (parsed_rule.condition.domain_type) { switch (parsed_rule.condition.domain_type) {
case dnr_api::DOMAIN_TYPE_FIRSTPARTY: case dnr_api::DOMAIN_TYPE_FIRSTPARTY:
...@@ -350,6 +359,30 @@ ParseResult ParseRedirect(dnr_api::Redirect redirect, ...@@ -350,6 +359,30 @@ ParseResult ParseRedirect(dnr_api::Redirect redirect,
return ParseResult::ERROR_INVALID_REDIRECT; return ParseResult::ERROR_INVALID_REDIRECT;
} }
// Returns true if the regex filter of |parsed_rule| compiles successfully with
// RE2. The rule must have a regex filter set.
bool IsValidRegex(const dnr_api::Rule& parsed_rule) {
  DCHECK(parsed_rule.condition.regex_filter);
  const std::string& pattern = *parsed_rule.condition.regex_filter;

  re2::RE2::Options re2_options;

  // Only ASCII needs to be supported. Of the two encodings RE2 offers (UTF-8
  // and Latin1), Latin1 is an 8 bit extension to ASCII and should also be the
  // more efficient choice.
  re2_options.set_encoding(re2::RE2::Options::EncodingLatin1);

  // Capturing is disabled for efficiency.
  // TODO(crbug.com/974391): Capturing should be supported for regex based
  // substitutions which are not implemented yet.
  re2_options.set_never_capture(true);

  re2_options.set_case_sensitive(IsCaseSensitive(parsed_rule));

  // TODO(crbug.com/974391): Regex compilation can be expensive. Also, these
  // need to be compiled again once the ruleset is loaded, which means duplicate
  // work. We should maintain a global cache of compiled regexes.
  return re2::RE2(pattern, re2_options).ok();
}
} // namespace } // namespace
IndexedRule::IndexedRule() = default; IndexedRule::IndexedRule() = default;
...@@ -399,9 +432,27 @@ ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule, ...@@ -399,9 +432,27 @@ ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule,
return ParseResult::ERROR_EMPTY_RESOURCE_TYPES_LIST; return ParseResult::ERROR_EMPTY_RESOURCE_TYPES_LIST;
} }
if (parsed_rule.condition.url_filter && parsed_rule.condition.regex_filter)
return ParseResult::ERROR_MULTIPLE_FILTERS_SPECIFIED;
// TODO(crbug.com/974391): Implement limits on the number of regex rules an
// extension can specify.
const bool is_regex_rule = !!parsed_rule.condition.regex_filter;
if (is_regex_rule) {
if (parsed_rule.condition.regex_filter->empty())
return ParseResult::ERROR_EMPTY_REGEX_FILTER;
if (!base::IsStringASCII(*parsed_rule.condition.regex_filter))
return ParseResult::ERROR_NON_ASCII_REGEX_FILTER;
if (!IsValidRegex(parsed_rule))
return ParseResult::ERROR_INVALID_REGEX_FILTER;
}
if (parsed_rule.condition.url_filter) { if (parsed_rule.condition.url_filter) {
if (parsed_rule.condition.url_filter->empty()) if (parsed_rule.condition.url_filter->empty())
return ParseResult::ERROR_EMPTY_URL_FILTER; return ParseResult::ERROR_EMPTY_URL_FILTER;
if (!base::IsStringASCII(*parsed_rule.condition.url_filter)) if (!base::IsStringASCII(*parsed_rule.condition.url_filter))
return ParseResult::ERROR_NON_ASCII_URL_FILTER; return ParseResult::ERROR_NON_ASCII_URL_FILTER;
} }
...@@ -431,10 +482,16 @@ ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule, ...@@ -431,10 +482,16 @@ ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule,
return ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN; return ParseResult::ERROR_NON_ASCII_EXCLUDED_DOMAIN;
} }
// Parse the |anchor_left|, |anchor_right|, |url_pattern_type| and if (is_regex_rule) {
// |url_pattern| fields. indexed_rule->url_pattern_type =
UrlFilterParser::Parse(std::move(parsed_rule.condition.url_filter), url_pattern_index::flat::UrlPatternType_REGEXP;
indexed_rule); indexed_rule->url_pattern = std::move(*parsed_rule.condition.regex_filter);
} else {
// Parse the |anchor_left|, |anchor_right|, |url_pattern_type| and
// |url_pattern| fields.
UrlFilterParser::Parse(std::move(parsed_rule.condition.url_filter),
indexed_rule);
}
// url_pattern_index doesn't support patterns starting with a domain anchor // url_pattern_index doesn't support patterns starting with a domain anchor
// followed by a wildcard, e.g. ||*xyz. // followed by a wildcard, e.g. ||*xyz.
...@@ -465,7 +522,6 @@ ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule, ...@@ -465,7 +522,6 @@ ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule,
DCHECK(IsSubset(indexed_rule->options, flat_rule::OptionFlag_ANY)); DCHECK(IsSubset(indexed_rule->options, flat_rule::OptionFlag_ANY));
DCHECK(IsSubset(indexed_rule->element_types, flat_rule::ElementType_ANY)); DCHECK(IsSubset(indexed_rule->element_types, flat_rule::ElementType_ANY));
DCHECK_EQ(flat_rule::ActivationType_NONE, indexed_rule->activation_types); DCHECK_EQ(flat_rule::ActivationType_NONE, indexed_rule->activation_types);
DCHECK_NE(flat_rule::UrlPatternType_REGEXP, indexed_rule->url_pattern_type);
DCHECK_NE(flat_rule::AnchorType_SUBDOMAIN, indexed_rule->anchor_right); DCHECK_NE(flat_rule::AnchorType_SUBDOMAIN, indexed_rule->anchor_right);
return ParseResult::SUCCESS; return ParseResult::SUCCESS;
......
...@@ -51,6 +51,7 @@ struct IndexedRule { ...@@ -51,6 +51,7 @@ struct IndexedRule {
url_pattern_index::flat::AnchorType anchor_right = url_pattern_index::flat::AnchorType anchor_right =
url_pattern_index::flat::AnchorType_NONE; url_pattern_index::flat::AnchorType_NONE;
std::string url_pattern; std::string url_pattern;
// Lower-cased and sorted as required by the url_pattern_index component. // Lower-cased and sorted as required by the url_pattern_index component.
std::vector<std::string> domains; std::vector<std::string> domains;
std::vector<std::string> excluded_domains; std::vector<std::string> excluded_domains;
......
...@@ -637,6 +637,42 @@ TEST_F(IndexedRuleTest, RedirectParsing) { ...@@ -637,6 +637,42 @@ TEST_F(IndexedRuleTest, RedirectParsing) {
} }
} }
// Checks how IndexedRule::CreateIndexedRule validates the regex filter of a
// rule: empty, non-ASCII and syntactically invalid filters are rejected, while
// valid filters are stored verbatim as a REGEXP url pattern.
TEST_F(IndexedRuleTest, RegexFilterParsing) {
  struct TestCase {
    std::string regex_filter;
    ParseResult result;
  };
  const TestCase kTestCases[] = {
      {"", ParseResult::ERROR_EMPTY_REGEX_FILTER},
      // Filter with non-ascii characters.
      {"αcd", ParseResult::ERROR_NON_ASCII_REGEX_FILTER},
      // Invalid regex: Unterminated character class.
      {"x[ab", ParseResult::ERROR_INVALID_REGEX_FILTER},
      // Invalid regex: Incomplete capturing group.
      {"x(", ParseResult::ERROR_INVALID_REGEX_FILTER},
      // Invalid regex: Invalid escape sequence \x.
      {R"(ij\x1)", ParseResult::ERROR_INVALID_REGEX_FILTER},
      {R"(ij\\x1)", ParseResult::SUCCESS},
      {R"(^http://www\.(abc|def)\.xyz\.com/)", ParseResult::SUCCESS}};

  for (const TestCase& test_case : kTestCases) {
    SCOPED_TRACE(test_case.regex_filter);

    // Start from a generic rule and swap its url filter for the regex filter
    // under test.
    dnr_api::Rule parsed_rule = CreateGenericParsedRule();
    parsed_rule.condition.url_filter.reset();
    parsed_rule.condition.regex_filter =
        std::make_unique<std::string>(test_case.regex_filter);

    IndexedRule indexed_rule;
    const ParseResult result = IndexedRule::CreateIndexedRule(
        std::move(parsed_rule), GetBaseURL(), &indexed_rule);
    EXPECT_EQ(result, test_case.result);

    // The indexed rule is only inspected when parsing succeeded.
    if (result != ParseResult::SUCCESS)
      continue;

    EXPECT_EQ(indexed_rule.url_pattern, test_case.regex_filter);
    EXPECT_EQ(flat_rule::UrlPatternType_REGEXP,
              indexed_rule.url_pattern_type);
  }
}
} // namespace } // namespace
} // namespace declarative_net_request } // namespace declarative_net_request
} // namespace extensions } // namespace extensions
...@@ -55,8 +55,26 @@ enum ActionIndex : ubyte { ...@@ -55,8 +55,26 @@ enum ActionIndex : ubyte {
remove_set_cookie_header, remove_set_cookie_header,
count count
} }
enum ActionType : ubyte {
block,
allow,
redirect,
upgrade_scheme,
remove_headers
}
enum RemoveHeaderType : ubyte (bit_flags) {
cookie,
referer,
set_cookie
}
table RegexRule {
url_rule: url_pattern_index.flat.UrlRule;
action_type: ActionType;
remove_headers_mask: ubyte;
}
table ExtensionIndexedRuleset { table ExtensionIndexedRuleset {
index_list : [url_pattern_index.flat.UrlPatternIndex]; index_list : [url_pattern_index.flat.UrlPatternIndex];
regex_rules: [RegexRule];
extension_metadata : [UrlRuleMetadata]; extension_metadata : [UrlRuleMetadata];
} }
root_type ExtensionIndexedRuleset; root_type ExtensionIndexedRuleset;
...@@ -125,7 +143,7 @@ TEST_F(IndexedRulesetFormatVersionTest, CheckVersionUpdated) { ...@@ -125,7 +143,7 @@ TEST_F(IndexedRulesetFormatVersionTest, CheckVersionUpdated) {
EXPECT_EQ(StripCommentsAndWhitespace(kFlatbufferSchemaExpected), EXPECT_EQ(StripCommentsAndWhitespace(kFlatbufferSchemaExpected),
StripCommentsAndWhitespace(flatbuffer_schema)) StripCommentsAndWhitespace(flatbuffer_schema))
<< "Schema change detected; update this test and the schema version."; << "Schema change detected; update this test and the schema version.";
EXPECT_EQ(11, GetIndexedRulesetFormatVersionForTesting()) EXPECT_EQ(12, GetIndexedRulesetFormatVersionForTesting())
<< "Update this test if you update the schema version."; << "Update this test if you update the schema version.";
} }
......
...@@ -81,7 +81,7 @@ std::string ParseInfo::GetErrorDescription() const { ...@@ -81,7 +81,7 @@ std::string ParseInfo::GetErrorDescription() const {
break; break;
case ParseResult::ERROR_EMPTY_URL_FILTER: case ParseResult::ERROR_EMPTY_URL_FILTER:
error = ErrorUtils::FormatErrorMessage( error = ErrorUtils::FormatErrorMessage(
kErrorEmptyUrlFilter, base::NumberToString(*rule_id_), kUrlFilterKey); kErrorEmptyKey, base::NumberToString(*rule_id_), kUrlFilterKey);
break; break;
case ParseResult::ERROR_INVALID_REDIRECT_URL: case ParseResult::ERROR_INVALID_REDIRECT_URL:
error = ErrorUtils::FormatErrorMessage(kErrorInvalidRedirectUrl, error = ErrorUtils::FormatErrorMessage(kErrorInvalidRedirectUrl,
...@@ -156,6 +156,23 @@ std::string ParseInfo::GetErrorDescription() const { ...@@ -156,6 +156,23 @@ std::string ParseInfo::GetErrorDescription() const {
base::NumberToString(*rule_id_), base::NumberToString(*rule_id_),
kRedirectUrlPath); kRedirectUrlPath);
break; break;
case ParseResult::ERROR_EMPTY_REGEX_FILTER:
error = ErrorUtils::FormatErrorMessage(
kErrorEmptyKey, base::NumberToString(*rule_id_), kRegexFilterKey);
break;
case ParseResult::ERROR_NON_ASCII_REGEX_FILTER:
error = ErrorUtils::FormatErrorMessage(
kErrorNonAscii, base::NumberToString(*rule_id_), kRegexFilterKey);
break;
case ParseResult::ERROR_INVALID_REGEX_FILTER:
error = ErrorUtils::FormatErrorMessage(
kErrorInvalidKey, base::NumberToString(*rule_id_), kRegexFilterKey);
break;
case ParseResult::ERROR_MULTIPLE_FILTERS_SPECIFIED:
error = ErrorUtils::FormatErrorMessage(kErrorMultipleFilters,
base::NumberToString(*rule_id_),
kUrlFilterKey, kRegexFilterKey);
break;
} }
return error; return error;
} }
......
...@@ -33,7 +33,7 @@ namespace { ...@@ -33,7 +33,7 @@ namespace {
// url_pattern_index.fbs. Whenever an extension with an indexed ruleset format // url_pattern_index.fbs. Whenever an extension with an indexed ruleset format
// version different from the one currently used by Chrome is loaded, the // version different from the one currently used by Chrome is loaded, the
// extension ruleset will be reindexed. // extension ruleset will be reindexed.
constexpr int kIndexedRulesetFormatVersion = 11; constexpr int kIndexedRulesetFormatVersion = 12;
// This static assert is meant to catch cases where // This static assert is meant to catch cases where
// url_pattern_index::kUrlPatternIndexFormatVersion is incremented without // url_pattern_index::kUrlPatternIndexFormatVersion is incremented without
......
...@@ -143,6 +143,11 @@ namespace declarativeNetRequest { ...@@ -143,6 +143,11 @@ namespace declarativeNetRequest {
// http://abc.xn--p1ai/?q=%D1%84. // http://abc.xn--p1ai/?q=%D1%84.
DOMString? urlFilter; DOMString? urlFilter;
// TODO(crbug.com/974391): Add documentation once the implementation is
// complete.
[nodoc]
DOMString? regexFilter;
// Whether the <code>urlFilter</code> is case sensitive. Default is true. // Whether the <code>urlFilter</code> is case sensitive. Default is true.
boolean? isUrlFilterCaseSensitive; boolean? isUrlFilterCaseSensitive;
......
...@@ -15,6 +15,7 @@ const char kPriorityKey[] = "priority"; ...@@ -15,6 +15,7 @@ const char kPriorityKey[] = "priority";
const char kRuleConditionKey[] = "condition"; const char kRuleConditionKey[] = "condition";
const char kRuleActionKey[] = "action"; const char kRuleActionKey[] = "action";
const char kUrlFilterKey[] = "urlFilter"; const char kUrlFilterKey[] = "urlFilter";
const char kRegexFilterKey[] = "regexFilter";
const char kIsUrlFilterCaseSensitiveKey[] = "isUrlFilterCaseSensitive"; const char kIsUrlFilterCaseSensitiveKey[] = "isUrlFilterCaseSensitive";
const char kDomainsKey[] = "domains"; const char kDomainsKey[] = "domains";
const char kExcludedDomainsKey[] = "excludedDomains"; const char kExcludedDomainsKey[] = "excludedDomains";
......
...@@ -30,6 +30,7 @@ extern const char kPriorityKey[]; ...@@ -30,6 +30,7 @@ extern const char kPriorityKey[];
extern const char kRuleConditionKey[]; extern const char kRuleConditionKey[];
extern const char kRuleActionKey[]; extern const char kRuleActionKey[];
extern const char kUrlFilterKey[]; extern const char kUrlFilterKey[];
extern const char kRegexFilterKey[];
extern const char kIsUrlFilterCaseSensitiveKey[]; extern const char kIsUrlFilterCaseSensitiveKey[];
extern const char kDomainsKey[]; extern const char kDomainsKey[];
extern const char kExcludedDomainsKey[]; extern const char kExcludedDomainsKey[];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment