Commit 9208d688 authored by Kelvin Jiang, committed by Commit Bot

[UrlPatternIndex] Add multiple rule matching for url pattern index

This CL adds functions for UrlPatternIndex which return all matching
UrlRules. These functions will be used to evaluate modifyHeaders rules
for the declarativeNetRequest API.

Bug: 1064497
Change-Id: Iab0260892a2d7ad4ab5180b84320dfa2675cbf14
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2116563
Commit-Queue: Kelvin Jiang <kelvinjiang@chromium.org>
Reviewed-by: Charlie Harrison <csharrison@chromium.org>
Reviewed-by: Karan Bhatia <karandeepb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#756099}
parent 033f06d6
...@@ -61,6 +61,7 @@ source_set("unit_tests") { ...@@ -61,6 +61,7 @@ source_set("unit_tests") {
":test_support", ":test_support",
":url_pattern_index", ":url_pattern_index",
"//base", "//base",
"//testing/gmock",
"//testing/gtest", "//testing/gtest",
"//third_party/protobuf:protobuf_lite", "//third_party/protobuf:protobuf_lite",
"//url", "//url",
......
...@@ -572,9 +572,11 @@ size_t GetLongestMatchingSubdomain(const url::Origin& origin, ...@@ -572,9 +572,11 @@ size_t GetLongestMatchingSubdomain(const url::Origin& origin,
return 0; return 0;
} }
// |sorted_candidates| is sorted in descending order by priority. This returns // |sorted_candidates| is sorted in descending order by priority. If
// the first matching rule i.e. the rule with the highest priority in // |matched_rules| is specified, then all rule matches in |sorted_candidates|
// |sorted_candidates| or null if no rule matches. // will be added to |matched_rules| and null is returned. If |matched_rules| is
// not specified, then this returns the first matching rule i.e. the rule with
// the highest priority in |sorted_candidates| or null if no rule matches.
const flat::UrlRule* FindMatchAmongCandidates( const flat::UrlRule* FindMatchAmongCandidates(
const FlatUrlRuleList* sorted_candidates, const FlatUrlRuleList* sorted_candidates,
const UrlPattern::UrlInfo& url, const UrlPattern::UrlInfo& url,
...@@ -582,7 +584,8 @@ const flat::UrlRule* FindMatchAmongCandidates( ...@@ -582,7 +584,8 @@ const flat::UrlRule* FindMatchAmongCandidates(
flat::ElementType element_type, flat::ElementType element_type,
flat::ActivationType activation_type, flat::ActivationType activation_type,
bool is_third_party, bool is_third_party,
bool disable_generic_rules) { bool disable_generic_rules,
std::vector<const flat::UrlRule*>* matched_rules) {
if (!sorted_candidates) if (!sorted_candidates)
return nullptr; return nullptr;
...@@ -601,6 +604,9 @@ const flat::UrlRule* FindMatchAmongCandidates( ...@@ -601,6 +604,9 @@ const flat::UrlRule* FindMatchAmongCandidates(
if (DoesOriginMatchDomainList(document_origin, *rule, if (DoesOriginMatchDomainList(document_origin, *rule,
disable_generic_rules)) { disable_generic_rules)) {
if (matched_rules)
matched_rules->push_back(rule);
else
return rule; return rule;
} }
} }
...@@ -610,7 +616,9 @@ const flat::UrlRule* FindMatchAmongCandidates( ...@@ -610,7 +616,9 @@ const flat::UrlRule* FindMatchAmongCandidates(
// Returns whether the network request matches a UrlPattern |index| represented // Returns whether the network request matches a UrlPattern |index| represented
// in its FlatBuffers format. |is_third_party| should reflect the relation // in its FlatBuffers format. |is_third_party| should reflect the relation
// between |url| and |document_origin|. // between |url| and |document_origin|. If |strategy| is kAll, then
// |matched_rules| will be populated with all matching UrlRules and nullptr is
// returned.
const flat::UrlRule* FindMatchInFlatUrlPatternIndex( const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
const flat::UrlPatternIndex& index, const flat::UrlPatternIndex& index,
const UrlPattern::UrlInfo& url, const UrlPattern::UrlInfo& url,
...@@ -619,9 +627,14 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex( ...@@ -619,9 +627,14 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
flat::ActivationType activation_type, flat::ActivationType activation_type,
bool is_third_party, bool is_third_party,
bool disable_generic_rules, bool disable_generic_rules,
UrlPatternIndexMatcher::FindRuleStrategy strategy) { UrlPatternIndexMatcher::FindRuleStrategy strategy,
std::vector<const flat::UrlRule*>* matched_rules) {
using FindRuleStrategy = UrlPatternIndexMatcher::FindRuleStrategy; using FindRuleStrategy = UrlPatternIndexMatcher::FindRuleStrategy;
// Check that the outparam |matched_rules| is specified if and only if
// |strategy| is kAll.
DCHECK_EQ(strategy == FindRuleStrategy::kAll, !!matched_rules);
const FlatNGramIndex* hash_table = index.ngram_index(); const FlatNGramIndex* hash_table = index.ngram_index();
const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot(); const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot();
DCHECK_NE(hash_table, nullptr); DCHECK_NE(hash_table, nullptr);
...@@ -659,7 +672,7 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex( ...@@ -659,7 +672,7 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
continue; continue;
const flat::UrlRule* rule = FindMatchAmongCandidates( const flat::UrlRule* rule = FindMatchAmongCandidates(
entry->rule_list(), url, document_origin, element_type, activation_type, entry->rule_list(), url, document_origin, element_type, activation_type,
is_third_party, disable_generic_rules); is_third_party, disable_generic_rules, matched_rules);
if (!rule) if (!rule)
continue; continue;
...@@ -671,18 +684,22 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex( ...@@ -671,18 +684,22 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
case FindRuleStrategy::kHighestPriority: case FindRuleStrategy::kHighestPriority:
max_priority_rule = get_max_priority_rule(max_priority_rule, rule); max_priority_rule = get_max_priority_rule(max_priority_rule, rule);
break; break;
case FindRuleStrategy::kAll:
continue;
} }
} }
const flat::UrlRule* rule = FindMatchAmongCandidates( const flat::UrlRule* rule = FindMatchAmongCandidates(
index.fallback_rules(), url, document_origin, element_type, index.fallback_rules(), url, document_origin, element_type,
activation_type, is_third_party, disable_generic_rules); activation_type, is_third_party, disable_generic_rules, matched_rules);
switch (strategy) { switch (strategy) {
case FindRuleStrategy::kAny: case FindRuleStrategy::kAny:
return rule; return rule;
case FindRuleStrategy::kHighestPriority: case FindRuleStrategy::kHighestPriority:
return get_max_priority_rule(max_priority_rule, rule); return get_max_priority_rule(max_priority_rule, rule);
case FindRuleStrategy::kAll:
return nullptr;
} }
NOTREACHED(); NOTREACHED();
...@@ -789,9 +806,13 @@ const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( ...@@ -789,9 +806,13 @@ const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
return nullptr; return nullptr;
} }
// FindAllMatches should be used instead to find all matches.
DCHECK_NE(strategy, FindRuleStrategy::kAll);
auto* rule = FindMatchInFlatUrlPatternIndex( auto* rule = FindMatchInFlatUrlPatternIndex(
*flat_index_, UrlPattern::UrlInfo(url), first_party_origin, element_type, *flat_index_, UrlPattern::UrlInfo(url), first_party_origin, element_type,
activation_type, is_third_party, disable_generic_rules, strategy); activation_type, is_third_party, disable_generic_rules, strategy,
nullptr /* matched_rules */);
if (rule) { if (rule) {
TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("loading"), TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("loading"),
"UrlPatternIndexMatcher::FindMatch", "pattern", "UrlPatternIndexMatcher::FindMatch", "pattern",
...@@ -800,4 +821,45 @@ const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( ...@@ -800,4 +821,45 @@ const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
return rule; return rule;
} }
// Convenience overload taking proto::*Type arguments: converts |element_type|
// and |activation_type| to their flat:: counterparts and forwards everything
// to the flat::*Type overload of FindAllMatches.
std::vector<const flat::UrlRule*> UrlPatternIndexMatcher::FindAllMatches(
    const GURL& url,
    const url::Origin& first_party_origin,
    proto::ElementType element_type,
    proto::ActivationType activation_type,
    bool is_third_party,
    bool disable_generic_rules) const {
  const auto flat_element_type = ProtoToFlatElementType(element_type);
  const auto flat_activation_type = ProtoToFlatActivationType(activation_type);
  return FindAllMatches(url, first_party_origin, flat_element_type,
                        flat_activation_type, is_third_party,
                        disable_generic_rules);
}
// Returns every UrlRule in the index that matches the request described by the
// arguments. Returns an empty vector when no rule matches or when the request
// is rejected by one of the early checks below.
std::vector<const flat::UrlRule*> UrlPatternIndexMatcher::FindAllMatches(
    const GURL& url,
    const url::Origin& first_party_origin,
    flat::ElementType element_type,
    flat::ActivationType activation_type,
    bool is_third_party,
    bool disable_generic_rules) const {
  // Bail out when there is no index to search or the URL is invalid.
  if (!flat_index_ || !url.is_valid())
    return {};

  // Ignore URLs that are greater than the max URL length. Since those will be
  // disallowed elsewhere in the loading stack, we can save compute time by
  // avoiding matching here.
  if (url.spec().length() > url::kMaxURLChars)
    return {};

  // Reject requests where |element_type| and |activation_type| are both NONE
  // or both non-NONE.
  const bool has_element_type = element_type != flat::ElementType_NONE;
  const bool has_activation_type =
      activation_type != flat::ActivationType_NONE;
  if (has_element_type == has_activation_type)
    return {};

  // With kAll, the matches are reported through the out-param.
  std::vector<const flat::UrlRule*> matches;
  FindMatchInFlatUrlPatternIndex(
      *flat_index_, UrlPattern::UrlInfo(url), first_party_origin, element_type,
      activation_type, is_third_party, disable_generic_rules,
      FindRuleStrategy::kAll, &matches);
  return matches;
}
} // namespace url_pattern_index } // namespace url_pattern_index
...@@ -162,7 +162,10 @@ class UrlPatternIndexMatcher { ...@@ -162,7 +162,10 @@ class UrlPatternIndexMatcher {
// If multiple rules match, any of the rules with the highest priority is // If multiple rules match, any of the rules with the highest priority is
// returned. // returned.
kHighestPriority kHighestPriority,
// All matching rules are returned.
kAll,
}; };
// Creates an instance to access the given |flat_index|. If |flat_index| is // Creates an instance to access the given |flat_index|. If |flat_index| is
...@@ -213,6 +216,27 @@ class UrlPatternIndexMatcher { ...@@ -213,6 +216,27 @@ class UrlPatternIndexMatcher {
bool disable_generic_rules, bool disable_generic_rules,
FindRuleStrategy strategy) const; FindRuleStrategy strategy) const;
// Same as FindMatch, except this function returns all UrlRules that match the
// request for the index instead of a single rule. The proto::*Type arguments
// are converted to their flat:: equivalents and the work is delegated to the
// flat::*Type overload of FindAllMatches. If no UrlRules match, returns an
// empty vector.
std::vector<const flat::UrlRule*> FindAllMatches(
const GURL& url,
const url::Origin& first_party_origin,
proto::ElementType element_type,
proto::ActivationType activation_type,
bool is_third_party,
bool disable_generic_rules) const;
// Helper function to work with flat::*Type(s). Returns all UrlRules that
// match the request for the index. If no UrlRules match, returns an empty
// vector. An empty vector is also returned, without attempting a match, when
// the matcher has no index, when |url| is invalid or longer than
// url::kMaxURLChars, or when |element_type| and |activation_type| are either
// both NONE or both non-NONE.
std::vector<const flat::UrlRule*> FindAllMatches(
const GURL& url,
const url::Origin& first_party_origin,
flat::ElementType element_type,
flat::ActivationType activation_type,
bool is_third_party,
bool disable_generic_rules) const;
private: private:
// Must outlive this instance. // Must outlive this instance.
const flat::UrlPatternIndex* flat_index_; const flat::UrlPatternIndex* flat_index_;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "base/strings/string_piece.h" #include "base/strings/string_piece.h"
#include "components/url_pattern_index/url_pattern.h" #include "components/url_pattern_index/url_pattern.h"
#include "components/url_pattern_index/url_rule_test_support.h" #include "components/url_pattern_index/url_rule_test_support.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h" #include "url/gurl.h"
#include "url/origin.h" #include "url/origin.h"
...@@ -50,7 +51,8 @@ class UrlPatternIndexTest : public ::testing::Test { ...@@ -50,7 +51,8 @@ class UrlPatternIndexTest : public ::testing::Test {
void AddSimpleUrlRule(std::string pattern, void AddSimpleUrlRule(std::string pattern,
uint32_t id, uint32_t id,
uint32_t priority, uint32_t priority,
uint8_t options) { uint8_t options,
uint16_t element_types) {
auto pattern_offset = flat_builder_->CreateString(pattern); auto pattern_offset = flat_builder_->CreateString(pattern);
flat::UrlRuleBuilder rule_builder(*flat_builder_); flat::UrlRuleBuilder rule_builder(*flat_builder_);
...@@ -58,6 +60,7 @@ class UrlPatternIndexTest : public ::testing::Test { ...@@ -58,6 +60,7 @@ class UrlPatternIndexTest : public ::testing::Test {
rule_builder.add_url_pattern(pattern_offset); rule_builder.add_url_pattern(pattern_offset);
rule_builder.add_id(id); rule_builder.add_id(id);
rule_builder.add_priority(priority); rule_builder.add_priority(priority);
rule_builder.add_element_types(element_types);
auto rule_offset = rule_builder.Finish(); auto rule_offset = rule_builder.Finish();
index_builder_->IndexUrlRule(rule_offset); index_builder_->IndexUrlRule(rule_offset);
...@@ -87,6 +90,20 @@ class UrlPatternIndexTest : public ::testing::Test { ...@@ -87,6 +90,20 @@ class UrlPatternIndexTest : public ::testing::Test {
UrlPatternIndexMatcher::FindRuleStrategy::kAny); UrlPatternIndexMatcher::FindRuleStrategy::kAny);
} }
// Test helper: builds the request URL and document origin from the given
// strings, derives third-party-ness via testing::IsThirdParty, and returns
// whatever the index matcher's FindAllMatches reports for that request.
std::vector<const flat::UrlRule*> FindAllMatches(
    base::StringPiece url_string,
    base::StringPiece document_origin_string,
    proto::ElementType element_type,
    proto::ActivationType activation_type,
    bool disable_generic_rules) const {
  const GURL request_url(url_string);
  const url::Origin origin = testing::GetOrigin(document_origin_string);
  const bool is_third_party = testing::IsThirdParty(request_url, origin);
  return index_matcher_->FindAllMatches(request_url, origin, element_type,
                                        activation_type, is_third_party,
                                        disable_generic_rules);
}
const flat::UrlRule* FindHighestPriorityMatch( const flat::UrlRule* FindHighestPriorityMatch(
base::StringPiece url_string) const { base::StringPiece url_string) const {
return index_matcher_->FindMatch( return index_matcher_->FindMatch(
...@@ -167,9 +184,10 @@ TEST_F(UrlPatternIndexTest, CaseSensitivity) { ...@@ -167,9 +184,10 @@ TEST_F(UrlPatternIndexTest, CaseSensitivity) {
uint8_t common_options = flat::OptionFlag_APPLIES_TO_FIRST_PARTY | uint8_t common_options = flat::OptionFlag_APPLIES_TO_FIRST_PARTY |
flat::OptionFlag_APPLIES_TO_THIRD_PARTY; flat::OptionFlag_APPLIES_TO_THIRD_PARTY;
AddSimpleUrlRule("case-insensitive", 0 /* id */, 0 /* priority */, AddSimpleUrlRule("case-insensitive", 0 /* id */, 0 /* priority */,
common_options | flat::OptionFlag_IS_CASE_INSENSITIVE); common_options | flat::OptionFlag_IS_CASE_INSENSITIVE,
flat::ElementType_ANY);
AddSimpleUrlRule("case-sensitive", 0 /* id */, 0 /* priority */, AddSimpleUrlRule("case-sensitive", 0 /* id */, 0 /* priority */,
common_options); common_options, flat::ElementType_ANY);
Finish(); Finish();
EXPECT_TRUE(FindMatch("http://abc.com/type=CASE-insEnsitIVe")); EXPECT_TRUE(FindMatch("http://abc.com/type=CASE-insEnsitIVe"));
...@@ -616,6 +634,60 @@ TEST_F(UrlPatternIndexTest, OneRuleWithElementAndActivationTypes) { ...@@ -616,6 +634,60 @@ TEST_F(UrlPatternIndexTest, OneRuleWithElementAndActivationTypes) {
testing::kNoElement, kDocument)); testing::kNoElement, kDocument));
} }
// Test that FindAllMatches returns exactly the set of UrlRules (identified by
// rule id, in any order) that match each incoming request, taking both the url
// pattern and the element types of every indexed rule into account.
TEST_F(UrlPatternIndexTest, MultipleRuleMatches) {
  // Rules to index. Several rules share a pattern but differ in the element
  // types they apply to, so one request can match more than one rule.
  const struct {
    uint32_t id;
    const char* url_pattern;
    uint16_t element_types;
  } kRules[] = {{0, "ex1", flat::ElementType_ANY},
                {1, "ex1", flat::ElementType_IMAGE},
                {2, "ex1", flat::ElementType_IMAGE | flat::ElementType_FONT},
                {3, "ex12", flat::ElementType_ANY},
                {4, "google", flat::ElementType_ANY},
                {5, "google", flat::ElementType_IMAGE}};

  for (const auto& rule_data : kRules) {
    AddSimpleUrlRule(rule_data.url_pattern, rule_data.id, 0 /* priority */,
                     flat::OptionFlag_APPLIES_TO_FIRST_PARTY |
                         flat::OptionFlag_APPLIES_TO_THIRD_PARTY,
                     rule_data.element_types);
  }
  Finish();

  // Each case is a request URL plus its element type, together with the ids
  // of all rules expected to match it.
  const struct {
    const char* url;
    proto::ElementType element_type;
    std::vector<uint32_t> expected_matched_ids;
  } kTestCases[] = {{"http://ex1.com", proto::ELEMENT_TYPE_OTHER, {0}},
                    {"http://ex1.com/font", kFont, {0, 2}},
                    {"http://ex1.com/img", kImage, {0, 1, 2}},
                    {"http://ex12.com", proto::ELEMENT_TYPE_OTHER, {0, 3}},
                    {"http://ex12.com/img", kImage, {0, 1, 2, 3}},
                    {"http://google.com", proto::ELEMENT_TYPE_OTHER, {4}},
                    {"http://google.com/img", kImage, {4, 5}},
                    {"http://ex12google.com/img", kImage, {0, 1, 2, 3, 4, 5}},
                    {"http://nomatch.com/img", kImage, {}}};

  for (const auto& test_case : kTestCases) {
    // The trace labels name what is actually printed: the request URL and the
    // single element type of the request.
    SCOPED_TRACE(::testing::Message()
                 << "Url: " << test_case.url << "; ElementType: "
                 << static_cast<int>(test_case.element_type));

    std::vector<const flat::UrlRule*> matched_rules = FindAllMatches(
        test_case.url, "" /* document_origin_string */, test_case.element_type,
        kNoActivation, false /* disable_generic_rules */);

    // Compare by rule id; the order of the matches is not checked.
    std::vector<uint32_t> actual_matched_ids;
    for (const auto* rule : matched_rules)
      actual_matched_ids.push_back(rule->id());

    EXPECT_THAT(actual_matched_ids, ::testing::UnorderedElementsAreArray(
                                        test_case.expected_matched_ids));
  }
}
TEST_F(UrlPatternIndexTest, MatchWithDisableGenericRules) { TEST_F(UrlPatternIndexTest, MatchWithDisableGenericRules) {
const struct { const struct {
const char* url_pattern; const char* url_pattern;
...@@ -804,7 +876,8 @@ TEST_F(UrlPatternIndexTest, FindMatchHighestPriority) { ...@@ -804,7 +876,8 @@ TEST_F(UrlPatternIndexTest, FindMatchHighestPriority) {
for (size_t j = 0; j < i; j++) { for (size_t j = 0; j < i; j++) {
AddSimpleUrlRule(pattern, id, priorities[j], AddSimpleUrlRule(pattern, id, priorities[j],
flat::OptionFlag_APPLIES_TO_FIRST_PARTY | flat::OptionFlag_APPLIES_TO_FIRST_PARTY |
flat::OptionFlag_APPLIES_TO_THIRD_PARTY); flat::OptionFlag_APPLIES_TO_THIRD_PARTY,
flat::ElementType_ANY);
id++; id++;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment