Commit 9208d688 authored by Kelvin Jiang, committed by Commit Bot

[UrlPatternIndex] Add multiple rule matching for url pattern index

This CL adds functions for UrlPatternIndex which return all matching
UrlRules. These functions will be used to evaluate modifyHeaders rules
for the declarativeNetRequest API.

Bug: 1064497
Change-Id: Iab0260892a2d7ad4ab5180b84320dfa2675cbf14
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2116563
Commit-Queue: Kelvin Jiang <kelvinjiang@chromium.org>
Reviewed-by: Charlie Harrison <csharrison@chromium.org>
Reviewed-by: Karan Bhatia <karandeepb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#756099}
parent 033f06d6
......@@ -61,6 +61,7 @@ source_set("unit_tests") {
":test_support",
":url_pattern_index",
"//base",
"//testing/gmock",
"//testing/gtest",
"//third_party/protobuf:protobuf_lite",
"//url",
......
......@@ -572,9 +572,11 @@ size_t GetLongestMatchingSubdomain(const url::Origin& origin,
return 0;
}
// |sorted_candidates| is sorted in descending order by priority. This returns
// the first matching rule i.e. the rule with the highest priority in
// |sorted_candidates| or null if no rule matches.
// |sorted_candidates| is sorted in descending order by priority. If
// |matched_rules| is specified, then all rule matches in |sorted_candidates|
// will be added to |matched_rules| and null is returned. If |matched_rules| is
// not specified, then this returns the first matching rule i.e. the rule with
// the highest priority in |sorted_candidates| or null if no rule matches.
const flat::UrlRule* FindMatchAmongCandidates(
const FlatUrlRuleList* sorted_candidates,
const UrlPattern::UrlInfo& url,
......@@ -582,7 +584,8 @@ const flat::UrlRule* FindMatchAmongCandidates(
flat::ElementType element_type,
flat::ActivationType activation_type,
bool is_third_party,
bool disable_generic_rules) {
bool disable_generic_rules,
std::vector<const flat::UrlRule*>* matched_rules) {
if (!sorted_candidates)
return nullptr;
......@@ -601,7 +604,10 @@ const flat::UrlRule* FindMatchAmongCandidates(
if (DoesOriginMatchDomainList(document_origin, *rule,
disable_generic_rules)) {
return rule;
if (matched_rules)
matched_rules->push_back(rule);
else
return rule;
}
}
......@@ -610,7 +616,9 @@ const flat::UrlRule* FindMatchAmongCandidates(
// Returns whether the network request matches a UrlPattern |index| represented
// in its FlatBuffers format. |is_third_party| should reflect the relation
// between |url| and |document_origin|.
// between |url| and |document_origin|. If |strategy| is kAll, then
// |matched_rules| will be populated with all matching UrlRules and nullptr is
// returned.
const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
const flat::UrlPatternIndex& index,
const UrlPattern::UrlInfo& url,
......@@ -619,9 +627,14 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
flat::ActivationType activation_type,
bool is_third_party,
bool disable_generic_rules,
UrlPatternIndexMatcher::FindRuleStrategy strategy) {
UrlPatternIndexMatcher::FindRuleStrategy strategy,
std::vector<const flat::UrlRule*>* matched_rules) {
using FindRuleStrategy = UrlPatternIndexMatcher::FindRuleStrategy;
// Check that the outparam |matched_rules| is specified if and only if
// |strategy| is kAll.
DCHECK_EQ(strategy == FindRuleStrategy::kAll, !!matched_rules);
const FlatNGramIndex* hash_table = index.ngram_index();
const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot();
DCHECK_NE(hash_table, nullptr);
......@@ -659,7 +672,7 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
continue;
const flat::UrlRule* rule = FindMatchAmongCandidates(
entry->rule_list(), url, document_origin, element_type, activation_type,
is_third_party, disable_generic_rules);
is_third_party, disable_generic_rules, matched_rules);
if (!rule)
continue;
......@@ -671,18 +684,22 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
case FindRuleStrategy::kHighestPriority:
max_priority_rule = get_max_priority_rule(max_priority_rule, rule);
break;
case FindRuleStrategy::kAll:
continue;
}
}
const flat::UrlRule* rule = FindMatchAmongCandidates(
index.fallback_rules(), url, document_origin, element_type,
activation_type, is_third_party, disable_generic_rules);
activation_type, is_third_party, disable_generic_rules, matched_rules);
switch (strategy) {
case FindRuleStrategy::kAny:
return rule;
case FindRuleStrategy::kHighestPriority:
return get_max_priority_rule(max_priority_rule, rule);
case FindRuleStrategy::kAll:
return nullptr;
}
NOTREACHED();
......@@ -789,9 +806,13 @@ const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
return nullptr;
}
// FindAllMatches should be used instead to find all matches.
DCHECK_NE(strategy, FindRuleStrategy::kAll);
auto* rule = FindMatchInFlatUrlPatternIndex(
*flat_index_, UrlPattern::UrlInfo(url), first_party_origin, element_type,
activation_type, is_third_party, disable_generic_rules, strategy);
activation_type, is_third_party, disable_generic_rules, strategy,
nullptr /* matched_rules */);
if (rule) {
TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("loading"),
"UrlPatternIndexMatcher::FindMatch", "pattern",
......@@ -800,4 +821,45 @@ const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
return rule;
}
// Proto-typed convenience overload: converts |element_type| and
// |activation_type| to their flat:: counterparts and forwards to the
// flat-typed FindAllMatches overload, returning its result unchanged.
std::vector<const flat::UrlRule*> UrlPatternIndexMatcher::FindAllMatches(
    const GURL& url,
    const url::Origin& first_party_origin,
    proto::ElementType element_type,
    proto::ActivationType activation_type,
    bool is_third_party,
    bool disable_generic_rules) const {
  const auto flat_element_type = ProtoToFlatElementType(element_type);
  const auto flat_activation_type = ProtoToFlatActivationType(activation_type);
  return FindAllMatches(url, first_party_origin, flat_element_type,
                        flat_activation_type, is_third_party,
                        disable_generic_rules);
}
// Flat-typed overload. Collects every rule in the index that matches the
// request and returns them; the result is empty when nothing matches or when
// the request is rejected by one of the guards below.
std::vector<const flat::UrlRule*> UrlPatternIndexMatcher::FindAllMatches(
    const GURL& url,
    const url::Origin& first_party_origin,
    flat::ElementType element_type,
    flat::ActivationType activation_type,
    bool is_third_party,
    bool disable_generic_rules) const {
  std::vector<const flat::UrlRule*> matches;

  // Skip matching when there is no index, the URL is invalid, or the URL
  // exceeds the max URL length. Over-long URLs are disallowed elsewhere in the
  // loading stack, so bailing out here just saves compute time.
  const bool can_match = flat_index_ && url.is_valid() &&
                         url.spec().length() <= url::kMaxURLChars;
  if (!can_match)
    return matches;

  // Only proceed when exactly one of |element_type| and |activation_type| is
  // set; both NONE or both non-NONE yields no matches.
  const bool has_element_type = element_type != flat::ElementType_NONE;
  const bool has_activation_type =
      activation_type != flat::ActivationType_NONE;
  if (has_element_type == has_activation_type)
    return matches;

  // With kAll the helper appends every match to |matches|; its return value
  // carries no information and is ignored.
  FindMatchInFlatUrlPatternIndex(
      *flat_index_, UrlPattern::UrlInfo(url), first_party_origin, element_type,
      activation_type, is_third_party, disable_generic_rules,
      FindRuleStrategy::kAll, &matches);
  return matches;
}
} // namespace url_pattern_index
......@@ -162,7 +162,10 @@ class UrlPatternIndexMatcher {
// If multiple rules match, any of the rules with the highest priority is
// returned.
kHighestPriority
kHighestPriority,
// All matching rules are returned.
kAll,
};
// Creates an instance to access the given |flat_index|. If |flat_index| is
......@@ -213,6 +216,27 @@ class UrlPatternIndexMatcher {
bool disable_generic_rules,
FindRuleStrategy strategy) const;
// Same as FindMatch, except this function returns all UrlRules that match the
// request for the index. If no UrlRules match, returns an empty vector.
std::vector<const flat::UrlRule*> FindAllMatches(
const GURL& url,
const url::Origin& first_party_origin,
proto::ElementType element_type,
proto::ActivationType activation_type,
bool is_third_party,
bool disable_generic_rules) const;
// Helper function to work with flat::*Type(s). Returns all UrlRules that
// match the request for the index. If no UrlRules match, returns an empty
// vector.
std::vector<const flat::UrlRule*> FindAllMatches(
const GURL& url,
const url::Origin& first_party_origin,
flat::ElementType element_type,
flat::ActivationType activation_type,
bool is_third_party,
bool disable_generic_rules) const;
private:
// Must outlive this instance.
const flat::UrlPatternIndex* flat_index_;
......
......@@ -16,6 +16,7 @@
#include "base/strings/string_piece.h"
#include "components/url_pattern_index/url_pattern.h"
#include "components/url_pattern_index/url_rule_test_support.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
#include "url/origin.h"
......@@ -50,7 +51,8 @@ class UrlPatternIndexTest : public ::testing::Test {
void AddSimpleUrlRule(std::string pattern,
uint32_t id,
uint32_t priority,
uint8_t options) {
uint8_t options,
uint16_t element_types) {
auto pattern_offset = flat_builder_->CreateString(pattern);
flat::UrlRuleBuilder rule_builder(*flat_builder_);
......@@ -58,6 +60,7 @@ class UrlPatternIndexTest : public ::testing::Test {
rule_builder.add_url_pattern(pattern_offset);
rule_builder.add_id(id);
rule_builder.add_priority(priority);
rule_builder.add_element_types(element_types);
auto rule_offset = rule_builder.Finish();
index_builder_->IndexUrlRule(rule_offset);
......@@ -87,6 +90,20 @@ class UrlPatternIndexTest : public ::testing::Test {
UrlPatternIndexMatcher::FindRuleStrategy::kAny);
}
// Test helper: parses |url_string| and |document_origin_string|, derives the
// third-party bit from the two, and returns whatever the matcher's
// FindAllMatches reports for that request.
std::vector<const flat::UrlRule*> FindAllMatches(
    base::StringPiece url_string,
    base::StringPiece document_origin_string,
    proto::ElementType element_type,
    proto::ActivationType activation_type,
    bool disable_generic_rules) const {
  const GURL request_url(url_string);
  const url::Origin origin = testing::GetOrigin(document_origin_string);
  const bool third_party = testing::IsThirdParty(request_url, origin);
  return index_matcher_->FindAllMatches(request_url, origin, element_type,
                                        activation_type, third_party,
                                        disable_generic_rules);
}
const flat::UrlRule* FindHighestPriorityMatch(
base::StringPiece url_string) const {
return index_matcher_->FindMatch(
......@@ -167,9 +184,10 @@ TEST_F(UrlPatternIndexTest, CaseSensitivity) {
uint8_t common_options = flat::OptionFlag_APPLIES_TO_FIRST_PARTY |
flat::OptionFlag_APPLIES_TO_THIRD_PARTY;
AddSimpleUrlRule("case-insensitive", 0 /* id */, 0 /* priority */,
common_options | flat::OptionFlag_IS_CASE_INSENSITIVE);
common_options | flat::OptionFlag_IS_CASE_INSENSITIVE,
flat::ElementType_ANY);
AddSimpleUrlRule("case-sensitive", 0 /* id */, 0 /* priority */,
common_options);
common_options, flat::ElementType_ANY);
Finish();
EXPECT_TRUE(FindMatch("http://abc.com/type=CASE-insEnsitIVe"));
......@@ -616,6 +634,60 @@ TEST_F(UrlPatternIndexTest, OneRuleWithElementAndActivationTypes) {
testing::kNoElement, kDocument));
}
// Verifies that FindAllMatches reports every UrlRule matching a request,
// compared by rule id and without regard to order.
TEST_F(UrlPatternIndexTest, MultipleRuleMatches) {
  struct RuleSpec {
    uint32_t id;
    const char* url_pattern;
    uint16_t element_types;
  };
  const RuleSpec kRules[] = {
      {0, "ex1", flat::ElementType_ANY},
      {1, "ex1", flat::ElementType_IMAGE},
      {2, "ex1", flat::ElementType_IMAGE | flat::ElementType_FONT},
      {3, "ex12", flat::ElementType_ANY},
      {4, "google", flat::ElementType_ANY},
      {5, "google", flat::ElementType_IMAGE}};

  // Every rule applies to both first- and third-party requests and shares
  // priority 0, so only the pattern and element types decide a match.
  const uint8_t kOptions = flat::OptionFlag_APPLIES_TO_FIRST_PARTY |
                           flat::OptionFlag_APPLIES_TO_THIRD_PARTY;
  for (const RuleSpec& spec : kRules) {
    AddSimpleUrlRule(spec.url_pattern, spec.id, 0 /* priority */, kOptions,
                     spec.element_types);
  }
  Finish();

  struct Expectation {
    const char* url;
    proto::ElementType element_type;
    std::vector<uint32_t> expected_matched_ids;
  };
  const Expectation kTestCases[] = {
      {"http://ex1.com", proto::ELEMENT_TYPE_OTHER, {0}},
      {"http://ex1.com/font", kFont, {0, 2}},
      {"http://ex1.com/img", kImage, {0, 1, 2}},
      {"http://ex12.com", proto::ELEMENT_TYPE_OTHER, {0, 3}},
      {"http://ex12.com/img", kImage, {0, 1, 2, 3}},
      {"http://google.com", proto::ELEMENT_TYPE_OTHER, {4}},
      {"http://google.com/img", kImage, {4, 5}},
      {"http://ex12google.com/img", kImage, {0, 1, 2, 3, 4, 5}},
      {"http://nomatch.com/img", kImage, {}}};

  for (const Expectation& expectation : kTestCases) {
    SCOPED_TRACE(::testing::Message()
                 << "UrlPattern: " << expectation.url << "; ElementTypes: "
                 << static_cast<int>(expectation.element_type));

    const std::vector<const flat::UrlRule*> matches = FindAllMatches(
        expectation.url, "" /* document_origin_string */,
        expectation.element_type, kNoActivation,
        false /* disable_generic_rules */);

    // Reduce the matched rules to their ids for comparison.
    std::vector<uint32_t> matched_ids;
    matched_ids.reserve(matches.size());
    for (const flat::UrlRule* match : matches)
      matched_ids.push_back(match->id());

    EXPECT_THAT(matched_ids, ::testing::UnorderedElementsAreArray(
                                 expectation.expected_matched_ids));
  }
}
TEST_F(UrlPatternIndexTest, MatchWithDisableGenericRules) {
const struct {
const char* url_pattern;
......@@ -804,7 +876,8 @@ TEST_F(UrlPatternIndexTest, FindMatchHighestPriority) {
for (size_t j = 0; j < i; j++) {
AddSimpleUrlRule(pattern, id, priorities[j],
flat::OptionFlag_APPLIES_TO_FIRST_PARTY |
flat::OptionFlag_APPLIES_TO_THIRD_PARTY);
flat::OptionFlag_APPLIES_TO_THIRD_PARTY,
flat::ElementType_ANY);
id++;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment