Commit a9c4e1d4 authored by Karan Bhatia's avatar Karan Bhatia Committed by Commit Bot

UrlPatternIndex: Implement case-insensitive matching.

This fixes an existing TODO in url_pattern_index to implement case-insensitive
matching. To do so:
  - Lower case the stored n grams. These n grams are used to find prospective
    matches for a particular url.
  - Modify URLPattern to handle case-insensitive matching. This involves
    lower-casing the url pattern and the url, before comparing them.

BUG=767605

Cq-Include-Trybots: master.tryserver.chromium.perf:obbs_fyi
Change-Id: I1377c216d61392c508280141d0d947a47a3ee494
Reviewed-on: https://chromium-review.googlesource.com/1212183Reviewed-by: default avatarCharlie Harrison <csharrison@chromium.org>
Reviewed-by: default avatarNed Nguyen <nednguyen@google.com>
Reviewed-by: default avatarIstiaque Ahmed <lazyboy@chromium.org>
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#590118}
parent a9970a07
...@@ -604,10 +604,8 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest, ...@@ -604,10 +604,8 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
} }
// Tests case sensitive url filters. // Tests case sensitive url filters.
// TODO(crbug.com/767605): Enable once case-insensitive matching is implemented
// in url_pattern_index.
IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest, IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
DISABLED_BlockRequests_IsUrlFilterCaseSensitive) { BlockRequests_IsUrlFilterCaseSensitive) {
struct { struct {
std::string url_filter; std::string url_filter;
size_t id; size_t id;
...@@ -622,6 +620,7 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest, ...@@ -622,6 +620,7 @@ IN_PROC_BROWSER_TEST_P(DeclarativeNetRequestBrowserTest,
rule.id = rule_data.id; rule.id = rule_data.id;
rule.condition->is_url_filter_case_sensitive = rule.condition->is_url_filter_case_sensitive =
rule_data.is_url_filter_case_sensitive; rule_data.is_url_filter_case_sensitive;
rule.condition->resource_types = std::vector<std::string>({"main_frame"});
rules.push_back(rule); rules.push_back(rule);
} }
ASSERT_NO_FATAL_FAILURE(LoadExtensionWithRules(rules)); ASSERT_NO_FATAL_FAILURE(LoadExtensionWithRules(rules));
......
...@@ -55,12 +55,12 @@ VerifyStatus GetVerifyStatus(const uint8_t* buffer, ...@@ -55,12 +55,12 @@ VerifyStatus GetVerifyStatus(const uint8_t* buffer,
// static // static
// Keep this in sync with the version number in // Keep this in sync with the version number in
// tools/perf/core/default_local_state.json. // tools/perf/core/default_local_state.json.
const int RulesetIndexer::kIndexedFormatVersion = 20; const int RulesetIndexer::kIndexedFormatVersion = 21;
// This static assert is meant to catch cases where // This static assert is meant to catch cases where
// url_pattern_index::kUrlPatternIndexFormatVersion is incremented without // url_pattern_index::kUrlPatternIndexFormatVersion is incremented without
// updating RulesetIndexer::kIndexedFormatVersion. // updating RulesetIndexer::kIndexedFormatVersion.
static_assert(url_pattern_index::kUrlPatternIndexFormatVersion == 1, static_assert(url_pattern_index::kUrlPatternIndexFormatVersion == 2,
"kUrlPatternIndexFormatVersion has changed, make sure you've " "kUrlPatternIndexFormatVersion has changed, make sure you've "
"also updated RulesetIndexer::kIndexedFormatVersion above."); "also updated RulesetIndexer::kIndexedFormatVersion above.");
......
...@@ -109,8 +109,9 @@ table UrlRule { ...@@ -109,8 +109,9 @@ table UrlRule {
// Contains an N-gram (acting as a key in a hash table) and a list of URL rules // Contains an N-gram (acting as a key in a hash table) and a list of URL rules
// associated with that N-gram. // associated with that N-gram.
table NGramToRules { table NGramToRules {
// A string consisting of N (up to 8) non-special characters, which are stored // A string consisting of N (up to 8) ascii-only non-special characters, which
// in the lowest N non-zero bytes, lower bytes corresponding to later symbols. // are stored in the lowest N non-zero bytes, lower bytes corresponding to
// later symbols. These are lower-cased to support case-insensitive matching.
ngram : ulong; ngram : ulong;
// The list of rules containing |ngram| as a substring of their URL pattern. // The list of rules containing |ngram| as a substring of their URL pattern.
......
...@@ -17,10 +17,12 @@ ...@@ -17,10 +17,12 @@
#include <stddef.h> #include <stddef.h>
#include <algorithm>
#include <ostream> #include <ostream>
#include "base/logging.h" #include "base/logging.h"
#include "base/numerics/checked_math.h" #include "base/numerics/checked_math.h"
#include "base/strings/string_util.h"
#include "components/url_pattern_index/flat/url_pattern_index_generated.h" #include "components/url_pattern_index/flat/url_pattern_index_generated.h"
#include "components/url_pattern_index/fuzzy_pattern_matching.h" #include "components/url_pattern_index/fuzzy_pattern_matching.h"
#include "components/url_pattern_index/string_splitter.h" #include "components/url_pattern_index/string_splitter.h"
...@@ -67,6 +69,10 @@ base::StringPiece ConvertString(const flatbuffers::String* string) { ...@@ -67,6 +69,10 @@ base::StringPiece ConvertString(const flatbuffers::String* string) {
: base::StringPiece(); : base::StringPiece();
} }
bool HasAnyUpperAscii(base::StringPiece string) {
return std::any_of(string.begin(), string.end(), base::IsAsciiUpper<char>);
}
// Returns whether |position| within the |url| belongs to its |host| component // Returns whether |position| within the |url| belongs to its |host| component
// and corresponds to the beginning of a (sub-)domain. // and corresponds to the beginning of a (sub-)domain.
inline bool IsSubdomainAnchored(base::StringPiece url, inline bool IsSubdomainAnchored(base::StringPiece url,
...@@ -138,95 +144,70 @@ size_t FindSubdomainAnchoredSubpattern(base::StringPiece url, ...@@ -138,95 +144,70 @@ size_t FindSubdomainAnchoredSubpattern(base::StringPiece url,
return base::StringPiece::npos; return base::StringPiece::npos;
} }
} // namespace // Returns whether the given |url_pattern| matches the given |url_spec|.
// Compares the pattern the the url in a case-sensitive manner.
UrlPattern::UrlPattern() = default; bool IsCaseSensitiveMatch(base::StringPiece url_pattern,
proto::AnchorType anchor_left,
UrlPattern::UrlPattern(base::StringPiece url_pattern, proto::AnchorType anchor_right,
proto::UrlPatternType type) base::StringPiece url_spec,
: type_(type), url_pattern_(url_pattern) {} url::Component url_host) {
DCHECK(!url_spec.empty());
UrlPattern::UrlPattern(base::StringPiece url_pattern,
proto::AnchorType anchor_left,
proto::AnchorType anchor_right)
: type_(proto::URL_PATTERN_TYPE_WILDCARDED),
url_pattern_(url_pattern),
anchor_left_(anchor_left),
anchor_right_(anchor_right) {}
UrlPattern::UrlPattern(const flat::UrlRule& rule) StringSplitter<IsWildcard> subpatterns(url_pattern);
: type_(ConvertUrlPatternType(rule.url_pattern_type())),
url_pattern_(ConvertString(rule.url_pattern())),
anchor_left_(ConvertAnchorType(rule.anchor_left())),
anchor_right_(ConvertAnchorType(rule.anchor_right())),
match_case_(!!(rule.options() & flat::OptionFlag_IS_MATCH_CASE)) {}
UrlPattern::~UrlPattern() = default;
bool UrlPattern::MatchesUrl(const GURL& url) const {
// Note: Empty domains are also invalid.
DCHECK(url.is_valid());
DCHECK(type_ == proto::URL_PATTERN_TYPE_SUBSTRING ||
type_ == proto::URL_PATTERN_TYPE_WILDCARDED);
StringSplitter<IsWildcard> subpatterns(url_pattern_);
auto subpattern_it = subpatterns.begin(); auto subpattern_it = subpatterns.begin();
auto subpattern_end = subpatterns.end(); auto subpattern_end = subpatterns.end();
if (subpattern_it == subpattern_end) { if (subpattern_it == subpattern_end) {
return anchor_left_ == proto::ANCHOR_TYPE_NONE || return anchor_left == proto::ANCHOR_TYPE_NONE ||
anchor_right_ == proto::ANCHOR_TYPE_NONE; anchor_right == proto::ANCHOR_TYPE_NONE;
} }
const base::StringPiece spec = url.possibly_invalid_spec();
const url::Component host_part = url.parsed_for_possibly_invalid_spec().host;
DCHECK(!spec.empty());
base::StringPiece subpattern = *subpattern_it; base::StringPiece subpattern = *subpattern_it;
++subpattern_it; ++subpattern_it;
// If there is only one |subpattern|, and it has a right anchor, then simply // If there is only one |subpattern|, and it has a right anchor, then simply
// check that it is a suffix of the |spec|, and the left anchor is fulfilled. // check that it is a suffix of the |url_spec|, and the left anchor is
// fulfilled.
if (subpattern_it == subpattern_end && if (subpattern_it == subpattern_end &&
anchor_right_ == proto::ANCHOR_TYPE_BOUNDARY) { anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
if (!EndsWithFuzzy(spec, subpattern)) if (!EndsWithFuzzy(url_spec, subpattern))
return false; return false;
if (anchor_left_ == proto::ANCHOR_TYPE_BOUNDARY) if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY)
return spec.size() == subpattern.size(); return url_spec.size() == subpattern.size();
if (anchor_left_ == proto::ANCHOR_TYPE_SUBDOMAIN) { if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) {
DCHECK_LE(subpattern.size(), spec.size()); DCHECK_LE(subpattern.size(), url_spec.size());
return url.has_host() && return url_host.is_nonempty() &&
IsSubdomainAnchored(spec, host_part, IsSubdomainAnchored(url_spec, url_host,
spec.size() - subpattern.size()); url_spec.size() - subpattern.size());
} }
return true; return true;
} }
// Otherwise, the first |subpattern| does not have to be a suffix. But it // Otherwise, the first |subpattern| does not have to be a suffix. But it
// still can have a left anchor. Check and handle that. // still can have a left anchor. Check and handle that.
base::StringPiece text = spec; base::StringPiece text = url_spec;
if (anchor_left_ == proto::ANCHOR_TYPE_BOUNDARY) { if (anchor_left == proto::ANCHOR_TYPE_BOUNDARY) {
if (!StartsWithFuzzy(spec, subpattern)) if (!StartsWithFuzzy(url_spec, subpattern))
return false; return false;
if (subpattern_it == subpattern_end) { if (subpattern_it == subpattern_end) {
DCHECK_EQ(anchor_right_, proto::ANCHOR_TYPE_NONE); DCHECK_EQ(anchor_right, proto::ANCHOR_TYPE_NONE);
return true; return true;
} }
text.remove_prefix(subpattern.size()); text.remove_prefix(subpattern.size());
} else if (anchor_left_ == proto::ANCHOR_TYPE_SUBDOMAIN) { } else if (anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) {
if (!url.has_host()) if (!url_host.is_nonempty())
return false; return false;
const size_t match_begin = const size_t match_begin =
FindSubdomainAnchoredSubpattern(spec, host_part, subpattern); FindSubdomainAnchoredSubpattern(url_spec, url_host, subpattern);
if (match_begin == base::StringPiece::npos) if (match_begin == base::StringPiece::npos)
return false; return false;
if (subpattern_it == subpattern_end) { if (subpattern_it == subpattern_end) {
DCHECK_EQ(anchor_right_, proto::ANCHOR_TYPE_NONE); DCHECK_EQ(anchor_right, proto::ANCHOR_TYPE_NONE);
return true; return true;
} }
text.remove_prefix(match_begin + subpattern.size()); text.remove_prefix(match_begin + subpattern.size());
} else { } else {
DCHECK_EQ(anchor_left_, proto::ANCHOR_TYPE_NONE); DCHECK_EQ(anchor_left, proto::ANCHOR_TYPE_NONE);
// Get back to the initial |subpattern|, process it in the loop below. // Get back to the initial |subpattern|, process it in the loop below.
subpattern_it = subpatterns.begin(); subpattern_it = subpatterns.begin();
} }
...@@ -239,7 +220,7 @@ bool UrlPattern::MatchesUrl(const GURL& url) const { ...@@ -239,7 +220,7 @@ bool UrlPattern::MatchesUrl(const GURL& url) const {
DCHECK(!subpattern.empty()); DCHECK(!subpattern.empty());
if (++subpattern_it == subpattern_end && if (++subpattern_it == subpattern_end &&
anchor_right_ == proto::ANCHOR_TYPE_BOUNDARY) { anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
break; break;
} }
...@@ -249,10 +230,62 @@ bool UrlPattern::MatchesUrl(const GURL& url) const { ...@@ -249,10 +230,62 @@ bool UrlPattern::MatchesUrl(const GURL& url) const {
text.remove_prefix(match_position + subpattern.size()); text.remove_prefix(match_position + subpattern.size());
} }
return anchor_right_ != proto::ANCHOR_TYPE_BOUNDARY || return anchor_right != proto::ANCHOR_TYPE_BOUNDARY ||
EndsWithFuzzy(text, subpattern); EndsWithFuzzy(text, subpattern);
} }
} // namespace
UrlPattern::UrlPattern() = default;
UrlPattern::UrlPattern(base::StringPiece url_pattern,
proto::UrlPatternType type,
MatchCase match_case)
: type_(type), url_pattern_(url_pattern), match_case_(match_case) {}
UrlPattern::UrlPattern(base::StringPiece url_pattern,
proto::AnchorType anchor_left,
proto::AnchorType anchor_right)
: type_(proto::URL_PATTERN_TYPE_WILDCARDED),
url_pattern_(url_pattern),
anchor_left_(anchor_left),
anchor_right_(anchor_right) {}
UrlPattern::UrlPattern(const flat::UrlRule& rule)
: type_(ConvertUrlPatternType(rule.url_pattern_type())),
url_pattern_(ConvertString(rule.url_pattern())),
anchor_left_(ConvertAnchorType(rule.anchor_left())),
anchor_right_(ConvertAnchorType(rule.anchor_right())),
match_case_(rule.options() & flat::OptionFlag_IS_MATCH_CASE
? MatchCase::kTrue
: MatchCase::kFalse) {}
UrlPattern::~UrlPattern() = default;
bool UrlPattern::MatchesUrl(const GURL& url) const {
// Note: Empty domains are also invalid.
DCHECK(url.is_valid());
DCHECK(type_ == proto::URL_PATTERN_TYPE_SUBSTRING ||
type_ == proto::URL_PATTERN_TYPE_WILDCARDED);
DCHECK(base::IsStringASCII(url_pattern_));
DCHECK(base::IsStringASCII(url.possibly_invalid_spec()));
if (match_case() || (!HasAnyUpperAscii(url_pattern_) &&
!HasAnyUpperAscii(url.possibly_invalid_spec()))) {
return IsCaseSensitiveMatch(url_pattern_, anchor_left_, anchor_right_,
url.possibly_invalid_spec(),
url.parsed_for_possibly_invalid_spec().host);
}
// For case-insensitive matching, convert both pattern and url to lower case.
const std::string lower_case_url_pattern = base::ToLowerASCII(url_pattern_);
const std::string lower_case_url_spec =
base::ToLowerASCII(url.possibly_invalid_spec());
return IsCaseSensitiveMatch(lower_case_url_pattern, anchor_left_,
anchor_right_, lower_case_url_spec,
url.parsed_for_possibly_invalid_spec().host);
}
std::ostream& operator<<(std::ostream& out, const UrlPattern& pattern) { std::ostream& operator<<(std::ostream& out, const UrlPattern& pattern) {
switch (pattern.anchor_left()) { switch (pattern.anchor_left()) {
case proto::ANCHOR_TYPE_SUBDOMAIN: case proto::ANCHOR_TYPE_SUBDOMAIN:
......
...@@ -23,11 +23,17 @@ struct UrlRule; // The FlatBuffers version of UrlRule. ...@@ -23,11 +23,17 @@ struct UrlRule; // The FlatBuffers version of UrlRule.
// of the UrlRule that owns it, and to match it against URLs. // of the UrlRule that owns it, and to match it against URLs.
class UrlPattern { class UrlPattern {
public: public:
enum class MatchCase {
kTrue,
kFalse,
};
UrlPattern(); UrlPattern();
// Creates a |url_pattern| of a certain |type|. // Creates a |url_pattern| of a certain |type| and case-sensitivity.
UrlPattern(base::StringPiece url_pattern, UrlPattern(base::StringPiece url_pattern,
proto::UrlPatternType type = proto::URL_PATTERN_TYPE_WILDCARDED); proto::UrlPatternType type = proto::URL_PATTERN_TYPE_WILDCARDED,
MatchCase match_case = MatchCase::kFalse);
// Creates a WILDCARDED |url_pattern| with the specified anchors. // Creates a WILDCARDED |url_pattern| with the specified anchors.
UrlPattern(base::StringPiece url_pattern, UrlPattern(base::StringPiece url_pattern,
...@@ -43,7 +49,7 @@ class UrlPattern { ...@@ -43,7 +49,7 @@ class UrlPattern {
base::StringPiece url_pattern() const { return url_pattern_; } base::StringPiece url_pattern() const { return url_pattern_; }
proto::AnchorType anchor_left() const { return anchor_left_; } proto::AnchorType anchor_left() const { return anchor_left_; }
proto::AnchorType anchor_right() const { return anchor_right_; } proto::AnchorType anchor_right() const { return anchor_right_; }
bool match_case() const { return match_case_; } bool match_case() const { return match_case_ == MatchCase::kTrue; }
// Returns whether the |url| matches the URL |pattern|. Requires the type of // Returns whether the |url| matches the URL |pattern|. Requires the type of
// |this| pattern to be either SUBSTRING or WILDCARDED. // |this| pattern to be either SUBSTRING or WILDCARDED.
...@@ -63,8 +69,7 @@ class UrlPattern { ...@@ -63,8 +69,7 @@ class UrlPattern {
proto::AnchorType anchor_left_ = proto::ANCHOR_TYPE_NONE; proto::AnchorType anchor_left_ = proto::ANCHOR_TYPE_NONE;
proto::AnchorType anchor_right_ = proto::ANCHOR_TYPE_NONE; proto::AnchorType anchor_right_ = proto::ANCHOR_TYPE_NONE;
// TODO(pkalinnikov): Implement case-insensitive matching. MatchCase match_case_ = MatchCase::kFalse;
bool match_case_ = false;
DISALLOW_COPY_AND_ASSIGN(UrlPattern); DISALLOW_COPY_AND_ASSIGN(UrlPattern);
}; };
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "base/logging.h" #include "base/logging.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/numerics/safe_conversions.h" #include "base/numerics/safe_conversions.h"
#include "base/optional.h"
#include "base/strings/string_piece.h" #include "base/strings/string_piece.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "components/url_pattern_index/ngram_extractor.h" #include "components/url_pattern_index/ngram_extractor.h"
...@@ -98,8 +99,7 @@ base::StringPiece ToStringPiece(const flatbuffers::String* string) { ...@@ -98,8 +99,7 @@ base::StringPiece ToStringPiece(const flatbuffers::String* string) {
} }
bool HasNoUpperAscii(base::StringPiece string) { bool HasNoUpperAscii(base::StringPiece string) {
return std::none_of(string.begin(), string.end(), return std::none_of(string.begin(), string.end(), base::IsAsciiUpper<char>);
[](char c) { return base::IsAsciiUpper(c); });
} }
// Comparator to sort UrlRule. Sorts rules by descending order of rule priority. // Comparator to sort UrlRule. Sorts rules by descending order of rule priority.
...@@ -450,8 +450,11 @@ NGram UrlPatternIndexBuilder::GetMostDistinctiveNGram( ...@@ -450,8 +450,11 @@ NGram UrlPatternIndexBuilder::GetMostDistinctiveNGram(
size_t min_list_size = std::numeric_limits<size_t>::max(); size_t min_list_size = std::numeric_limits<size_t>::max();
NGram best_ngram = 0; NGram best_ngram = 0;
// To support case-insensitive matching, lower case the n-grams for |pattern|.
DCHECK(base::IsStringASCII(pattern));
const std::string lower_case_pattern = base::ToLowerASCII(pattern);
auto ngrams = CreateNGramExtractor<kNGramSize, NGram>( auto ngrams = CreateNGramExtractor<kNGramSize, NGram>(
pattern, [](char c) { return c == '*' || c == '^'; }); lower_case_pattern, [](char c) { return c == '*' || c == '^'; });
for (uint64_t ngram : ngrams) { for (uint64_t ngram : ngrams) {
const MutableUrlRuleList* rules = ngram_index_.Get(ngram); const MutableUrlRuleList* rules = ngram_index_.Get(ngram);
...@@ -654,8 +657,14 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex( ...@@ -654,8 +657,14 @@ const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
NGramHashTableProber prober; NGramHashTableProber prober;
// |hash_table| contains lower-cased n-grams. Lower case the url if necessary
// to find possible matches.
base::Optional<std::string> lower_case_url;
if (!HasNoUpperAscii(url.spec()))
lower_case_url = base::ToLowerASCII(url.spec());
auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>( auto ngrams = CreateNGramExtractor<kNGramSize, uint64_t>(
url.spec(), [](char) { return false; }); lower_case_url ? *lower_case_url : url.spec(),
[](char) { return false; });
auto get_max_priority_rule = [](const flat::UrlRule* lhs, auto get_max_priority_rule = [](const flat::UrlRule* lhs,
const flat::UrlRule* rhs) { const flat::UrlRule* rhs) {
......
...@@ -66,7 +66,7 @@ int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain); ...@@ -66,7 +66,7 @@ int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain);
// Increase this value when introducing an incompatible change to the // Increase this value when introducing an incompatible change to the
// UrlPatternIndex schema (flat/url_pattern_index.fbs). url_pattern_index // UrlPatternIndex schema (flat/url_pattern_index.fbs). url_pattern_index
// clients can use this as a signal to rebuild rulesets. // clients can use this as a signal to rebuild rulesets.
constexpr int kUrlPatternIndexFormatVersion = 1; constexpr int kUrlPatternIndexFormatVersion = 2;
// The class used to construct an index over the URL patterns of a set of URL // The class used to construct an index over the URL patterns of a set of URL
// rules. The rules themselves need to be converted to FlatBuffers format by the // rules. The rules themselves need to be converted to FlatBuffers format by the
......
...@@ -131,6 +131,20 @@ TEST_F(UrlPatternIndexTest, NoRuleApplies) { ...@@ -131,6 +131,20 @@ TEST_F(UrlPatternIndexTest, NoRuleApplies) {
EXPECT_FALSE(FindMatch("http://example.com?k=v&filter_not")); EXPECT_FALSE(FindMatch("http://example.com?k=v&filter_not"));
} }
TEST_F(UrlPatternIndexTest, CaseSensitivity) {
ASSERT_TRUE(
AddUrlRule(MakeUrlRule(UrlPattern("case-INSENsitive", kSubstring))));
proto::UrlRule rule = MakeUrlRule(UrlPattern("case-sensitive"));
rule.set_match_case(true);
ASSERT_TRUE(AddUrlRule(rule));
Finish();
EXPECT_TRUE(FindMatch("http://abc.com/type=CASE-insEnsitIVe"));
EXPECT_TRUE(FindMatch("http://abc.com/type=case-INSENSITIVE"));
EXPECT_FALSE(FindMatch("http://abc.com?type=CASE-sensitive"));
EXPECT_TRUE(FindMatch("http://abc.com?type=case-sensitive"));
}
TEST_F(UrlPatternIndexTest, OneRuleWithoutMetaInfo) { TEST_F(UrlPatternIndexTest, OneRuleWithoutMetaInfo) {
const struct { const struct {
UrlPattern url_pattern; UrlPattern url_pattern;
......
...@@ -14,10 +14,12 @@ namespace { ...@@ -14,10 +14,12 @@ namespace {
constexpr proto::AnchorType kAnchorNone = proto::ANCHOR_TYPE_NONE; constexpr proto::AnchorType kAnchorNone = proto::ANCHOR_TYPE_NONE;
constexpr proto::AnchorType kBoundary = proto::ANCHOR_TYPE_BOUNDARY; constexpr proto::AnchorType kBoundary = proto::ANCHOR_TYPE_BOUNDARY;
constexpr proto::AnchorType kSubdomain = proto::ANCHOR_TYPE_SUBDOMAIN; constexpr proto::AnchorType kSubdomain = proto::ANCHOR_TYPE_SUBDOMAIN;
constexpr UrlPattern::MatchCase kMatchCase = UrlPattern::MatchCase::kTrue;
constexpr UrlPattern::MatchCase kDonotMatchCase = UrlPattern::MatchCase::kFalse;
} // namespace } // namespace
TEST(SubresourceFilterUrlPatternTest, MatchesUrl) { TEST(UrlPatternTest, MatchesUrl) {
const struct { const struct {
UrlPattern url_pattern; UrlPattern url_pattern;
const char* url; const char* url;
...@@ -122,6 +124,31 @@ TEST(SubresourceFilterUrlPatternTest, MatchesUrl) { ...@@ -122,6 +124,31 @@ TEST(SubresourceFilterUrlPatternTest, MatchesUrl) {
{{"/path", kSubdomain, kBoundary}, "http://a.com./path", true}, {{"/path", kSubdomain, kBoundary}, "http://a.com./path", true},
{{"^path", kSubdomain, kBoundary}, "http://a.com./path", true}, {{"^path", kSubdomain, kBoundary}, "http://a.com./path", true},
{{"path", kSubdomain, kBoundary}, "http://a.com./path", false}, {{"path", kSubdomain, kBoundary}, "http://a.com./path", false},
// Case-sensitivity tests.
{{"path", proto::URL_PATTERN_TYPE_SUBSTRING, kDonotMatchCase},
"http://a.com/PaTh",
true},
{{"path", proto::URL_PATTERN_TYPE_SUBSTRING, kMatchCase},
"http://a.com/PaTh",
false},
{{"path", proto::URL_PATTERN_TYPE_SUBSTRING, kDonotMatchCase},
"http://a.com/path",
true},
{{"path", proto::URL_PATTERN_TYPE_SUBSTRING, kMatchCase},
"http://a.com/path",
true},
{{"abc*def^", proto::URL_PATTERN_TYPE_WILDCARDED, kMatchCase},
"http://a.com/abcxAdef/vo",
true},
{{"abc*def^", proto::URL_PATTERN_TYPE_WILDCARDED, kMatchCase},
"http://a.com/aBcxAdeF/vo",
false},
{{"abc*def^", proto::URL_PATTERN_TYPE_WILDCARDED, kDonotMatchCase},
"http://a.com/aBcxAdeF/vo",
true},
{{"abc*def^", proto::URL_PATTERN_TYPE_WILDCARDED, kDonotMatchCase},
"http://a.com/abcxAdef/vo",
true},
}; };
for (const auto& test_case : kTestCases) { for (const auto& test_case : kTestCases) {
......
...@@ -96,7 +96,7 @@ TEST_F(IndexedRulesetFormatVersionTest, CheckVersionUpdated) { ...@@ -96,7 +96,7 @@ TEST_F(IndexedRulesetFormatVersionTest, CheckVersionUpdated) {
EXPECT_EQ(StripCommentsAndWhitespace(kFlatbufferSchemaExpected), EXPECT_EQ(StripCommentsAndWhitespace(kFlatbufferSchemaExpected),
StripCommentsAndWhitespace(flatbuffer_schema)) StripCommentsAndWhitespace(flatbuffer_schema))
<< "Schema change detected; update this test and the schema version."; << "Schema change detected; update this test and the schema version.";
EXPECT_EQ(1, GetIndexedRulesetFormatVersionForTesting()) EXPECT_EQ(2, GetIndexedRulesetFormatVersionForTesting())
<< "Update this test if you update the schema version."; << "Update this test if you update the schema version.";
} }
......
...@@ -47,12 +47,12 @@ namespace dnr_api = extensions::api::declarative_net_request; ...@@ -47,12 +47,12 @@ namespace dnr_api = extensions::api::declarative_net_request;
// url_pattern_index.fbs. Whenever an extension with an indexed ruleset format // url_pattern_index.fbs. Whenever an extension with an indexed ruleset format
// version different from the one currently used by Chrome is loaded, the // version different from the one currently used by Chrome is loaded, the
// extension ruleset will be reindexed. // extension ruleset will be reindexed.
constexpr int kIndexedRulesetFormatVersion = 1; constexpr int kIndexedRulesetFormatVersion = 2;
// This static assert is meant to catch cases where // This static assert is meant to catch cases where
// url_pattern_index::kUrlPatternIndexFormatVersion is incremented without // url_pattern_index::kUrlPatternIndexFormatVersion is incremented without
// updating kIndexedRulesetFormatVersion. // updating kIndexedRulesetFormatVersion.
static_assert(url_pattern_index::kUrlPatternIndexFormatVersion == 1, static_assert(url_pattern_index::kUrlPatternIndexFormatVersion == 2,
"kUrlPatternIndexFormatVersion has changed, make sure you've " "kUrlPatternIndexFormatVersion has changed, make sure you've "
"also updated kIndexedRulesetFormatVersion above."); "also updated kIndexedRulesetFormatVersion above.");
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
"subresource_filter": { "subresource_filter": {
"ruleset_version": { "ruleset_version": {
"content": "1000", "content": "1000",
"format": 20 "format": 21
} }
} }
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment