Commit 986188a9 authored by pkalinnikov's avatar pkalinnikov Committed by Commit bot

[subresource_filter] Index activation rules separately.

This CL adds a separate index for activation rules. This drastically increases
speed of IndexedRuleset::ShouldDisableFilteringForDocument, because currently
there are only about 20 rules with activation options, as opposed to tens of
thousands URL rules total in the ruleset. Ultimately, this change makes
computing activation states for subdocuments very fast.

Memory footprint added by the new index is less than a kilobyte.

BUG=708458

Review-Url: https://codereview.chromium.org/2797133006
Cr-Commit-Position: refs/heads/master@{#462825}
parent b7a72005
...@@ -94,8 +94,11 @@ table IndexedRuleset { ...@@ -94,8 +94,11 @@ table IndexedRuleset {
// The index of all blacklist URL rules. // The index of all blacklist URL rules.
blacklist_index : UrlPatternIndex; blacklist_index : UrlPatternIndex;
// The index of all whitelist URL rules. // The index of all whitelist URL rules, except pure activation rules.
whitelist_index : UrlPatternIndex; whitelist_index : UrlPatternIndex;
// The index of all whitelist URL rules with activation options.
activation_index : UrlPatternIndex;
} }
root_type IndexedRuleset; root_type IndexedRuleset;
...@@ -39,6 +39,9 @@ class UrlRuleFlatBufferConverter { ...@@ -39,6 +39,9 @@ class UrlRuleFlatBufferConverter {
// this client version. // this client version.
bool is_convertible() const { return is_convertible_; } bool is_convertible() const { return is_convertible_; }
bool has_element_types() const { return !!element_types_; }
bool has_activation_types() const { return !!activation_types_; }
// Writes the URL |rule| to the FlatBuffer using the |builder|, and returns // Writes the URL |rule| to the FlatBuffer using the |builder|, and returns
// the offset to the serialized rule. // the offset to the serialized rule.
flatbuffers::Offset<flat::UrlRule> SerializeConvertedRule( flatbuffers::Offset<flat::UrlRule> SerializeConvertedRule(
...@@ -200,7 +203,7 @@ class UrlRuleFlatBufferConverter { ...@@ -200,7 +203,7 @@ class UrlRuleFlatBufferConverter {
// RulesetIndexer -------------------------------------------------------------- // RulesetIndexer --------------------------------------------------------------
// static // static
const int RulesetIndexer::kIndexedFormatVersion = 14; const int RulesetIndexer::kIndexedFormatVersion = 15;
RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default; RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default;
RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default; RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default;
...@@ -212,21 +215,27 @@ bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) { ...@@ -212,21 +215,27 @@ bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) {
UrlRuleFlatBufferConverter converter(rule); UrlRuleFlatBufferConverter converter(rule);
if (!converter.is_convertible()) if (!converter.is_convertible())
return false; return false;
DCHECK_NE(rule.url_pattern_type(), proto::URL_PATTERN_TYPE_REGEXP);
auto rule_offset = converter.SerializeConvertedRule(&builder_); auto rule_offset = converter.SerializeConvertedRule(&builder_);
MutableUrlPatternIndex* index_part = auto add_rule_to_index = [&rule, rule_offset](MutableUrlPatternIndex* index) {
(rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST ? &blacklist_ NGram ngram =
: &whitelist_); GetMostDistinctiveNGram(index->ngram_index, rule.url_pattern());
if (ngram) {
DCHECK_NE(rule.url_pattern_type(), proto::URL_PATTERN_TYPE_REGEXP); index->ngram_index[ngram].push_back(rule_offset);
NGram ngram = } else {
GetMostDistinctiveNGram(index_part->ngram_index, rule.url_pattern()); // TODO(pkalinnikov): Index fallback rules as well.
index->fallback_rules.push_back(rule_offset);
}
};
if (ngram) { if (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST) {
index_part->ngram_index[ngram].push_back(rule_offset); add_rule_to_index(&blacklist_);
} else { } else {
// TODO(pkalinnikov): Index fallback rules as well. if (converter.has_element_types())
index_part->fallback_rules.push_back(rule_offset); add_rule_to_index(&whitelist_);
if (converter.has_activation_types())
add_rule_to_index(&activation_);
} }
return true; return true;
...@@ -235,12 +244,14 @@ bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) { ...@@ -235,12 +244,14 @@ bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) {
void RulesetIndexer::Finish() { void RulesetIndexer::Finish() {
auto blacklist_offset = SerializeUrlPatternIndex(blacklist_); auto blacklist_offset = SerializeUrlPatternIndex(blacklist_);
auto whitelist_offset = SerializeUrlPatternIndex(whitelist_); auto whitelist_offset = SerializeUrlPatternIndex(whitelist_);
auto activation_offset = SerializeUrlPatternIndex(activation_);
auto url_rules_index_offset = auto url_rules_index_offset = flat::CreateIndexedRuleset(
flat::CreateIndexedRuleset(builder_, blacklist_offset, whitelist_offset); builder_, blacklist_offset, whitelist_offset, activation_offset);
builder_.Finish(url_rules_index_offset); builder_.Finish(url_rules_index_offset);
} }
// static
NGram RulesetIndexer::GetMostDistinctiveNGram( NGram RulesetIndexer::GetMostDistinctiveNGram(
const MutableNGramIndex& ngram_index, const MutableNGramIndex& ngram_index,
base::StringPiece pattern) { base::StringPiece pattern) {
...@@ -486,7 +497,7 @@ bool IndexedRulesetMatcher::ShouldDisableFilteringForDocument( ...@@ -486,7 +497,7 @@ bool IndexedRulesetMatcher::ShouldDisableFilteringForDocument(
return false; return false;
} }
return IsMatch( return IsMatch(
root_->whitelist_index(), document_url, parent_document_origin, root_->activation_index(), document_url, parent_document_origin,
proto::ELEMENT_TYPE_UNSPECIFIED, activation_type, proto::ELEMENT_TYPE_UNSPECIFIED, activation_type,
FirstPartyOrigin::IsThirdParty(document_url, parent_document_origin), FirstPartyOrigin::IsThirdParty(document_url, parent_document_origin),
false); false);
......
...@@ -95,6 +95,7 @@ class RulesetIndexer { ...@@ -95,6 +95,7 @@ class RulesetIndexer {
MutableUrlPatternIndex blacklist_; MutableUrlPatternIndex blacklist_;
MutableUrlPatternIndex whitelist_; MutableUrlPatternIndex whitelist_;
MutableUrlPatternIndex activation_;
flatbuffers::FlatBufferBuilder builder_; flatbuffers::FlatBufferBuilder builder_;
......
...@@ -28,6 +28,10 @@ constexpr proto::SourceType kAnyParty = proto::SOURCE_TYPE_ANY; ...@@ -28,6 +28,10 @@ constexpr proto::SourceType kAnyParty = proto::SOURCE_TYPE_ANY;
constexpr proto::SourceType kFirstParty = proto::SOURCE_TYPE_FIRST_PARTY; constexpr proto::SourceType kFirstParty = proto::SOURCE_TYPE_FIRST_PARTY;
constexpr proto::SourceType kThirdParty = proto::SOURCE_TYPE_THIRD_PARTY; constexpr proto::SourceType kThirdParty = proto::SOURCE_TYPE_THIRD_PARTY;
constexpr proto::ActivationType kDocument = proto::ACTIVATION_TYPE_DOCUMENT;
constexpr proto::ActivationType kGenericBlock =
proto::ACTIVATION_TYPE_GENERICBLOCK;
// Note: Returns unique origin on origin_string == nullptr. // Note: Returns unique origin on origin_string == nullptr.
url::Origin GetOrigin(const char* origin_string) { url::Origin GetOrigin(const char* origin_string) {
return origin_string ? url::Origin(GURL(origin_string)) : url::Origin(); return origin_string ? url::Origin(GURL(origin_string)) : url::Origin();
...@@ -83,9 +87,9 @@ class UrlRuleBuilder { ...@@ -83,9 +87,9 @@ class UrlRuleBuilder {
} // namespace } // namespace
class IndexedRulesetTest : public testing::Test { class SubresourceFilterIndexedRulesetTest : public testing::Test {
public: public:
IndexedRulesetTest() = default; SubresourceFilterIndexedRulesetTest() = default;
protected: protected:
bool ShouldAllow(const char* url, bool ShouldAllow(const char* url,
...@@ -153,10 +157,10 @@ class IndexedRulesetTest : public testing::Test { ...@@ -153,10 +157,10 @@ class IndexedRulesetTest : public testing::Test {
std::unique_ptr<IndexedRulesetMatcher> matcher_; std::unique_ptr<IndexedRulesetMatcher> matcher_;
private: private:
DISALLOW_COPY_AND_ASSIGN(IndexedRulesetTest); DISALLOW_COPY_AND_ASSIGN(SubresourceFilterIndexedRulesetTest);
}; };
TEST_F(IndexedRulesetTest, OneRuleWithoutMetaInfo) { TEST_F(SubresourceFilterIndexedRulesetTest, OneRuleWithoutMetaInfo) {
const struct { const struct {
UrlPattern url_pattern; UrlPattern url_pattern;
const char* url; const char* url;
...@@ -302,7 +306,7 @@ TEST_F(IndexedRulesetTest, OneRuleWithoutMetaInfo) { ...@@ -302,7 +306,7 @@ TEST_F(IndexedRulesetTest, OneRuleWithoutMetaInfo) {
} }
} }
TEST_F(IndexedRulesetTest, OneRuleWithThirdParty) { TEST_F(SubresourceFilterIndexedRulesetTest, OneRuleWithThirdParty) {
const struct { const struct {
const char* url_pattern; const char* url_pattern;
proto::SourceType source_type; proto::SourceType source_type;
...@@ -360,7 +364,7 @@ TEST_F(IndexedRulesetTest, OneRuleWithThirdParty) { ...@@ -360,7 +364,7 @@ TEST_F(IndexedRulesetTest, OneRuleWithThirdParty) {
} }
} }
TEST_F(IndexedRulesetTest, OneRuleWithDomainList) { TEST_F(SubresourceFilterIndexedRulesetTest, OneRuleWithDomainList) {
const struct { const struct {
const char* url_pattern; const char* url_pattern;
std::vector<std::string> domains; std::vector<std::string> domains;
...@@ -458,7 +462,7 @@ TEST_F(IndexedRulesetTest, OneRuleWithDomainList) { ...@@ -458,7 +462,7 @@ TEST_F(IndexedRulesetTest, OneRuleWithDomainList) {
} }
} }
TEST_F(IndexedRulesetTest, OneRuleWithElementTypes) { TEST_F(SubresourceFilterIndexedRulesetTest, OneRuleWithElementTypes) {
constexpr proto::ElementType kAll = proto::ELEMENT_TYPE_ALL; constexpr proto::ElementType kAll = proto::ELEMENT_TYPE_ALL;
constexpr proto::ElementType kImage = proto::ELEMENT_TYPE_IMAGE; constexpr proto::ElementType kImage = proto::ELEMENT_TYPE_IMAGE;
constexpr proto::ElementType kFont = proto::ELEMENT_TYPE_FONT; constexpr proto::ElementType kFont = proto::ELEMENT_TYPE_FONT;
...@@ -514,11 +518,8 @@ TEST_F(IndexedRulesetTest, OneRuleWithElementTypes) { ...@@ -514,11 +518,8 @@ TEST_F(IndexedRulesetTest, OneRuleWithElementTypes) {
} }
} }
TEST_F(IndexedRulesetTest, OneRuleWithActivationTypes) { TEST_F(SubresourceFilterIndexedRulesetTest, OneRuleWithActivationTypes) {
constexpr proto::ActivationType kNone = proto::ACTIVATION_TYPE_UNSPECIFIED; constexpr proto::ActivationType kNone = proto::ACTIVATION_TYPE_UNSPECIFIED;
constexpr proto::ActivationType kDocument = proto::ACTIVATION_TYPE_DOCUMENT;
constexpr proto::ActivationType kGenericBlock =
proto::ACTIVATION_TYPE_GENERICBLOCK;
const struct { const struct {
const char* url_pattern; const char* url_pattern;
...@@ -565,7 +566,23 @@ TEST_F(IndexedRulesetTest, OneRuleWithActivationTypes) { ...@@ -565,7 +566,23 @@ TEST_F(IndexedRulesetTest, OneRuleWithActivationTypes) {
} }
} }
TEST_F(IndexedRulesetTest, MatchWithDisableGenericRules) { TEST_F(SubresourceFilterIndexedRulesetTest, RuleWithElementAndActivationTypes) {
UrlRuleBuilder builder(UrlPattern("allow.ex.com"), true /* is_whitelist */);
builder.rule().set_activation_types(kDocument);
AddUrlRule(builder.rule());
AddBlacklistRule(UrlPattern("ex.com"));
Finish();
EXPECT_FALSE(ShouldAllow("http://ex.com"));
EXPECT_TRUE(ShouldAllow("http://allow.ex.com"));
EXPECT_FALSE(ShouldDeactivate("http://allow.ex.com", nullptr /* initiator */,
kGenericBlock));
EXPECT_TRUE(ShouldDeactivate("http://allow.ex.com", nullptr /* initiator */,
kDocument));
}
TEST_F(SubresourceFilterIndexedRulesetTest, MatchWithDisableGenericRules) {
// Generic rules. // Generic rules.
ASSERT_NO_FATAL_FAILURE( ASSERT_NO_FATAL_FAILURE(
AddUrlRule(UrlRuleBuilder(UrlPattern("some_text", kSubstring)).rule())); AddUrlRule(UrlRuleBuilder(UrlPattern("some_text", kSubstring)).rule()));
...@@ -632,14 +649,14 @@ TEST_F(IndexedRulesetTest, MatchWithDisableGenericRules) { ...@@ -632,14 +649,14 @@ TEST_F(IndexedRulesetTest, MatchWithDisableGenericRules) {
} }
} }
TEST_F(IndexedRulesetTest, EmptyRuleset) { TEST_F(SubresourceFilterIndexedRulesetTest, EmptyRuleset) {
Finish(); Finish();
EXPECT_TRUE(ShouldAllow("http://example.com")); EXPECT_TRUE(ShouldAllow("http://example.com"));
EXPECT_TRUE(ShouldAllow("http://another.example.com?param=val")); EXPECT_TRUE(ShouldAllow("http://another.example.com?param=val"));
EXPECT_TRUE(ShouldAllow(nullptr)); EXPECT_TRUE(ShouldAllow(nullptr));
} }
TEST_F(IndexedRulesetTest, NoRuleApplies) { TEST_F(SubresourceFilterIndexedRulesetTest, NoRuleApplies) {
AddSimpleRule(UrlPattern("?filtered_content=", kSubstring), false); AddSimpleRule(UrlPattern("?filtered_content=", kSubstring), false);
AddSimpleRule(UrlPattern("&filtered_content=", kSubstring), false); AddSimpleRule(UrlPattern("&filtered_content=", kSubstring), false);
Finish(); Finish();
...@@ -648,7 +665,7 @@ TEST_F(IndexedRulesetTest, NoRuleApplies) { ...@@ -648,7 +665,7 @@ TEST_F(IndexedRulesetTest, NoRuleApplies) {
EXPECT_TRUE(ShouldAllow("http://example.com?filtered_not")); EXPECT_TRUE(ShouldAllow("http://example.com?filtered_not"));
} }
TEST_F(IndexedRulesetTest, SimpleBlacklist) { TEST_F(SubresourceFilterIndexedRulesetTest, SimpleBlacklist) {
AddSimpleRule(UrlPattern("?param=", kSubstring), false); AddSimpleRule(UrlPattern("?param=", kSubstring), false);
Finish(); Finish();
...@@ -656,14 +673,14 @@ TEST_F(IndexedRulesetTest, SimpleBlacklist) { ...@@ -656,14 +673,14 @@ TEST_F(IndexedRulesetTest, SimpleBlacklist) {
EXPECT_FALSE(ShouldAllow("http://example.org?param=image1")); EXPECT_FALSE(ShouldAllow("http://example.org?param=image1"));
} }
TEST_F(IndexedRulesetTest, SimpleWhitelist) { TEST_F(SubresourceFilterIndexedRulesetTest, SimpleWhitelist) {
AddSimpleRule(UrlPattern("example.com/?filtered_content=", kSubstring), true); AddSimpleRule(UrlPattern("example.com/?filtered_content=", kSubstring), true);
Finish(); Finish();
EXPECT_TRUE(ShouldAllow("https://example.com?filtered_content=image1")); EXPECT_TRUE(ShouldAllow("https://example.com?filtered_content=image1"));
} }
TEST_F(IndexedRulesetTest, BlacklistWhitelist) { TEST_F(SubresourceFilterIndexedRulesetTest, BlacklistWhitelist) {
AddSimpleRule(UrlPattern("?filter=", kSubstring), false); AddSimpleRule(UrlPattern("?filter=", kSubstring), false);
AddSimpleRule(UrlPattern("whitelisted.com/?filter=", kSubstring), true); AddSimpleRule(UrlPattern("whitelisted.com/?filter=", kSubstring), true);
Finish(); Finish();
...@@ -673,9 +690,7 @@ TEST_F(IndexedRulesetTest, BlacklistWhitelist) { ...@@ -673,9 +690,7 @@ TEST_F(IndexedRulesetTest, BlacklistWhitelist) {
EXPECT_FALSE(ShouldAllow("http://blacklisted.com?filter=on")); EXPECT_FALSE(ShouldAllow("http://blacklisted.com?filter=on"));
} }
TEST_F(IndexedRulesetTest, BlacklistAndActivationType) { TEST_F(SubresourceFilterIndexedRulesetTest, BlacklistAndActivationType) {
const auto kDocument = proto::ACTIVATION_TYPE_DOCUMENT;
AddSimpleRule(UrlPattern("example.com", kSubstring), false); AddSimpleRule(UrlPattern("example.com", kSubstring), false);
AddWhitelistRuleWithActivationTypes(UrlPattern("example.com", kSubstring), AddWhitelistRuleWithActivationTypes(UrlPattern("example.com", kSubstring),
kDocument); kDocument);
...@@ -687,7 +702,7 @@ TEST_F(IndexedRulesetTest, BlacklistAndActivationType) { ...@@ -687,7 +702,7 @@ TEST_F(IndexedRulesetTest, BlacklistAndActivationType) {
EXPECT_TRUE(ShouldAllow("https://xample.com")); EXPECT_TRUE(ShouldAllow("https://xample.com"));
} }
TEST_F(IndexedRulesetTest, RuleWithUnsupportedOptions) { TEST_F(SubresourceFilterIndexedRulesetTest, RuleWithUnsupportedOptions) {
const struct { const struct {
int element_types; int element_types;
int activation_types; int activation_types;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment