Commit a8f0905f authored by Josh Karlin, committed by Commit Bot

Creates a tool that converts unindexed rulesets to indexed on disk

Creates a new binary, subresource_indexing_tool, that takes an unindexed
ruleset as input and writes an indexed ruleset as output.

Bug: 793025
Change-Id: Ieeb10aab4ba80c83baf8058fe202e4d4fcd770e1
Reviewed-on: https://chromium-review.googlesource.com/891140
Commit-Queue: Josh Karlin <jkarlin@chromium.org>
Reviewed-by: Charlie Harrison <csharrison@chromium.org>
Cr-Commit-Position: refs/heads/master@{#532952}
parent c4ad1a4d
...@@ -58,6 +58,18 @@ static_library("test_support") { ...@@ -58,6 +58,18 @@ static_library("test_support") {
] ]
} }
# Library holding the ruleset indexing logic (tools/indexing_tool.*). It is a
# dependency of both the unit tests and the subresource_indexing_tool binary.
static_library("tools_lib") {
sources = [
"tools/indexing_tool.cc",
"tools/indexing_tool.h",
]
deps = [
":common",
"//base",
"//components/url_pattern_index:util",
]
}
source_set("unit_tests") { source_set("unit_tests") {
testonly = true testonly = true
sources = [ sources = [
...@@ -65,10 +77,12 @@ source_set("unit_tests") { ...@@ -65,10 +77,12 @@ source_set("unit_tests") {
"first_party_origin_unittest.cc", "first_party_origin_unittest.cc",
"indexed_ruleset_unittest.cc", "indexed_ruleset_unittest.cc",
"scoped_timers_unittest.cc", "scoped_timers_unittest.cc",
"tools/indexing_tool_unittest.cc",
] ]
deps = [ deps = [
":common", ":common",
":test_support", ":test_support",
":tools_lib",
"//base", "//base",
"//base/test:test_support", "//base/test:test_support",
"//components/url_pattern_index:test_support", "//components/url_pattern_index:test_support",
...@@ -77,3 +91,16 @@ source_set("unit_tests") { ...@@ -77,3 +91,16 @@ source_set("unit_tests") {
"//url", "//url",
] ]
} }
# Command-line binary that wraps tools_lib. Only declared for Linux, Mac and
# Windows builds.
if (is_linux || is_mac || is_win) {
executable("subresource_indexing_tool") {
sources = [
"tools/indexing_tool_main.cc",
]
deps = [
":tools_lib",
"//base",
"//build/config:exe_and_shlib_deps",
]
}
}
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
#include "components/subresource_filter/core/common/test_ruleset_utils.h" #include "components/subresource_filter/core/common/test_ruleset_utils.h"
#include <utility>
namespace subresource_filter { namespace subresource_filter {
namespace testing { namespace testing {
...@@ -21,6 +23,18 @@ proto::UrlRule CreateSuffixRule(base::StringPiece suffix) { ...@@ -21,6 +23,18 @@ proto::UrlRule CreateSuffixRule(base::StringPiece suffix) {
return rule; return rule;
} }
// Builds a whitelist rule that applies to every element type from any source
// and matches URLs containing |suffix| anchored at the right boundary, i.e.
// URLs that end with |suffix|.
proto::UrlRule CreateWhitelistSuffixRule(base::StringPiece suffix) {
  proto::UrlRule whitelist_rule;

  // What the rule matches: a substring pattern pinned to the end of the URL.
  whitelist_rule.set_url_pattern_type(proto::URL_PATTERN_TYPE_SUBSTRING);
  whitelist_rule.set_url_pattern(suffix.as_string());
  whitelist_rule.set_anchor_left(proto::ANCHOR_TYPE_NONE);
  whitelist_rule.set_anchor_right(proto::ANCHOR_TYPE_BOUNDARY);

  // Which requests the rule applies to, and with what effect.
  whitelist_rule.set_source_type(proto::SOURCE_TYPE_ANY);
  whitelist_rule.set_element_types(proto::ELEMENT_TYPE_ALL);
  whitelist_rule.set_semantics(proto::RULE_SEMANTICS_WHITELIST);

  return whitelist_rule;
}
proto::UrlRule CreateWhitelistRuleForDocument( proto::UrlRule CreateWhitelistRuleForDocument(
base::StringPiece pattern, base::StringPiece pattern,
int32_t activation_types, int32_t activation_types,
......
...@@ -19,6 +19,11 @@ namespace testing { ...@@ -19,6 +19,11 @@ namespace testing {
// the resource URL ends with |suffix|. // the resource URL ends with |suffix|.
url_pattern_index::proto::UrlRule CreateSuffixRule(base::StringPiece suffix); url_pattern_index::proto::UrlRule CreateSuffixRule(base::StringPiece suffix);
// Creates a whitelist URL rule which targets subresources of any type such
// that the resource URL ends with |suffix|.
url_pattern_index::proto::UrlRule CreateWhitelistSuffixRule(
base::StringPiece suffix);
// Same as CreateUrlRule(pattern, proto::URL_PATTERN_TYPE_WILDCARDED), but the // Same as CreateUrlRule(pattern, proto::URL_PATTERN_TYPE_WILDCARDED), but the
// rule applies to the specified |activation_types|, and to no element types. // rule applies to the specified |activation_types|, and to no element types.
// Additionally, it is restricted to a set of |domains| (if provided). // Additionally, it is restricted to a set of |domains| (if provided).
......
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/subresource_filter/core/common/tools/indexing_tool.h"
#include <utility>
#include "base/files/file.h"
#include "base/files/file_util.h"
#include "base/numerics/safe_conversions.h"
#include "components/subresource_filter/core/common/indexed_ruleset.h"
#include "components/url_pattern_index/copying_file_stream.h"
#include "components/url_pattern_index/proto/rules.pb.h"
#include "components/url_pattern_index/unindexed_ruleset.h"
namespace subresource_filter {
// Reads the unindexed ruleset stored at |unindexed_path|, feeds every URL rule
// it contains to a RulesetIndexer, and writes the resulting indexed data to
// |indexed_path|.
//
// Returns false if |unindexed_path| does not exist or cannot be opened for
// reading, if the directory containing |indexed_path| does not exist, or if
// the indexed data could not be written in full. Returns true otherwise.
bool IndexAndWriteRuleset(const base::FilePath& unindexed_path,
                          const base::FilePath& indexed_path) {
  if (!base::PathExists(unindexed_path) ||
      !base::DirectoryExists(indexed_path.DirName())) {
    return false;
  }

  base::File unindexed_file(unindexed_path,
                            base::File::FLAG_OPEN | base::File::FLAG_READ);
  // The path existing does not guarantee that it could be opened for reading.
  if (!unindexed_file.IsValid())
    return false;

  subresource_filter::RulesetIndexer indexer;

  // Stream the unindexed ruleset from disk chunk by chunk rather than loading
  // the whole file into memory.
  url_pattern_index::CopyingFileInputStream copying_stream(
      std::move(unindexed_file));
  google::protobuf::io::CopyingInputStreamAdaptor zero_copy_stream_adaptor(
      &copying_stream, 4096 /* buffer_size */);
  url_pattern_index::UnindexedRulesetReader reader(&zero_copy_stream_adaptor);

  url_pattern_index::proto::FilteringRules ruleset_chunk;
  while (reader.ReadNextChunk(&ruleset_chunk)) {
    for (const auto& rule : ruleset_chunk.url_rules()) {
      indexer.AddUrlRule(rule);
    }
  }

  indexer.Finish();

  // Report a failed or partial write instead of claiming success.
  const int indexed_size = base::checked_cast<int>(indexer.size());
  const int bytes_written = base::WriteFile(
      indexed_path, reinterpret_cast<const char*>(indexer.data()),
      indexed_size);
  return bytes_written == indexed_size;
}
} // namespace subresource_filter
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_TOOLS_INDEXING_TOOL_H_
#define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_TOOLS_INDEXING_TOOL_H_
#include "base/command_line.h"
#include "base/files/file_path.h"
namespace subresource_filter {
// Given |unindexed_path|, which is a path to an unindexed ruleset, writes the
// indexed (flatbuffer) version to |indexed_path|. Returns false if the
// conversion could not be performed, e.g. because |unindexed_path| does not
// exist or the directory containing |indexed_path| does not exist.
bool IndexAndWriteRuleset(const base::FilePath& unindexed_path,
const base::FilePath& indexed_path);
} // namespace subresource_filter
#endif // COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_TOOLS_INDEXING_TOOL_H_
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/command_line.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/logging.h"
#include "components/subresource_filter/core/common/tools/indexing_tool.h"
// Usage text shown when the tool is invoked with too few arguments.
const char kHelpMsg[] = R"(
subresource_indexing_tool <unindexed_ruleset_file> <output_file>
subresource_indexing_tool will open the |unindexed_ruleset_file| and output
an indexed version in |output_file|.
)";

// Writes the usage text to stdout, followed by two newlines.
void PrintHelp() {
  fputs(kHelpMsg, stdout);
  fputs("\n\n", stdout);
}
// Entry point of subresource_indexing_tool.
//
// Expects two positional arguments: the path of the unindexed ruleset to read
// and the path of the indexed ruleset to write. Returns 0 on success and 1 if
// too few arguments were given or the ruleset could not be indexed, so that
// scripts invoking the tool can detect failure from the exit status.
int main(int argc, char* argv[]) {
  base::CommandLine::Init(argc, argv);
  const base::CommandLine& command_line =
      *base::CommandLine::ForCurrentProcess();

  const base::CommandLine::StringVector args = command_line.GetArgs();
  if (args.size() < 2U) {
    PrintHelp();
    return 1;
  }

  const base::FilePath unindexed_path(args[0]);
  const base::FilePath indexed_path(args[1]);

  if (!subresource_filter::IndexAndWriteRuleset(unindexed_path, indexed_path)) {
    LOG(ERROR) << "There was an error. Be sure that the first argument points "
                  "to a valid unindexed file and that the second argument is "
                  "in an existing directory.";
    return 1;
  }

  return 0;
}
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/subresource_filter/core/common/tools/indexing_tool.h"
#include <memory>
#include <string>
#include <vector>
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_number_conversions.h"
#include "components/subresource_filter/core/common/test_ruleset_creator.h"
#include "components/subresource_filter/core/common/test_ruleset_utils.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace subresource_filter {
namespace proto = url_pattern_index::proto;
namespace {
// Returns the bytes of the file at |file_path|. If the file cannot be opened,
// its length cannot be determined, or the read fails, an empty vector is
// returned; if fewer bytes than expected are read, only the bytes actually
// read are returned. This lets callers' comparisons fail cleanly instead of
// crashing inside checked_cast.
std::vector<uint8_t> ReadFileContents(const base::FilePath& file_path) {
  base::File file(file_path, base::File::FLAG_OPEN | base::File::FLAG_READ);
  if (!file.IsValid())
    return std::vector<uint8_t>();

  const int64_t length = file.GetLength();
  if (length < 0)
    return std::vector<uint8_t>();

  std::vector<uint8_t> contents(base::checked_cast<size_t>(length));
  static_assert(sizeof(uint8_t) == sizeof(char), "Expected char = byte.");
  const int bytes_read =
      file.Read(0, reinterpret_cast<char*>(contents.data()),
                base::checked_cast<int>(contents.size()));
  if (bytes_read < 0) {
    contents.clear();
  } else {
    // Drop any trailing bytes that were never filled in by the read.
    contents.resize(static_cast<size_t>(bytes_read));
  }
  return contents;
}
// Test fixture that owns a unique temporary directory and a pair of matching
// unindexed/indexed test rulesets.
class IndexingToolTest : public ::testing::Test {
 public:
  IndexingToolTest() {}

 protected:
  void SetUp() override { ASSERT_TRUE(scoped_temp_dir_.CreateUniqueTempDir()); }

  // Returns a path inside the temporary directory that has not been handed
  // out before. The file itself is not created.
  base::FilePath GetUniquePath() {
    base::FilePath path = scoped_temp_dir_.GetPath().AppendASCII(
        base::IntToString(file_count_++));
    return path;
  }

  // Populates |test_ruleset_pair_| with three suffix rules and two whitelist
  // suffix rules.
  void CreateSimpleRuleset() {
    std::vector<proto::UrlRule> rules;
    rules.push_back(testing::CreateSuffixRule("disallowed1.png"));
    rules.push_back(testing::CreateSuffixRule("disallowed2.png"));
    rules.push_back(testing::CreateSuffixRule("disallowed3.png"));
    rules.push_back(
        testing::CreateWhitelistSuffixRule("whitelist/disallowed1.png"));
    rules.push_back(
        testing::CreateWhitelistSuffixRule("whitelist/disallowed2.png"));
    ASSERT_NO_FATAL_FAILURE(test_ruleset_creator_.CreateRulesetWithRules(
        rules, &test_ruleset_pair_));
  }

  // Writes the unindexed half of |test_ruleset_pair_| to |path|. Records a
  // fatal test failure if the data could not be written in full, so that a
  // broken fixture is not mistaken for a bug in the code under test.
  void WriteUnindexedRulesetToFile(const base::FilePath& path) {
    const std::vector<uint8_t>& unindexed_data =
        test_ruleset_pair_.unindexed.contents;
    const int data_size = base::checked_cast<int>(unindexed_data.size());
    ASSERT_EQ(data_size,
              base::WriteFile(
                  path, reinterpret_cast<const char*>(unindexed_data.data()),
                  data_size));
  }

  // Number of paths handed out by GetUniquePath(); used as the file name.
  int file_count_ = 0;
  base::ScopedTempDir scoped_temp_dir_;
  testing::TestRulesetCreator test_ruleset_creator_;
  testing::TestRulesetPair test_ruleset_pair_;

  DISALLOW_COPY_AND_ASSIGN(IndexingToolTest);
};
// Indexing must fail when nothing has been written to the unindexed path.
TEST_F(IndexingToolTest, UnindexedFileDoesNotExist) {
  // Neither path refers to an existing file; only the input's absence matters.
  const base::FilePath missing_input = GetUniquePath();
  const base::FilePath output = GetUniquePath();
  EXPECT_FALSE(IndexAndWriteRuleset(missing_input, output));
}
// Indexing must fail when the output path lies in a directory that does not
// exist, even though the input ruleset is valid.
TEST_F(IndexingToolTest, IndexedDirectoryDoesNotExist) {
  // Put a valid unindexed ruleset on disk.
  const base::FilePath input_path = GetUniquePath();
  CreateSimpleRuleset();
  WriteUnindexedRulesetToFile(input_path);

  // "foo" is never created, so "foo/bar" sits in a non-existent directory.
  const base::FilePath output_in_missing_dir =
      scoped_temp_dir_.GetPath().AppendASCII("foo/bar");

  EXPECT_FALSE(IndexAndWriteRuleset(input_path, output_in_missing_dir));
}
// Indexing a valid unindexed ruleset must produce exactly the indexed data
// that the test ruleset creator generated for the same rules.
TEST_F(IndexingToolTest, VerifyOutput) {
  base::FilePath unindexed_path = GetUniquePath();
  base::FilePath indexed_path = GetUniquePath();

  // Stop immediately if the fixture data could not be prepared.
  ASSERT_NO_FATAL_FAILURE(CreateSimpleRuleset());
  ASSERT_NO_FATAL_FAILURE(WriteUnindexedRulesetToFile(unindexed_path));

  // Convert the unindexed data to indexed data, and write the result to
  // indexed_path. This is fatal on failure: there is no output file to read
  // back if the conversion did not succeed.
  ASSERT_TRUE(IndexAndWriteRuleset(unindexed_path, indexed_path));

  // Verify that the output equals the test indexed data.
  std::vector<uint8_t> indexed_data = ReadFileContents(indexed_path);
  EXPECT_EQ(test_ruleset_pair_.indexed.contents, indexed_data);
}
} // namespace
} // namespace subresource_filter
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment