Commit a28dae95 authored by Charlie Harrison, committed by Commit Bot

Refactor ruleset_converter and add more tests

This refactors main.cc into a class RulesetConverter to make things
a bit more testable.

This CL contains only one behavior change: instead of a colon-separated
list of input paths, the tool now takes a comma-separated list, because
paths on Windows frequently contain colons :)

Bug: 833419
Change-Id: I51c3b9c2b8ac6ce93a193d07f8bafaff897f3fdf
Reviewed-on: https://chromium-review.googlesource.com/1036243
Commit-Queue: Charlie Harrison <csharrison@chromium.org>
Reviewed-by: Josh Karlin <jkarlin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#555715}
parent a207d01d
...@@ -9,6 +9,8 @@ source_set("support") { ...@@ -9,6 +9,8 @@ source_set("support") {
"//third_party/protobuf:protobuf_lite", "//third_party/protobuf:protobuf_lite",
"rule_stream.cc", "rule_stream.cc",
"rule_stream.h", "rule_stream.h",
"ruleset_converter.cc",
"ruleset_converter.h",
"ruleset_format.cc", "ruleset_format.cc",
"ruleset_format.h", "ruleset_format.h",
] ]
...@@ -25,6 +27,7 @@ source_set("unit_tests") { ...@@ -25,6 +27,7 @@ source_set("unit_tests") {
testonly = true testonly = true
sources = [ sources = [
"rule_stream_unittest.cc", "rule_stream_unittest.cc",
"ruleset_converter_unittest.cc",
"ruleset_test_util.cc", "ruleset_test_util.cc",
"ruleset_test_util.h", "ruleset_test_util.h",
] ]
......
...@@ -2,25 +2,8 @@ ...@@ -2,25 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "base/command_line.h" #include "base/command_line.h"
#include "base/files/file_path.h" #include "components/subresource_filter/tools/ruleset_converter/ruleset_converter.h"
#include "base/files/file_util.h"
#include "base/logging.h"
#include "base/strings/string16.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "components/subresource_filter/tools/ruleset_converter/rule_stream.h"
#include "components/subresource_filter/tools/ruleset_converter/ruleset_format.h"
namespace { namespace {
...@@ -44,7 +27,7 @@ const char kHelpMsg[] = R"( ...@@ -44,7 +27,7 @@ const char kHelpMsg[] = R"(
ruleset_converter is a utility for converting subresource_filter rulesets ruleset_converter is a utility for converting subresource_filter rulesets
across multiple formats: across multiple formats:
* --input_files: Colon-separated list of input files with rules. The files * --input_files: Comma-separated list of input files with rules. The files
are processed in the order of declaration are processed in the order of declaration
* --output_file: The file to output the rules. Either this option or at least * --output_file: The file to output the rules. Either this option or at least
...@@ -82,126 +65,53 @@ int main(int argc, char* argv[]) { ...@@ -82,126 +65,53 @@ int main(int argc, char* argv[]) {
return 1; return 1;
} }
if (!command_line.HasSwitch(kSwitchInputFiles)) { subresource_filter::RulesetConverter converter;
std::fprintf(stderr, "--input_files flag is not specified.\n"); if (!converter.SetInputFiles(
command_line.GetSwitchValueNative(kSwitchInputFiles))) {
PrintHelp(); PrintHelp();
return 1; return 1;
} }
if (command_line.GetSwitchValueASCII(kSwitchOutputFile).empty() && if (command_line.HasSwitch(kSwitchOutputFile) &&
command_line.GetSwitchValueASCII(kSwitchOutputFileUrl).empty() && !converter.SetOutputFile(
command_line.GetSwitchValueASCII(kSwitchOutputFileCss).empty()) { command_line.GetSwitchValuePath(kSwitchOutputFile))) {
std::fprintf(stderr,
"Either --output_file, or at least of one "
"--output_file_url|--output_file_css should be specified.\n");
PrintHelp(); PrintHelp();
return 1; return 1;
} }
if (command_line.HasSwitch(kSwitchOutputFileUrl) &&
const subresource_filter::RulesetFormat input_format = !converter.SetOutputFileUrl(
command_line.HasSwitch(kSwitchInputFormat) command_line.GetSwitchValuePath(kSwitchOutputFileUrl))) {
? subresource_filter::ParseFlag( PrintHelp();
command_line.GetSwitchValueASCII(kSwitchInputFormat))
: subresource_filter::RulesetFormat::kFilterList;
if (input_format == subresource_filter::RulesetFormat::kUndefined) {
std::fprintf(stderr, "Input format is not defined.\n");
return 1; return 1;
} }
if (command_line.HasSwitch(kSwitchOutputFileCss) &&
const subresource_filter::RulesetFormat output_format = !converter.SetOutputFileCss(
subresource_filter::ParseFlag( command_line.GetSwitchValuePath(kSwitchOutputFileCss))) {
command_line.GetSwitchValueASCII(kSwitchOutputFormat)); PrintHelp();
if (output_format == subresource_filter::RulesetFormat::kUndefined) {
std::fprintf(stderr, "Output format is not defined.\n");
return 1; return 1;
} }
int chrome_version = 59; if (command_line.HasSwitch(kSwitchChromeVersion) &&
if (command_line.HasSwitch(kSwitchChromeVersion)) { !converter.SetChromeVersion(
if (!base::StringToInt( command_line.GetSwitchValueASCII(kSwitchChromeVersion))) {
command_line.GetSwitchValueASCII(kSwitchChromeVersion), PrintHelp();
&chrome_version)) {
fprintf(stderr, "Unable to parse chrome version");
return 1;
}
}
if (chrome_version != 0 && chrome_version != 54 && chrome_version != 59) {
std::fprintf(stderr, "chrome_version should be in {0, 54, 59}.\n");
return 1; return 1;
} }
// Vet the input paths. if (command_line.HasSwitch(kSwitchInputFormat) &&
base::CommandLine::StringType inputs = !converter.SetInputFormat(
command_line.GetSwitchValueNative(kSwitchInputFiles); command_line.GetSwitchValueASCII(kSwitchInputFormat))) {
std::vector<base::FilePath> input_paths; PrintHelp();
#if defined(OS_WIN)
base::StringPiece16 separator(base::ASCIIToUTF16(":"));
#else
base::StringPiece separator(":");
#endif
for (const auto& piece :
base::SplitStringPiece(inputs, separator, base::TRIM_WHITESPACE,
base::SPLIT_WANT_NONEMPTY)) {
base::FilePath path(piece);
if (!base::PathExists(path)) {
std::fprintf(stderr, "Input path does not exist\n");
return 1; return 1;
} }
input_paths.push_back(path); if (command_line.HasSwitch(kSwitchOutputFormat) &&
} !converter.SetOutputFormat(
command_line.GetSwitchValueASCII(kSwitchOutputFormat))) {
// Create output stream(s). PrintHelp();
std::unique_ptr<subresource_filter::RuleOutputStream> primary_output;
std::unique_ptr<subresource_filter::RuleOutputStream> secondary_output;
subresource_filter::RuleOutputStream* css_rules_output = nullptr;
base::FilePath primary_filename =
command_line.GetSwitchValuePath(kSwitchOutputFile);
const bool single_output = !primary_filename.empty();
if (!single_output)
primary_filename = command_line.GetSwitchValuePath(kSwitchOutputFileUrl);
if (!primary_filename.empty()) {
if (!base::DirectoryExists(primary_filename.DirName())) {
std::fprintf(stderr, "Output directory does not exist\n");
return 1; return 1;
} }
primary_output = subresource_filter::RuleOutputStream::Create(
std::make_unique<std::ofstream>(primary_filename.AsUTF8Unsafe(),
std::ios::binary | std::ios::out),
output_format);
}
base::FilePath secondary_filename = if (!converter.Convert())
command_line.GetSwitchValuePath(kSwitchOutputFileCss);
if (single_output || secondary_filename == primary_filename) {
css_rules_output = primary_output.get();
} else if (!secondary_filename.empty()) {
if (!base::DirectoryExists(secondary_filename.DirName())) {
std::fprintf(stderr, "Output directory does not exist\n");
return 1; return 1;
}
secondary_output = subresource_filter::RuleOutputStream::Create(
std::make_unique<std::ofstream>(secondary_filename.AsUTF8Unsafe(),
std::ios::binary | std::ios::out),
output_format);
css_rules_output = secondary_output.get();
}
// Iterate through input files and stream them to the outputs.
for (const auto& path : input_paths) {
auto input_stream = subresource_filter::RuleInputStream::Create(
std::make_unique<std::ifstream>(path.AsUTF8Unsafe(),
std::ios::binary | std::ios::in),
input_format);
CHECK(input_stream);
CHECK(subresource_filter::TransferRules(input_stream.get(),
primary_output.get(),
css_rules_output, chrome_version));
}
if (primary_output)
CHECK(primary_output->Finish());
if (secondary_output)
CHECK(secondary_output->Finish());
return 0; return 0;
} }
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/subresource_filter/tools/ruleset_converter/ruleset_converter.h"
#include <fstream>
#include <iostream>
#include "base/files/file_util.h"
#include "base/logging.h"
#include "base/strings/string16.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "components/subresource_filter/tools/ruleset_converter/rule_stream.h"
#include "components/subresource_filter/tools/ruleset_converter/ruleset_format.h"
namespace subresource_filter {
RulesetConverter::RulesetConverter() = default;
RulesetConverter::~RulesetConverter() = default;
bool RulesetConverter::Convert() {
if (inputs_.empty()) {
std::fprintf(stderr, "No input files specified\n");
return false;
}
std::unique_ptr<RuleOutputStream> primary_output;
std::unique_ptr<RuleOutputStream> secondary_output;
subresource_filter::RuleOutputStream* css_rules_output = nullptr;
auto make_output = [](const base::FilePath& path, RulesetFormat format) {
return RuleOutputStream::Create(
std::make_unique<std::ofstream>(path.AsUTF8Unsafe(),
std::ios::binary | std::ios::out),
format);
};
if (!output_file_.empty()) {
primary_output = make_output(output_file_, output_format_);
css_rules_output = primary_output.get();
} else {
if (!output_url_.empty()) {
primary_output = make_output(output_url_, output_format_);
}
if (output_css_ == output_url_) {
css_rules_output = primary_output.get();
} else if (!output_css_.empty()) {
secondary_output = make_output(output_css_, output_format_);
css_rules_output = secondary_output.get();
}
}
if (!primary_output && !secondary_output) {
std::fprintf(stderr,
"Must specify an output_file, or one of "
"output_file_url|output_file_css\n");
return false;
}
// Iterate through input files and stream them to the outputs.
for (const auto& path : inputs_) {
auto input_stream = subresource_filter::RuleInputStream::Create(
std::make_unique<std::ifstream>(path.AsUTF8Unsafe(),
std::ios::binary | std::ios::in),
input_format_);
CHECK(input_stream);
CHECK(TransferRules(input_stream.get(), primary_output.get(),
css_rules_output, chrome_version_));
}
if (primary_output)
CHECK(primary_output->Finish());
if (secondary_output)
CHECK(secondary_output->Finish());
return true;
}
// Parses |comma_separated_paths| (the value of --input_files) and appends the
// resulting paths to |inputs_|.
//
// Entries are separated by ','; surrounding whitespace is trimmed and empty
// entries are skipped. Returns false, after printing a diagnostic to stderr,
// if any listed path does not exist or if no input files are known once the
// list has been processed.
//
// The update is all-or-nothing: when any path in the list is missing, none of
// the paths from this call are added, so a rejected list never leaves the
// converter holding a partial set of inputs.
bool RulesetConverter::SetInputFiles(
    const base::CommandLine::StringType& comma_separated_paths) {
#if defined(OS_WIN)
  // On Windows the native command-line string is 16-bit, so the separator
  // has to be a 16-bit string piece as well.
  base::string16 separator16 = base::ASCIIToUTF16(",");
  base::StringPiece16 separator(separator16);
#else
  base::StringPiece separator(",");
#endif

  // Validate into a scratch list first; |inputs_| is only touched once every
  // path in this call has been confirmed to exist.
  std::vector<base::FilePath> validated_paths;
  for (const auto& piece : base::SplitStringPiece(
           comma_separated_paths, separator, base::TRIM_WHITESPACE,
           base::SPLIT_WANT_NONEMPTY)) {
    base::FilePath path(piece);
    if (!base::PathExists(path)) {
      std::fprintf(stderr, "Path not found: %s\n", path.AsUTF8Unsafe().c_str());
      return false;
    }
    validated_paths.push_back(path);
  }

  inputs_.insert(inputs_.end(), validated_paths.begin(),
                 validated_paths.end());
  if (inputs_.empty()) {
    std::fprintf(stderr, "Received no input files\n");
    return false;
  }
  return true;
}
// Parses |version| (the value of --chrome_version) and stores it in
// |chrome_version_|. Only 0, 54 and 59 are accepted. Returns false, after
// printing a diagnostic to stderr, when |version| is not an integer or is not
// one of the accepted values; |chrome_version_| is left untouched in that case.
bool RulesetConverter::SetChromeVersion(const std::string& version) {
  int requested = 0;
  if (!base::StringToInt(version, &requested)) {
    std::fprintf(stderr,
                 "chrome_version could not be parsed into an integer.\n");
    return false;
  }

  switch (requested) {
    case 0:
    case 54:
    case 59:
      chrome_version_ = requested;
      return true;
    default:
      std::fprintf(stderr, "chrome_version should be in {0, 54, 59}.\n");
      return false;
  }
}
// Sets the single combined output path (the value of --output_file).
//
// Returns false, after printing a diagnostic, when the directory that would
// contain |path| does not exist; |output_file_| is left untouched in that
// case. The diagnostic goes to stderr, like every other error reported by this
// class, so it cannot get mixed into stdout.
bool RulesetConverter::SetOutputFile(const base::FilePath& path) {
  const base::FilePath directory = path.DirName();
  if (!base::DirectoryExists(directory)) {
    std::fprintf(stderr, "Directory does not exist: %s\n",
                 directory.AsUTF8Unsafe().c_str());
    return false;
  }
  output_file_ = path;
  return true;
}
// Sets the output path given by --output_file_url.
//
// Returns false, after printing a diagnostic, when the directory that would
// contain |path| does not exist; |output_url_| is left untouched in that
// case. The diagnostic goes to stderr, like every other error reported by this
// class, so it cannot get mixed into stdout.
bool RulesetConverter::SetOutputFileUrl(const base::FilePath& path) {
  const base::FilePath directory = path.DirName();
  if (!base::DirectoryExists(directory)) {
    std::fprintf(stderr, "Directory does not exist: %s\n",
                 directory.AsUTF8Unsafe().c_str());
    return false;
  }
  output_url_ = path;
  return true;
}
// Sets the output path given by --output_file_css.
//
// Returns false, after printing a diagnostic, when the directory that would
// contain |path| does not exist; |output_css_| is left untouched in that
// case. The diagnostic goes to stderr, like every other error reported by this
// class, so it cannot get mixed into stdout.
bool RulesetConverter::SetOutputFileCss(const base::FilePath& path) {
  const base::FilePath directory = path.DirName();
  if (!base::DirectoryExists(directory)) {
    std::fprintf(stderr, "Directory does not exist: %s\n",
                 directory.AsUTF8Unsafe().c_str());
    return false;
  }
  output_css_ = path;
  return true;
}
bool RulesetConverter::SetInputFormat(const std::string& format) {
RulesetFormat ruleset_format = ParseFlag(format);
if (ruleset_format == subresource_filter::RulesetFormat::kUndefined) {
std::fprintf(stderr, "Input format is not defined.\n");
return false;
}
input_format_ = ruleset_format;
return true;
}
bool RulesetConverter::SetOutputFormat(const std::string& format) {
RulesetFormat ruleset_format = ParseFlag(format);
if (ruleset_format == subresource_filter::RulesetFormat::kUndefined) {
std::fprintf(stderr, "Output format is not defined.\n");
return false;
}
output_format_ = ruleset_format;
return true;
}
} // namespace subresource_filter
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULESET_CONVERTER_RULESET_CONVERTER_H_
#define COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULESET_CONVERTER_RULESET_CONVERTER_H_
#include <memory>
#include <string>
#include <vector>
#include "base/command_line.h"
#include "base/files/file_path.h"
#include "base/macros.h"
#include "components/subresource_filter/tools/ruleset_converter/rule_stream.h"
#include "components/subresource_filter/tools/ruleset_converter/ruleset_format.h"
namespace subresource_filter {
// Converts subresource_filter rulesets from one format to another.
//
// This is a thin wrapper around the logic of the |ruleset_converter| command
// line tool, split out so that it can be unit tested. Each Set* method mirrors
// one command line switch and validates its argument; main.cc documents the
// switches themselves.
class RulesetConverter {
 public:
  RulesetConverter();
  ~RulesetConverter();

  // Not copyable or assignable.
  RulesetConverter(const RulesetConverter&) = delete;
  RulesetConverter& operator=(const RulesetConverter&) = delete;

  // Performs the conversion using whatever the Set* methods configured.
  // Returns false if no inputs or no outputs were configured.
  bool Convert();

  // --input_files: a comma-separated list of paths. Returns false if any path
  // cannot be found or if there are no input files at all.
  bool SetInputFiles(
      const base::CommandLine::StringType& comma_separated_paths);

  // Output destinations. Each returns false if the directory that would
  // contain |path| does not exist.
  //
  // --output_file
  bool SetOutputFile(const base::FilePath& path);

  // --output_file_url
  bool SetOutputFileUrl(const base::FilePath& path);

  // --output_file_css
  bool SetOutputFileCss(const base::FilePath& path);

  // --chrome_version: must parse as one of 0, 54 or 59.
  bool SetChromeVersion(const std::string& version);

  // --input_format / --output_format: return false if the format name is not
  // recognized.
  bool SetInputFormat(const std::string& format);
  bool SetOutputFormat(const std::string& format);

 private:
  // Input files, in the order they will be processed.
  std::vector<base::FilePath> inputs_;

  // Output paths; an empty path means the corresponding switch was not set.
  base::FilePath output_file_;
  base::FilePath output_url_;
  base::FilePath output_css_;

  // Formats used when the corresponding switch is not given.
  RulesetFormat input_format_ = RulesetFormat::kFilterList;
  RulesetFormat output_format_ = RulesetFormat::kUnindexedRuleset;

  // Increase this if rule_stream gets more custom logic for versions > 59.
  int chrome_version_ = 59;
};
} // namespace subresource_filter
#endif // COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULESET_CONVERTER_RULESET_CONVERTER_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment