Commit 31e4a5df authored by Jasper Chapman-Black, committed by Commit Bot

SuperSize: Caspian: Add name filtering

This mirrors the JS SuperSize implementation in supporting two regex
parameters, for whitelisting and blacklisting.

std::regex is two orders of magnitude too slow, so I've brought in RE2
from /third_party/.

The front-end has code to validate input regexes and display a user-facing
error in case of invalid input - this justified squashing any regex-parsing
errors encountered on the WebAssembly side.

Bug: 1011921
Change-Id: I36b73815d4fd72142580b78cf5ff08fc595990bb
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1891113
Commit-Queue: Jasper Chapman-Black <jaspercb@chromium.org>
Reviewed-by: Nico Weber <thakis@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Cr-Commit-Position: refs/heads/master@{#710930}
parent c57e8073
......@@ -43,6 +43,7 @@ if (is_wasm) {
]
deps = [
":caspian-lib",
"//third_party/re2:re2",
]
if (!is_debug) {
# Use optimize_speed (-O3) to output the _smallest_ code.
......
include_rules = [
"+third_party/zlib",
"+third_party/jsoncpp",
"+third_party/re2",
]
......@@ -13,6 +13,7 @@
#include <string>
#include "third_party/jsoncpp/source/include/json/json.h"
#include "third_party/re2/src/re2/re2.h"
#include "tools/binary_size/libsupersize/caspian/file_format.h"
#include "tools/binary_size/libsupersize/caspian/model.h"
#include "tools/binary_size/libsupersize/caspian/tree_builder.h"
......@@ -39,8 +40,32 @@ void LoadSizeFile(const char* compressed, size_t size) {
ParseSizeInfo(compressed, size, &info);
}
void BuildTree(bool group_by_component) {
builder.reset(new TreeBuilder(&info, group_by_component));
void BuildTree(bool group_by_component,
const char* include_regex_str,
const char* exclude_regex_str) {
std::vector<std::function<bool(const Symbol&)>> filters;
std::unique_ptr<RE2> include_regex;
if (include_regex_str && *include_regex_str) {
include_regex.reset(new RE2(include_regex_str));
if (include_regex->error_code() == RE2::NoError) {
filters.push_back([&include_regex](const Symbol& sym) -> bool {
return RE2::PartialMatch(sym.full_name, *include_regex);
});
}
}
std::unique_ptr<RE2> exclude_regex;
if (exclude_regex_str && *exclude_regex_str) {
exclude_regex.reset(new RE2(exclude_regex_str));
if (exclude_regex->error_code() == RE2::NoError) {
filters.push_back([&exclude_regex](const Symbol& sym) -> bool {
return !RE2::PartialMatch(sym.full_name, *exclude_regex);
});
}
}
builder.reset(new TreeBuilder(&info, group_by_component, filters));
builder->Build();
}
......
......@@ -40,10 +40,14 @@ std::string_view DirName(std::string_view path, char sep, char othersep) {
}
} // namespace
TreeBuilder::TreeBuilder(SizeInfo* size_info, bool group_by_component)
// Constructs a builder over |size_info|. |group_by_component| is stored and
// also selects the separator (kComponentSep when true, kPathSep otherwise).
// |filters| are stored in filters_, which ShouldIncludeSymbol() iterates.
// NOTE(review): |filters| is taken by value and then copied again into
// filters_; moving it into the member would avoid the second copy.
TreeBuilder::TreeBuilder(
SizeInfo* size_info,
bool group_by_component,
std::vector<std::function<bool(const Symbol&)>> filters)
: size_info_(size_info),
group_by_component_(group_by_component),
sep_(group_by_component ? kComponentSep : kPathSep) {}
sep_(group_by_component ? kComponentSep : kPathSep),
filters_(filters) {}
TreeBuilder::~TreeBuilder() = default;
......@@ -57,12 +61,14 @@ void TreeBuilder::Build() {
// Group symbols by source path.
std::unordered_map<std::string_view, std::vector<const Symbol*>> symbols;
for (auto& sym : size_info_->raw_symbols) {
if (ShouldIncludeSymbol(sym)) {
std::string_view key = sym.source_path;
if (key == nullptr) {
key = sym.object_path;
}
symbols[key].push_back(&sym);
}
}
for (const auto& pair : symbols) {
AddFileEntry(pair.first, pair.second);
}
......@@ -188,6 +194,15 @@ ContainerType TreeBuilder::ContainerTypeFromChild(
}
}
bool TreeBuilder::ShouldIncludeSymbol(const Symbol& symbol) const {
for (const auto& filter : filters_) {
if (!filter(symbol)) {
return false;
}
}
return true;
}
void TreeBuilder::JoinDexMethodClasses(TreeNode* node) {
const bool is_file_node = node->container_type == ContainerType::kFile;
const bool has_dex =
......
......@@ -17,14 +17,14 @@
namespace caspian {
class TreeBuilder {
public:
TreeBuilder(SizeInfo* size_info, bool group_by_component);
// |group_by_component| selects the separator used by the builder.
// |filters| is the list of predicates a symbol must satisfy to be included;
// an empty list includes every symbol.
TreeBuilder(SizeInfo* size_info,
bool group_by_component,
std::vector<std::function<bool(const Symbol&)>> filters);
~TreeBuilder();
void Build();
Json::Value Open(const char* path);
private:
std::string_view GetPath(const Symbol* symbol);
void AddFileEntry(const std::string_view source_path,
const std::vector<const Symbol*>& symbols);
......@@ -34,6 +34,8 @@ class TreeBuilder {
ContainerType ContainerTypeFromChild(std::string_view child_id_path) const;
// Returns true only if every predicate in filters_ returns true for
// |symbol|.
bool ShouldIncludeSymbol(const Symbol& symbol) const;
// Merges dex method symbols into containers based on the class of the dex
// method.
void JoinDexMethodClasses(TreeNode* node);
......@@ -55,6 +57,7 @@ class TreeBuilder {
// Note that we split paths on '/' no matter the value of separator, since
// when grouping by component, paths look like Component>path/to/file.
char sep_;
// Predicates supplied at construction. ShouldIncludeSymbol() accepts a
// symbol only when all of them return true.
std::vector<std::function<bool(const Symbol&)>> filters_;
}; // TreeBuilder
} // namespace caspian
#endif // TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_TREE_BUILDER_H_
......@@ -144,8 +144,8 @@ const fetcher = new DataFetcher('data.size');
let sizeFileLoaded = false;
async function buildTree(
groupBy, filterTest, highlightTest, methodCountMode, onProgress) {
groupBy, includeRegex, excludeRegex, highlightTest, methodCountMode,
onProgress) {
if (!sizeFileLoaded) {
let sizeBuffer = await fetcher.loadSizeBuffer();
let heapBuffer = mallocBuffer(sizeBuffer);
......@@ -188,10 +188,11 @@ async function buildTree(
return message;
}
let BuildTree = Module.cwrap('BuildTree', 'void', ['bool']);
let BuildTree =
Module.cwrap('BuildTree', 'void', ['bool', 'string', 'string']);
let start_time = Date.now();
const groupByComponent = groupBy === "component";
BuildTree(groupByComponent);
BuildTree(groupByComponent, includeRegex, excludeRegex);
console.log('Constructed tree in ' +
(Date.now() - start_time)/1000.0 + ' seconds');
......@@ -216,20 +217,33 @@ function parseOptions(options) {
const filterGeneratedFiles = params.has('generated_filter');
const flagToHighlight = _NAMES_TO_FLAGS[params.get('highlight')];
function filterTest(symbolNode) {
return true;
}
const includeRegex = params.get('include');
const excludeRegex = params.get('exclude');
function highlightTest(symbolNode) {
return false;
}
return {groupBy, filterTest, highlightTest, url, methodCountMode};
return {
groupBy,
includeRegex,
excludeRegex,
highlightTest,
url,
methodCountMode
};
}
const actions = {
/** @param {{input:string|null,options:string}} param0 */
load({input, options}) {
const {groupBy, filterTest, highlightTest, url, methodCountMode} =
parseOptions(options);
const {
groupBy,
includeRegex,
excludeRegex,
highlightTest,
url,
methodCountMode
} = parseOptions(options);
if (input === 'from-url://' && url) {
// Display the data from the `load_url` query parameter
console.info('Displaying data from', url);
......@@ -240,7 +254,8 @@ const actions = {
}
return buildTree(
groupBy, filterTest, highlightTest, methodCountMode, progress => {
groupBy, includeRegex, excludeRegex, highlightTest, methodCountMode,
progress => {
// @ts-ignore
self.postMessage(progress);
});
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment