Commit bf9383ab authored by Jasper Chapman-Black, committed by Commit Bot

SuperSize: Caspian: Add DeltaSymbol, match Python diffing

Bug: 1011921
Change-Id: Idaa9423573c506eb9f958998519f71599c8db4bf
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1913072
Commit-Queue: Jasper Chapman-Black <jaspercb@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Cr-Commit-Position: refs/heads/master@{#715279}
parent 16740b37
...@@ -13,6 +13,8 @@ assert(is_linux || is_wasm) ...@@ -13,6 +13,8 @@ assert(is_linux || is_wasm)
source_set("caspian-lib") { source_set("caspian-lib") {
sources = [ sources = [
"diff.cc",
"diff.h",
"file_format.cc", "file_format.cc",
"file_format.h", "file_format.h",
"function_signature.cc", "function_signature.cc",
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "third_party/jsoncpp/source/include/json/json.h" #include "third_party/jsoncpp/source/include/json/json.h"
#include "third_party/re2/src/re2/re2.h" #include "third_party/re2/src/re2/re2.h"
#include "tools/binary_size/libsupersize/caspian/diff.h"
#include "tools/binary_size/libsupersize/caspian/file_format.h" #include "tools/binary_size/libsupersize/caspian/file_format.h"
#include "tools/binary_size/libsupersize/caspian/model.h" #include "tools/binary_size/libsupersize/caspian/model.h"
#include "tools/binary_size/libsupersize/caspian/tree_builder.h" #include "tools/binary_size/libsupersize/caspian/tree_builder.h"
...@@ -23,7 +24,7 @@ namespace caspian { ...@@ -23,7 +24,7 @@ namespace caspian {
namespace { namespace {
std::unique_ptr<SizeInfo> info; std::unique_ptr<SizeInfo> info;
std::unique_ptr<SizeInfo> before_info; std::unique_ptr<SizeInfo> before_info;
std::unique_ptr<DiffSizeInfo> diff_info; std::unique_ptr<DeltaSizeInfo> diff_info;
std::unique_ptr<TreeBuilder> builder; std::unique_ptr<TreeBuilder> builder;
std::unique_ptr<Json::StreamWriter> writer; std::unique_ptr<Json::StreamWriter> writer;
...@@ -57,18 +58,18 @@ void BuildTree(bool group_by_component, ...@@ -57,18 +58,18 @@ void BuildTree(bool group_by_component,
const char* include_sections, const char* include_sections,
int minimum_size_bytes, int minimum_size_bytes,
int match_flag) { int match_flag) {
std::vector<std::function<bool(const Symbol&)>> filters; std::vector<std::function<bool(const BaseSymbol&)>> filters;
const bool diff_mode = info && before_info; const bool diff_mode = info && before_info;
if (minimum_size_bytes > 0) { if (minimum_size_bytes > 0) {
if (!diff_mode) { if (!diff_mode) {
filters.push_back([minimum_size_bytes](const Symbol& sym) -> bool { filters.push_back([minimum_size_bytes](const BaseSymbol& sym) -> bool {
return sym.pss >= minimum_size_bytes; return sym.Pss() >= minimum_size_bytes;
}); });
} else { } else {
filters.push_back([minimum_size_bytes](const Symbol& sym) -> bool { filters.push_back([minimum_size_bytes](const BaseSymbol& sym) -> bool {
return abs(sym.pss) >= minimum_size_bytes; return abs(sym.Pss()) >= minimum_size_bytes;
}); });
} }
} }
...@@ -77,8 +78,8 @@ void BuildTree(bool group_by_component, ...@@ -77,8 +78,8 @@ void BuildTree(bool group_by_component,
// |match_flag| can be assumed to be a power of two. // |match_flag| can be assumed to be a power of two.
if (match_flag) { if (match_flag) {
std::cout << "Filtering on flag matching " << match_flag << std::endl; std::cout << "Filtering on flag matching " << match_flag << std::endl;
filters.push_back([match_flag](const Symbol& sym) -> bool { filters.push_back([match_flag](const BaseSymbol& sym) -> bool {
return match_flag & sym.flags; return match_flag & sym.Flags();
}); });
} }
...@@ -88,8 +89,8 @@ void BuildTree(bool group_by_component, ...@@ -88,8 +89,8 @@ void BuildTree(bool group_by_component,
for (const char* c = include_sections; *c; c++) { for (const char* c = include_sections; *c; c++) {
include_sections_map[static_cast<uint8_t>(*c)] = true; include_sections_map[static_cast<uint8_t>(*c)] = true;
} }
filters.push_back([&include_sections_map](const Symbol& sym) -> bool { filters.push_back([&include_sections_map](const BaseSymbol& sym) -> bool {
return include_sections_map[static_cast<uint8_t>(sym.sectionId)]; return include_sections_map[static_cast<uint8_t>(sym.Section())];
}); });
} }
...@@ -97,8 +98,8 @@ void BuildTree(bool group_by_component, ...@@ -97,8 +98,8 @@ void BuildTree(bool group_by_component,
if (include_regex_str && *include_regex_str) { if (include_regex_str && *include_regex_str) {
include_regex.reset(new RE2(include_regex_str)); include_regex.reset(new RE2(include_regex_str));
if (include_regex->error_code() == RE2::NoError) { if (include_regex->error_code() == RE2::NoError) {
filters.push_back([&include_regex](const Symbol& sym) -> bool { filters.push_back([&include_regex](const BaseSymbol& sym) -> bool {
re2::StringPiece piece(sym.full_name.data(), sym.full_name.size()); re2::StringPiece piece(sym.FullName().data(), sym.FullName().size());
return RE2::PartialMatch(piece, *include_regex); return RE2::PartialMatch(piece, *include_regex);
}); });
} }
...@@ -108,26 +109,26 @@ void BuildTree(bool group_by_component, ...@@ -108,26 +109,26 @@ void BuildTree(bool group_by_component,
if (exclude_regex_str && *exclude_regex_str) { if (exclude_regex_str && *exclude_regex_str) {
exclude_regex.reset(new RE2(exclude_regex_str)); exclude_regex.reset(new RE2(exclude_regex_str));
if (exclude_regex->error_code() == RE2::NoError) { if (exclude_regex->error_code() == RE2::NoError) {
filters.push_back([&exclude_regex](const Symbol& sym) -> bool { filters.push_back([&exclude_regex](const BaseSymbol& sym) -> bool {
re2::StringPiece piece(sym.full_name.data(), sym.full_name.size()); re2::StringPiece piece(sym.FullName().data(), sym.FullName().size());
return !RE2::PartialMatch(piece, *exclude_regex); return !RE2::PartialMatch(piece, *exclude_regex);
}); });
} }
} }
// BuildTree() is called every time a new filter is applied in the HTML // BuildTree() is called every time a new filter is applied in the HTML
// viewer, but if we already have a DiffSizeInfo we can skip regenerating it // viewer, but if we already have a DeltaSizeInfo we can skip regenerating it
// and let the TreeBuilder filter the symbols we care about. // and let the TreeBuilder filter the symbols we care about.
if (diff_mode && !diff_info) { if (diff_mode && !diff_info) {
diff_info.reset(new DiffSizeInfo(before_info.get(), info.get())); diff_info.reset(new DeltaSizeInfo(Diff(before_info.get(), info.get())));
} }
BaseSizeInfo* rendered_info = info.get();
if (diff_mode) { if (diff_mode) {
rendered_info = diff_info.get(); builder.reset(new TreeBuilder(diff_info.get()));
} else {
builder.reset(new TreeBuilder(info.get()));
} }
builder.reset(new TreeBuilder(rendered_info, group_by_component, filters)); builder->Build(group_by_component, filters);
builder->Build();
} }
const char* Open(const char* path) { const char* Open(const char* path) {
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include "tools/binary_size/libsupersize/caspian/diff.h"
#include "tools/binary_size/libsupersize/caspian/file_format.h" #include "tools/binary_size/libsupersize/caspian/file_format.h"
#include "tools/binary_size/libsupersize/caspian/model.h" #include "tools/binary_size/libsupersize/caspian/model.h"
...@@ -32,7 +33,7 @@ void Diff(const char* before_filename, const char* after_filename) { ...@@ -32,7 +33,7 @@ void Diff(const char* before_filename, const char* after_filename) {
caspian::SizeInfo after; caspian::SizeInfo after;
ParseSizeInfoFromFile(after_filename, &after); ParseSizeInfoFromFile(after_filename, &after);
caspian::DiffSizeInfo diff(&before, &after); caspian::DeltaSizeInfo diff = Diff(&before, &after);
} }
void Validate(const char* filename) { void Validate(const char* filename) {
...@@ -41,11 +42,11 @@ void Validate(const char* filename) { ...@@ -41,11 +42,11 @@ void Validate(const char* filename) {
size_t max_aliases = 0; size_t max_aliases = 0;
for (auto& s : info.raw_symbols) { for (auto& s : info.raw_symbols) {
if (s.aliases) { if (s.aliases_) {
max_aliases = std::max(max_aliases, s.aliases->size()); max_aliases = std::max(max_aliases, s.aliases_->size());
// What a wonderful O(n^2) loop // What a wonderful O(n^2) loop
for (auto* ss : *s.aliases) { for (auto* ss : *s.aliases_) {
if (ss->aliases != s.aliases) { if (ss->aliases_ != s.aliases_) {
std::cerr << "Not all symbols in alias group had same alias count" std::cerr << "Not all symbols in alias group had same alias count"
<< std::endl; << std::endl;
exit(1); exit(1);
......
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "tools/binary_size/libsupersize/caspian/diff.h"
#include <algorithm>
#include <functional>
#include <iostream>
#include <list>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
namespace {
// Key used to pair a "before" symbol with an "after" symbol while diffing.
//
// A default-constructed key is "empty": it converts to false in a boolean
// context so that callers can skip symbols for which no key applies. Keys
// built with the four-argument constructor are always non-empty.
//
// |name| and |path| are non-owning views; the strings they point at must
// outlive the key.
struct SymbolMatchIndex {
  SymbolMatchIndex() = default;
  SymbolMatchIndex(caspian::SectionId id,
                   std::string_view name,
                   std::string_view path,
                   int size_without_padding)
      : nonempty(true),
        id(id),
        name(name),
        path(path),
        size_without_padding(size_without_padding) {}

  // True for keys that should take part in matching.
  explicit operator bool() const { return nonempty; }

  bool operator==(const SymbolMatchIndex& other) const {
    return nonempty == other.nonempty && id == other.id &&
           name == other.name && path == other.path &&
           size_without_padding == other.size_without_padding;
  }

  // Every member has a default so that an empty key never exposes
  // indeterminate values to operator== or to the hash.
  bool nonempty = false;
  caspian::SectionId id = caspian::SectionId::kNone;
  std::string_view name;
  std::string_view path;
  int size_without_padding = 0;
};
} // namespace
namespace std {
// Hash support so that SymbolMatchIndex can be used as an unordered_map key.
// The four key fields are hashed independently and XOR-ed together; the
// section id and the size are first scaled by a prime.
template <>
struct hash<SymbolMatchIndex> {
  static constexpr size_t kPrime1 = 105929;
  static constexpr size_t kPrime2 = 8543;

  size_t operator()(const SymbolMatchIndex& k) const {
    const size_t section_bits = kPrime1 * static_cast<size_t>(k.id);
    const size_t name_bits = hash<string_view>()(k.name);
    const size_t path_bits = hash<string_view>()(k.path);
    const size_t size_bits = kPrime2 * k.size_without_padding;
    return section_bits ^ name_bits ^ path_bits ^ size_bits;
  }
};
}  // namespace std
namespace {
// Copied from /base/stl_util.h
// Removes every element of |container| that compares equal to |value|,
// keeping the relative order of the remaining elements.
template <class T, class Allocator, class Value>
void Erase(std::vector<T, Allocator>& container, const Value& value) {
  // std::remove compacts the survivors to the front and returns the start of
  // the leftover tail, which is then chopped off.
  const auto tail_begin =
      std::remove(container.begin(), container.end(), value);
  container.erase(tail_begin, container.end());
}
// Returns the path used to identify |sym| during matching: its source path
// when that is non-null and non-empty, otherwise its object path.
//
// A null object path yields an empty view. Building a std::string_view
// directly from a null const char* is undefined behaviour, so the null case
// is handled explicitly.
std::string_view GetIdPath(const caspian::Symbol& sym) {
  const char* source_path = sym.SourcePath();
  if (source_path && *source_path) {
    return source_path;
  }
  const char* object_path = sym.ObjectPath();
  return object_path ? std::string_view(object_path) : std::string_view();
}
// Strictest key: section, full name, id path and size must all agree.
//
// The size component is the symbol's PSS truncated to an integer, because the
// key stores an int. The truncation used to happen implicitly; it is spelled
// out here so the narrowing is visible.
// NOTE(review): the key field is named |size_without_padding| but is fed the
// PSS; confirm whether SizeWithoutPadding() was intended.
SymbolMatchIndex SectionAndFullNameAndPathAndSize(const caspian::Symbol& sym) {
  const int truncated_pss = static_cast<int>(sym.Pss());
  return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
                          truncated_pss);
}
// Key on section, full name and id path. The size component is fixed at zero
// so that it does not take part in the match.
SymbolMatchIndex SectionAndFullNameAndPath(const caspian::Symbol& sym) {
  const std::string_view id_path = GetIdPath(sym);
  constexpr int kSizeIgnored = 0;
  return SymbolMatchIndex(sym.section_id_, sym.full_name_, id_path,
                          kSizeIgnored);
}
// Key on section, |Name()| and id path. Using |Name()| instead of
// |FullName()| lets symbols whose signature changed still match. The size
// component is fixed at zero.
SymbolMatchIndex SectionAndNameAndPath(const caspian::Symbol& sym) {
  const std::string_view short_name = sym.Name();
  const std::string_view id_path = GetIdPath(sym);
  constexpr int kSizeIgnored = 0;
  return SymbolMatchIndex(sym.section_id_, short_name, id_path, kSizeIgnored);
}
// Key on section and full name only. The path is left empty so that a symbol
// moved to another file can still match, and the size is fixed at zero.
SymbolMatchIndex SectionAndFullName(const caspian::Symbol& sym) {
  const std::string_view no_path = "";
  constexpr int kSizeIgnored = 0;
  return SymbolMatchIndex(sym.section_id_, sym.full_name_, no_path,
                          kSizeIgnored);
}
// Runs one matching pass over the still-unmatched symbols.
//
// Each symbol in |unmatched_before| is bucketed by |key_func|. Each symbol in
// |unmatched_after| is then paired with the oldest remaining "before" symbol
// that has the same key. For every pair a DeltaSymbol is built and appended to
// |delta_symbols| unless its Pss() is zero. Paired symbols are removed from
// both input vectors. Symbols for which |key_func| returns an empty key are
// skipped. Returns the number of pairs formed.
int MatchSymbols(
    std::function<SymbolMatchIndex(const caspian::Symbol&)> key_func,
    std::vector<caspian::DeltaSymbol>* delta_symbols,
    std::vector<const caspian::Symbol*>* unmatched_before,
    std::vector<const caspian::Symbol*>* unmatched_after) {
  // TODO(jaspercb): Accumulate added/dropped padding by section name.

  // A slot refers to an element of |unmatched_before| itself, so writing
  // nullptr through it marks that element as consumed.
  using BeforeSlot = std::reference_wrapper<const caspian::Symbol*>;
  std::unordered_map<SymbolMatchIndex, std::list<BeforeSlot>> slots_by_key;

  for (const caspian::Symbol*& before_slot : *unmatched_before) {
    SymbolMatchIndex key = key_func(*before_slot);
    if (!key) {
      continue;
    }
    slots_by_key[key].emplace_back(before_slot);
  }

  int pair_count = 0;
  for (const caspian::Symbol*& after_slot : *unmatched_after) {
    SymbolMatchIndex key = key_func(*after_slot);
    if (!key) {
      continue;
    }
    auto bucket = slots_by_key.find(key);
    if (bucket == slots_by_key.end() || bucket->second.empty()) {
      continue;
    }

    // Take the oldest candidate. The reference points into
    // |unmatched_before|, not into the list node, so it stays valid after the
    // pop.
    const caspian::Symbol*& before_slot = bucket->second.front().get();
    bucket->second.pop_front();

    caspian::DeltaSymbol delta(before_slot, after_slot);
    if (delta.Pss() != 0.0) {
      delta_symbols->push_back(delta);
    }
    before_slot = nullptr;
    after_slot = nullptr;
    ++pair_count;
  }

  // Drop everything that was paired in this pass.
  Erase(*unmatched_before, nullptr);
  Erase(*unmatched_after, nullptr);
  return pair_count;
}
} // namespace
namespace caspian {
// Builds the symbol-level diff between |before| and |after|.
//
// Symbols are paired in several passes, each using a looser key than the
// previous one; symbols left unmatched by one pass are retried in the next.
// Matched pairs whose Pss() delta is zero are omitted from the result.
// Symbols that never match are reported as additions (present only in
// |after|) or removals (present only in |before|).
//
// The returned DeltaSymbols are constructed from pointers to elements of
// |before->raw_symbols| and |after->raw_symbols|.
DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) {
  DeltaSizeInfo ret(before, after);

  // The sizes are known up front, so reserve once instead of growing.
  std::vector<const Symbol*> unmatched_before;
  unmatched_before.reserve(before->raw_symbols.size());
  for (const Symbol& sym : before->raw_symbols) {
    unmatched_before.push_back(&sym);
  }

  std::vector<const Symbol*> unmatched_after;
  unmatched_after.reserve(after->raw_symbols.size());
  for (const Symbol& sym : after->raw_symbols) {
    unmatched_after.push_back(&sym);
  }

  // Attempt several rounds to use increasingly loose matching on unmatched
  // symbols. Any symbols still unmatched are tried in the next round.
  int step = 0;
  auto key_funcs = {SectionAndFullNameAndPathAndSize, SectionAndFullNameAndPath,
                    SectionAndNameAndPath, SectionAndFullName};
  for (const auto& key_function : key_funcs) {
    int n_matched_symbols = MatchSymbols(key_function, &ret.delta_symbols,
                                         &unmatched_before, &unmatched_after);
    std::cout << "Matched " << n_matched_symbols << " symbols in matching pass "
              << ++step << std::endl;
  }

  // Add additions or removals for any unmatched symbols: a symbol only in
  // |after| has no "before" side, and a symbol only in |before| has no
  // "after" side.
  ret.delta_symbols.reserve(ret.delta_symbols.size() + unmatched_after.size() +
                            unmatched_before.size());
  for (const Symbol* after_sym : unmatched_after) {
    ret.delta_symbols.emplace_back(nullptr, after_sym);
  }
  for (const Symbol* before_sym : unmatched_before) {
    ret.delta_symbols.emplace_back(before_sym, nullptr);
  }
  return ret;
}
} // namespace caspian
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_DIFF_H_
#define TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_DIFF_H_
#include "tools/binary_size/libsupersize/caspian/model.h"
namespace caspian {
// Computes the symbol-level difference between |before| and |after|.
// Symbols are paired over several passes with progressively looser keys.
// Matched pairs with a zero Pss() delta are omitted, and symbols that never
// match are included as one-sided entries (only a "before" or only an "after"
// symbol).
DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after);
}  // namespace caspian
#endif // TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_DIFF_H_
...@@ -163,27 +163,27 @@ void CalculatePadding(std::vector<Symbol>* raw_symbols) { ...@@ -163,27 +163,27 @@ void CalculatePadding(std::vector<Symbol>* raw_symbols) {
Symbol& symbol = (*raw_symbols)[i]; Symbol& symbol = (*raw_symbols)[i];
if (symbol.IsOverhead()) { if (symbol.IsOverhead()) {
symbol.padding = symbol.size; symbol.padding_ = symbol.size_;
} }
if (prev_symbol.section_name != symbol.section_name) { if (prev_symbol.SectionName() != symbol.SectionName()) {
if (seen_sections.count(symbol.section_name)) { if (seen_sections.count(symbol.section_name_)) {
std::cerr << "Input symbols must be sorted by section, then address: " std::cerr << "Input symbols must be sorted by section, then address: "
<< prev_symbol << ", " << symbol << std::endl; << prev_symbol << ", " << symbol << std::endl;
exit(1); exit(1);
} }
seen_sections.insert(symbol.section_name); seen_sections.insert(symbol.SectionName());
continue; continue;
} }
if (symbol.address <= 0 || prev_symbol.address <= 0 || !symbol.IsNative() || if (symbol.Address() <= 0 || prev_symbol.Address() <= 0 ||
!prev_symbol.IsNative()) { !symbol.IsNative() || !prev_symbol.IsNative()) {
continue; continue;
} }
if (symbol.address == prev_symbol.address) { if (symbol.Address() == prev_symbol.Address()) {
if (symbol.aliases && symbol.aliases == prev_symbol.aliases) { if (symbol.aliases_ && symbol.aliases_ == prev_symbol.aliases_) {
symbol.padding = prev_symbol.padding; symbol.padding_ = prev_symbol.padding_;
symbol.size = prev_symbol.size; symbol.size_ = prev_symbol.size_;
continue; continue;
} }
if (prev_symbol.SizeWithoutPadding() != 0) { if (prev_symbol.SizeWithoutPadding() != 0) {
...@@ -194,10 +194,10 @@ void CalculatePadding(std::vector<Symbol>* raw_symbols) { ...@@ -194,10 +194,10 @@ void CalculatePadding(std::vector<Symbol>* raw_symbols) {
} }
} }
int32_t padding = symbol.address - prev_symbol.EndAddress(); int32_t padding = symbol.Address() - prev_symbol.EndAddress();
symbol.padding = padding; symbol.padding_ = padding;
symbol.size += padding; symbol.size_ += padding;
if (symbol.size < 0) { if (symbol.size_ < 0) {
std::cerr << "Symbol has negative size (likely not sorted properly):" std::cerr << "Symbol has negative size (likely not sorted properly):"
<< symbol << std::endl; << symbol << std::endl;
std::cerr << "prev symbol: " << prev_symbol << std::endl; std::cerr << "prev symbol: " << prev_symbol << std::endl;
...@@ -326,7 +326,7 @@ void ParseSizeInfo(const char* gzipped, ...@@ -326,7 +326,7 @@ void ParseSizeInfo(const char* gzipped,
int32_t num_aliases = 0; int32_t num_aliases = 0;
char* line = strsep(&rest, "\n"); char* line = strsep(&rest, "\n");
if (*line) { if (*line) {
new_sym.full_name = strsep(&line, "\t"); new_sym.full_name_ = strsep(&line, "\t");
char* first = nullptr; char* first = nullptr;
char* second = nullptr; char* second = nullptr;
if (line) { if (line) {
...@@ -348,16 +348,16 @@ void ParseSizeInfo(const char* gzipped, ...@@ -348,16 +348,16 @@ void ParseSizeInfo(const char* gzipped,
} }
} }
} }
new_sym.sectionId = cur_section_id; new_sym.section_id_ = cur_section_id;
new_sym.address = cur_addresses[i]; new_sym.address_ = cur_addresses[i];
new_sym.size = cur_sizes[i]; new_sym.size_ = cur_sizes[i];
new_sym.section_name = cur_section_name; new_sym.section_name_ = cur_section_name;
new_sym.object_path = info->object_paths[cur_path_indices[i]]; new_sym.object_path_ = info->object_paths[cur_path_indices[i]];
new_sym.source_path = info->source_paths[cur_path_indices[i]]; new_sym.source_path_ = info->source_paths[cur_path_indices[i]];
if (has_components) { if (has_components) {
new_sym.component = info->components[cur_component_indices[i]]; new_sym.component_ = info->components[cur_component_indices[i]];
} }
new_sym.flags = flags; new_sym.flags_ = flags;
// When we encounter a symbol with an alias count, the next N symbols we // When we encounter a symbol with an alias count, the next N symbols we
// encounter should be placed in the same symbol group. // encounter should be placed in the same symbol group.
...@@ -367,8 +367,8 @@ void ParseSizeInfo(const char* gzipped, ...@@ -367,8 +367,8 @@ void ParseSizeInfo(const char* gzipped,
alias_counter = num_aliases; alias_counter = num_aliases;
} }
if (alias_counter > 0) { if (alias_counter > 0) {
new_sym.aliases = &info->alias_groups.back(); new_sym.aliases_ = &info->alias_groups.back();
new_sym.aliases->push_back(&new_sym); new_sym.aliases_->push_back(&new_sym);
alias_counter--; alias_counter--;
} }
} }
...@@ -376,11 +376,6 @@ void ParseSizeInfo(const char* gzipped, ...@@ -376,11 +376,6 @@ void ParseSizeInfo(const char* gzipped,
CalculatePadding(&info->raw_symbols); CalculatePadding(&info->raw_symbols);
for (caspian::Symbol& sym : info->raw_symbols) {
size_t alias_count = sym.aliases ? sym.aliases->size() : 1;
sym.pss = static_cast<float>(sym.size) / alias_count;
}
// If there are unparsed non-empty lines, something's gone wrong. // If there are unparsed non-empty lines, something's gone wrong.
CheckNoNonEmptyLinesRemain(rest); CheckNoNonEmptyLinesRemain(rest);
......
...@@ -42,59 +42,122 @@ enum class SectionId : char { ...@@ -42,59 +42,122 @@ enum class SectionId : char {
kPakTranslations = 'p', kPakTranslations = 'p',
}; };
struct BaseSizeInfo; class Symbol;
struct Symbol { class BaseSymbol {
Symbol(); public:
Symbol(const Symbol& other); virtual ~BaseSymbol();
Symbol& operator=(const Symbol& other);
static Symbol DiffSymbolFrom(const Symbol* before, const Symbol* after); virtual int32_t Address() const = 0;
virtual int32_t Size() const = 0;
virtual int32_t Flags() const = 0;
virtual int32_t Padding() const = 0;
virtual std::string_view FullName() const = 0;
// Derived from |full_name|. Generated lazily and cached.
virtual std::string_view TemplateName() const = 0;
virtual std::string_view Name() const = 0;
virtual const std::vector<Symbol*>* Aliases() const = 0;
virtual SectionId Section() const = 0;
virtual const char* ObjectPath() const = 0;
virtual const char* SourcePath() const = 0;
virtual const char* SectionName() const = 0;
virtual const char* Component() const = 0;
virtual float Pss() const = 0;
virtual float PssWithoutPadding() const = 0;
virtual float PaddingPss() const = 0;
int32_t SizeWithoutPadding() const { return Size() - Padding(); }
bool IsOverhead() const { return full_name.substr(0, 10) == "Overhead: "; } int32_t EndAddress() const { return Address() + SizeWithoutPadding(); }
bool IsBss() const { return sectionId == SectionId::kBss; } int32_t NumAliases() const {
const std::vector<Symbol*>* aliases = Aliases();
return aliases ? aliases->size() : 1;
}
bool IsOverhead() const { return FullName().substr(0, 10) == "Overhead: "; }
bool IsBss() const { return Section() == SectionId::kBss; }
bool IsDex() const { bool IsDex() const {
return sectionId == SectionId::kDex || sectionId == SectionId::kDexMethod; SectionId section_id = Section();
return section_id == SectionId::kDex || section_id == SectionId::kDexMethod;
} }
bool IsOther() const { return sectionId == SectionId::kOther; } bool IsOther() const { return Section() == SectionId::kOther; }
bool IsPak() const { bool IsPak() const {
return sectionId == SectionId::kPakNontranslated || SectionId section_id = Section();
sectionId == SectionId::kPakTranslations; return section_id == SectionId::kPakNontranslated ||
section_id == SectionId::kPakTranslations;
} }
bool IsNative() const { bool IsNative() const {
return (sectionId == SectionId::kBss || sectionId == SectionId::kData || SectionId section_id = Section();
sectionId == SectionId::kDataRelRo || return (section_id == SectionId::kBss || section_id == SectionId::kData ||
sectionId == SectionId::kText || sectionId == SectionId::kRoData); section_id == SectionId::kDataRelRo ||
section_id == SectionId::kText || section_id == SectionId::kRoData);
} }
bool IsStringLiteral() const { return full_name.substr(0, 1) == "\""; } bool IsStringLiteral() const { return FullName().substr(0, 1) == "\""; }
int32_t SizeWithoutPadding() const { return size - padding; } bool IsNameUnique() const {
return IsStringLiteral() || IsOverhead() ||
FullName().substr(0, 1) == "*" ||
(IsNative() && FullName().find('.') != std::string::npos);
}
};
int32_t EndAddress() const { return address + SizeWithoutPadding(); } struct BaseSizeInfo;
class Symbol;
// Derived from |full_name|. Generated lazily and cached. class Symbol : public BaseSymbol {
void DeriveNames() const; public:
std::string_view TemplateName() const; Symbol();
std::string_view Name() const; ~Symbol() override;
Symbol(const Symbol& other);
int32_t address = 0; int32_t Address() const override;
int32_t size = 0; int32_t Size() const override;
int32_t flags = 0; int32_t Flags() const override;
int32_t padding = 0; int32_t Padding() const override;
float pss = 0.0f;
SectionId sectionId = SectionId::kNone; std::string_view FullName() const override;
std::string_view full_name; // Derived from |full_name|. Generated lazily and cached.
std::string_view TemplateName() const override;
std::string_view Name() const override;
const std::vector<Symbol*>* Aliases() const override;
SectionId Section() const override;
const char* ObjectPath() const override;
const char* SourcePath() const override;
const char* SectionName() const override;
const char* Component() const override;
float Pss() const override;
float PssWithoutPadding() const override;
float PaddingPss() const override;
int32_t address_ = 0;
int32_t size_ = 0;
int32_t flags_ = 0;
int32_t padding_ = 0;
SectionId section_id_ = SectionId::kNone;
std::string_view full_name_;
// Derived lazily
mutable std::string_view template_name_;
mutable std::string_view name_;
// Pointers into SizeInfo->raw_decompressed; // Pointers into SizeInfo->raw_decompressed;
const char* section_name = nullptr; const char* section_name_ = nullptr;
const char* object_path = nullptr; const char* object_path_ = nullptr;
const char* source_path = nullptr; const char* source_path_ = nullptr;
const char* component = nullptr; const char* component_ = nullptr;
std::vector<Symbol*>* aliases = nullptr; std::vector<Symbol*>* aliases_ = nullptr;
// The SizeInfo the symbol was constructed from. Primarily used for // The SizeInfo the symbol was constructed from. Primarily used for
// allocating commonly-reused strings in a context where they won't outlive // allocating commonly-reused strings in a context where they won't outlive
...@@ -102,26 +165,55 @@ struct Symbol { ...@@ -102,26 +165,55 @@ struct Symbol {
BaseSizeInfo* size_info = nullptr; BaseSizeInfo* size_info = nullptr;
private: private:
mutable std::string_view template_name; void DeriveNames() const;
mutable std::string_view name; };
class DeltaSymbol : public BaseSymbol {
public:
DeltaSymbol(const Symbol* before, const Symbol* after);
~DeltaSymbol() override;
int32_t Address() const override;
int32_t Size() const override;
int32_t Flags() const override;
int32_t Padding() const override;
std::string_view FullName() const override;
// Derived from |full_name|. Generated lazily and cached.
std::string_view TemplateName() const override;
std::string_view Name() const override;
const std::vector<Symbol*>* Aliases() const override;
SectionId Section() const override;
const char* ObjectPath() const override;
const char* SourcePath() const override;
const char* SectionName() const override;
const char* Component() const override;
float Pss() const override;
float PssWithoutPadding() const override;
float PaddingPss() const override;
private:
const Symbol* before_ = nullptr;
const Symbol* after_ = nullptr;
}; };
std::ostream& operator<<(std::ostream& os, const Symbol& sym); std::ostream& operator<<(std::ostream& os, const Symbol& sym);
struct BaseSizeInfo { struct BaseSizeInfo {
BaseSizeInfo(); BaseSizeInfo();
~BaseSizeInfo(); BaseSizeInfo(const BaseSizeInfo&);
std::vector<caspian::Symbol> raw_symbols; virtual ~BaseSizeInfo();
Json::Value metadata; Json::Value metadata;
std::deque<std::string> owned_strings; std::deque<std::string> owned_strings;
SectionId ShortSectionName(const char* section_name);
}; };
struct SizeInfo : BaseSizeInfo { struct SizeInfo : BaseSizeInfo {
SizeInfo(); SizeInfo();
~SizeInfo(); ~SizeInfo() override;
SizeInfo(const SizeInfo& other) = delete; SizeInfo(const SizeInfo& other) = delete;
SizeInfo& operator=(const SizeInfo& other) = delete; SizeInfo& operator=(const SizeInfo& other) = delete;
SectionId ShortSectionName(const char* section_name);
// Entries in |raw_symbols| hold pointers to this data. // Entries in |raw_symbols| hold pointers to this data.
std::vector<const char*> object_paths; std::vector<const char*> object_paths;
...@@ -130,18 +222,21 @@ struct SizeInfo : BaseSizeInfo { ...@@ -130,18 +222,21 @@ struct SizeInfo : BaseSizeInfo {
std::vector<const char*> section_names; std::vector<const char*> section_names;
std::vector<char> raw_decompressed; std::vector<char> raw_decompressed;
std::vector<Symbol> raw_symbols;
// A container for each symbol group. // A container for each symbol group.
std::deque<std::vector<Symbol*>> alias_groups; std::deque<std::vector<Symbol*>> alias_groups;
}; };
struct DiffSizeInfo : BaseSizeInfo { struct DeltaSizeInfo : BaseSizeInfo {
DiffSizeInfo(SizeInfo* before, SizeInfo* after); DeltaSizeInfo(const SizeInfo* before, const SizeInfo* after);
~DiffSizeInfo(); ~DeltaSizeInfo() override;
DiffSizeInfo(const DiffSizeInfo&) = delete; DeltaSizeInfo(const DeltaSizeInfo&);
DiffSizeInfo& operator=(const DiffSizeInfo&) = delete; DeltaSizeInfo& operator=(const DeltaSizeInfo&);
SizeInfo* before = nullptr; const SizeInfo* before = nullptr;
SizeInfo* after = nullptr; const SizeInfo* after = nullptr;
std::vector<DeltaSymbol> delta_symbols;
}; };
struct Stat { struct Stat {
...@@ -182,7 +277,7 @@ struct TreeNode { ...@@ -182,7 +277,7 @@ struct TreeNode {
std::vector<TreeNode*> children; std::vector<TreeNode*> children;
TreeNode* parent = nullptr; TreeNode* parent = nullptr;
const Symbol* symbol = nullptr; const BaseSymbol* symbol = nullptr;
}; };
} // namespace caspian } // namespace caspian
......
...@@ -40,36 +40,46 @@ std::string_view DirName(std::string_view path, char sep, char othersep) { ...@@ -40,36 +40,46 @@ std::string_view DirName(std::string_view path, char sep, char othersep) {
} }
} // namespace } // namespace
TreeBuilder::TreeBuilder( TreeBuilder::TreeBuilder(SizeInfo* size_info) {
BaseSizeInfo* size_info, symbols_.reserve(size_info->raw_symbols.size());
bool group_by_component, for (const Symbol& sym : size_info->raw_symbols) {
std::vector<std::function<bool(const Symbol&)>> filters) symbols_.push_back(&sym);
: size_info_(size_info), }
group_by_component_(group_by_component), }
sep_(group_by_component ? kComponentSep : kPathSep),
filters_(filters) {} TreeBuilder::TreeBuilder(DeltaSizeInfo* size_info) {
symbols_.reserve(size_info->delta_symbols.size());
for (const DeltaSymbol& sym : size_info->delta_symbols) {
symbols_.push_back(&sym);
}
}
TreeBuilder::~TreeBuilder() = default; TreeBuilder::~TreeBuilder() = default;
void TreeBuilder::Build() { void TreeBuilder::Build(
bool group_by_component,
std::vector<std::function<bool(const BaseSymbol&)>> filters) {
group_by_component_ = group_by_component;
filters_ = filters;
// Initialize tree root. // Initialize tree root.
root_.container_type = ContainerType::kDirectory; root_.container_type = ContainerType::kDirectory;
owned_strings_.emplace_back(1, sep_); owned_strings_.emplace_back(1, sep_);
root_.id_path = owned_strings_.back(); root_.id_path = owned_strings_.back();
_parents[""] = &root_; _parents[""] = &root_;
// Group symbols by source path. std::unordered_map<std::string_view, std::vector<const BaseSymbol*>>
std::unordered_map<std::string_view, std::vector<const Symbol*>> symbols; symbols_by_source_path;
for (auto& sym : size_info_->raw_symbols) { for (const BaseSymbol* sym : symbols_) {
if (ShouldIncludeSymbol(sym)) { if (ShouldIncludeSymbol(*sym)) {
std::string_view key = sym.source_path; std::string_view key = sym->SourcePath();
if (key == nullptr) { if (key == nullptr) {
key = sym.object_path; key = sym->ObjectPath();
} }
symbols[key].push_back(&sym); symbols_by_source_path[key].push_back(sym);
} }
} }
for (const auto& pair : symbols) { for (const auto& pair : symbols_by_source_path) {
AddFileEntry(pair.first, pair.second); AddFileEntry(pair.first, pair.second);
} }
} }
...@@ -91,7 +101,7 @@ Json::Value TreeBuilder::Open(const char* path) { ...@@ -91,7 +101,7 @@ Json::Value TreeBuilder::Open(const char* path) {
} }
void TreeBuilder::AddFileEntry(const std::string_view source_path, void TreeBuilder::AddFileEntry(const std::string_view source_path,
const std::vector<const Symbol*>& symbols) { const std::vector<const BaseSymbol*>& symbols) {
// Creates a single file node with a child for each symbol in that file. // Creates a single file node with a child for each symbol in that file.
TreeNode* file_node = new TreeNode(); TreeNode* file_node = new TreeNode();
file_node->container_type = ContainerType::kFile; file_node->container_type = ContainerType::kFile;
...@@ -103,8 +113,8 @@ void TreeBuilder::AddFileEntry(const std::string_view source_path, ...@@ -103,8 +113,8 @@ void TreeBuilder::AddFileEntry(const std::string_view source_path,
} }
if (group_by_component_) { if (group_by_component_) {
std::string component; std::string component;
if (symbols[0]->component && *symbols[0]->component) { if (symbols[0]->Component() && *symbols[0]->Component()) {
component = symbols[0]->component; component = symbols[0]->Component();
} else { } else {
component = kNoComponent; component = kNoComponent;
} }
...@@ -119,12 +129,12 @@ void TreeBuilder::AddFileEntry(const std::string_view source_path, ...@@ -119,12 +129,12 @@ void TreeBuilder::AddFileEntry(const std::string_view source_path,
// TODO: Initialize file type, source path, component // TODO: Initialize file type, source path, component
// Create symbol nodes. // Create symbol nodes.
for (const Symbol* sym : symbols) { for (const BaseSymbol* sym : symbols) {
TreeNode* symbol_node = new TreeNode(); TreeNode* symbol_node = new TreeNode();
symbol_node->container_type = ContainerType::kSymbol; symbol_node->container_type = ContainerType::kSymbol;
symbol_node->id_path = sym->full_name; symbol_node->id_path = sym->FullName();
symbol_node->size = sym->pss; symbol_node->size = sym->Pss();
symbol_node->node_stats = NodeStats(sym->sectionId, 1, symbol_node->size); symbol_node->node_stats = NodeStats(sym->Section(), 1, symbol_node->size);
symbol_node->symbol = sym; symbol_node->symbol = sym;
AttachToParent(symbol_node, file_node); AttachToParent(symbol_node, file_node);
} }
...@@ -194,7 +204,7 @@ ContainerType TreeBuilder::ContainerTypeFromChild( ...@@ -194,7 +204,7 @@ ContainerType TreeBuilder::ContainerTypeFromChild(
} }
} }
bool TreeBuilder::ShouldIncludeSymbol(const Symbol& symbol) const { bool TreeBuilder::ShouldIncludeSymbol(const BaseSymbol& symbol) const {
for (const auto& filter : filters_) { for (const auto& filter : filters_) {
if (!filter(symbol)) { if (!filter(symbol)) {
return false; return false;
......
...@@ -17,16 +17,16 @@ ...@@ -17,16 +17,16 @@
namespace caspian { namespace caspian {
class TreeBuilder { class TreeBuilder {
public: public:
TreeBuilder(BaseSizeInfo* size_info, TreeBuilder(SizeInfo* size_info);
bool group_by_component, TreeBuilder(DeltaSizeInfo* size_info);
std::vector<std::function<bool(const Symbol&)>> filters);
~TreeBuilder(); ~TreeBuilder();
void Build(); void Build(bool group_by_component,
std::vector<std::function<bool(const BaseSymbol&)>> filters);
Json::Value Open(const char* path); Json::Value Open(const char* path);
private: private:
void AddFileEntry(const std::string_view source_path, void AddFileEntry(const std::string_view source_path,
const std::vector<const Symbol*>& symbols); const std::vector<const BaseSymbol*>& symbols);
TreeNode* GetOrMakeParentNode(TreeNode* child_node); TreeNode* GetOrMakeParentNode(TreeNode* child_node);
...@@ -34,7 +34,7 @@ class TreeBuilder { ...@@ -34,7 +34,7 @@ class TreeBuilder {
ContainerType ContainerTypeFromChild(std::string_view child_id_path) const; ContainerType ContainerTypeFromChild(std::string_view child_id_path) const;
bool ShouldIncludeSymbol(const Symbol& symbol) const; bool ShouldIncludeSymbol(const BaseSymbol& symbol) const;
// Merges dex method symbols into containers based on the class of the dex // Merges dex method symbols into containers based on the class of the dex
// method. // method.
...@@ -45,7 +45,6 @@ class TreeBuilder { ...@@ -45,7 +45,6 @@ class TreeBuilder {
// node. // node.
std::unordered_map<std::string_view, TreeNode*> _parents; std::unordered_map<std::string_view, TreeNode*> _parents;
BaseSizeInfo* size_info_ = nullptr;
// Contained TreeNode hold lightweight string_views to fields in SizeInfo. // Contained TreeNode hold lightweight string_views to fields in SizeInfo.
// If grouping by component, this isn't possible: TreeNode id_paths are not // If grouping by component, this isn't possible: TreeNode id_paths are not
// substrings of SizeInfo-owned strings. In that case, the strings are stored // substrings of SizeInfo-owned strings. In that case, the strings are stored
...@@ -57,7 +56,8 @@ class TreeBuilder { ...@@ -57,7 +56,8 @@ class TreeBuilder {
// Note that we split paths on '/' no matter the value of separator, since // Note that we split paths on '/' no matter the value of separator, since
// when grouping by component, paths look like Component>path/to/file. // when grouping by component, paths look like Component>path/to/file.
char sep_; char sep_;
std::vector<std::function<bool(const Symbol&)>> filters_; std::vector<std::function<bool(const BaseSymbol&)>> filters_;
std::vector<const BaseSymbol*> symbols_;
}; // TreeBuilder }; // TreeBuilder
} // namespace caspian } // namespace caspian
#endif // TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_TREE_BUILDER_H_ #endif // TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_TREE_BUILDER_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment