Commit 9e94595a authored by Jasper Chapman-Black's avatar Jasper Chapman-Black Committed by Commit Bot

SuperSize: Caspian: Aggregate padding by section

Also adds diff_test.cc.

Bug: 1011921
Change-Id: I3d678738719531d1c74e620f4fc3605cab08b325
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1918027
Commit-Queue: Jasper Chapman-Black <jaspercb@chromium.org>
Reviewed-by: default avatarAndrew Grieve <agrieve@chromium.org>
Reviewed-by: default avatarSamuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#715841}
parent ab1e737d
...@@ -25,6 +25,7 @@ source_set("caspian-lib") { ...@@ -25,6 +25,7 @@ source_set("caspian-lib") {
"tree_builder.h", "tree_builder.h",
] ]
deps = [ deps = [
"//third_party/re2:re2",
"//third_party/zlib:zlib", "//third_party/zlib:zlib",
"//third_party/zlib/google:compression_utils_portable", "//third_party/zlib/google:compression_utils_portable",
] ]
...@@ -35,6 +36,7 @@ source_set("caspian-lib") { ...@@ -35,6 +36,7 @@ source_set("caspian-lib") {
test("caspian_unittests") { test("caspian_unittests") {
sources = [ sources = [
"diff_test.cc",
"function_signature_test.cc", "function_signature_test.cc",
] ]
deps = [ deps = [
...@@ -52,6 +54,7 @@ executable("cli") { ...@@ -52,6 +54,7 @@ executable("cli") {
deps = [ deps = [
":caspian-lib", ":caspian-lib",
] ]
cflags = [ "-Wall" ]
} }
if (is_wasm) { if (is_wasm) {
......
...@@ -34,6 +34,18 @@ void Diff(const char* before_filename, const char* after_filename) { ...@@ -34,6 +34,18 @@ void Diff(const char* before_filename, const char* after_filename) {
ParseSizeInfoFromFile(after_filename, &after); ParseSizeInfoFromFile(after_filename, &after);
caspian::DeltaSizeInfo diff = Diff(&before, &after); caspian::DeltaSizeInfo diff = Diff(&before, &after);
float pss = 0.0f;
float size = 0.0f;
float padding = 0.0f;
for (const auto& sym : diff.delta_symbols) {
pss += sym.Pss();
size += sym.Size();
padding += sym.Padding();
}
std::cout << "Pss: " << pss << std::endl;
std::cout << "Size: " << size << std::endl;
std::cout << "Padding: " << padding << std::endl;
} }
void Validate(const char* filename) { void Validate(const char* filename) {
...@@ -57,21 +69,25 @@ void Validate(const char* filename) { ...@@ -57,21 +69,25 @@ void Validate(const char* filename) {
std::cout << "Largest number of aliases: " << max_aliases << std::endl; std::cout << "Largest number of aliases: " << max_aliases << std::endl;
} }
int main(int argc, char* argv[]) { void PrintUsageAndExit() {
if (argc < 2) {
std::cerr << "Must have exactly one of:" << std::endl; std::cerr << "Must have exactly one of:" << std::endl;
std::cerr << " validate, diff" << std::endl; std::cerr << " validate, diff" << std::endl;
std::cerr << "Usage: " << std::endl; std::cerr << "Usage:" << std::endl;
std::cerr << " cli validate <size file>" << std::endl; std::cerr << " cli validate <size file>" << std::endl;
std::cerr << " cli diff <before_file> <after_file>" << std::endl; std::cerr << " cli diff <before_file> <after_file>" << std::endl;
exit(1); exit(1);
}
int main(int argc, char* argv[]) {
if (argc < 2) {
PrintUsageAndExit();
} }
if (std::string_view(argv[1]) == "diff") { if (std::string_view(argv[1]) == "diff") {
Diff(argv[2], argv[3]); Diff(argv[2], argv[3]);
} else if (std::string_view(argv[1]) == "validate") { } else if (std::string_view(argv[1]) == "validate") {
Validate(argv[1]); Validate(argv[1]);
} else { } else {
exit(1); PrintUsageAndExit();
} }
return 0; return 0;
} }
...@@ -4,14 +4,18 @@ ...@@ -4,14 +4,18 @@
#include "tools/binary_size/libsupersize/caspian/diff.h" #include "tools/binary_size/libsupersize/caspian/diff.h"
#include <deque>
#include <functional> #include <functional>
#include <iostream> #include <iostream>
#include <list> #include <list>
#include <string>
#include <string_view> #include <string_view>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "third_party/re2/src/re2/re2.h"
namespace { namespace {
struct SymbolMatchIndex { struct SymbolMatchIndex {
SymbolMatchIndex() {} SymbolMatchIndex() {}
...@@ -68,34 +72,12 @@ std::string_view GetIdPath(const caspian::Symbol& sym) { ...@@ -68,34 +72,12 @@ std::string_view GetIdPath(const caspian::Symbol& sym) {
: sym.ObjectPath(); : sym.ObjectPath();
} }
// |full_name| is costly enough to derive that we'd rather avoid it.
// Try to match on |raw_name| if possible.
SymbolMatchIndex SectionAndFullNameAndPathAndSize(const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
sym.Pss());
}
SymbolMatchIndex SectionAndFullNameAndPath(const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
0.0f);
}
// Allows signature changes (uses |Name()| rather than |FullName()|)
SymbolMatchIndex SectionAndNameAndPath(const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.Name(), GetIdPath(sym), 0.0f);
}
// Match on full name, but without path (to account for file moves)
SymbolMatchIndex SectionAndFullName(const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.full_name_, "", 0.0f);
}
int MatchSymbols( int MatchSymbols(
std::function<SymbolMatchIndex(const caspian::Symbol&)> key_func, std::function<SymbolMatchIndex(const caspian::Symbol&)> key_func,
std::vector<caspian::DeltaSymbol>* delta_symbols, std::vector<caspian::DeltaSymbol>* delta_symbols,
std::vector<const caspian::Symbol*>* unmatched_before, std::vector<const caspian::Symbol*>* unmatched_before,
std::vector<const caspian::Symbol*>* unmatched_after) { std::vector<const caspian::Symbol*>* unmatched_after,
// TODO(jaspercb): Accumulate added/dropped padding by section name. std::unordered_map<caspian::SectionId, float>* padding_by_section_name) {
int n_matched_symbols = 0; int n_matched_symbols = 0;
std::unordered_map<SymbolMatchIndex, std::unordered_map<SymbolMatchIndex,
std::list<std::reference_wrapper<const caspian::Symbol*>>> std::list<std::reference_wrapper<const caspian::Symbol*>>>
...@@ -114,10 +96,16 @@ int MatchSymbols( ...@@ -114,10 +96,16 @@ int MatchSymbols(
if (found != before_symbols_by_key.end() && found->second.size()) { if (found != before_symbols_by_key.end() && found->second.size()) {
const caspian::Symbol*& before_sym = found->second.front().get(); const caspian::Symbol*& before_sym = found->second.front().get();
found->second.pop_front(); found->second.pop_front();
// Padding tracked in aggregate, except for padding-only symbols.
if (before_sym->SizeWithoutPadding() != 0) {
(*padding_by_section_name)[before_sym->section_id_] +=
after_sym->PaddingPss() - before_sym->PaddingPss();
}
caspian::DeltaSymbol delta_sym(before_sym, after_sym); caspian::DeltaSymbol delta_sym(before_sym, after_sym);
if (delta_sym.Pss() != 0.0) { if (delta_sym.Pss() != 0.0) {
delta_symbols->push_back(delta_sym); delta_symbols->push_back(delta_sym);
} }
// Null associated pointers in |unmatched_before|, |unmatched_after|.
before_sym = nullptr; before_sym = nullptr;
after_sym = nullptr; after_sym = nullptr;
n_matched_symbols++; n_matched_symbols++;
...@@ -125,10 +113,82 @@ int MatchSymbols( ...@@ -125,10 +113,82 @@ int MatchSymbols(
} }
} }
// Compact out nulled entries.
Erase(*unmatched_before, nullptr); Erase(*unmatched_before, nullptr);
Erase(*unmatched_after, nullptr); Erase(*unmatched_after, nullptr);
return n_matched_symbols; return n_matched_symbols;
} }
class DiffHelper {
public:
DiffHelper() = default;
std::string_view StripNumbers(std::string_view in) {
static const RE2 number_regex("\\d+");
re2::StringPiece piece(in.data(), in.size());
if (RE2::PartialMatch(piece, number_regex)) {
tmp_strings_.push_back(std::string(in));
RE2::GlobalReplace(&tmp_strings_.back(), number_regex, "");
return tmp_strings_.back();
}
return in;
}
std::string_view NormalizeStarSymbols(std::string_view in) {
// Used only for "*" symbols to strip suffixes "abc123" or "abc123 (any)".
static const RE2 normalize_star_symbols("\\s+\\d+(?: \\(.*\\))?$");
re2::StringPiece piece(in.data(), in.size());
if (RE2::PartialMatch(piece, normalize_star_symbols)) {
tmp_strings_.push_back(std::string(in));
RE2::Replace(&tmp_strings_.back(), normalize_star_symbols, "s");
return tmp_strings_.back();
}
return in;
}
using MatchFunc = std::function<SymbolMatchIndex(const caspian::Symbol&)>;
MatchFunc SectionAndFullNameAndPathAndSize() {
return [](const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
sym.Pss());
};
}
MatchFunc SectionAndFullNameAndPath() {
return [this](const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, StripNumbers(sym.full_name_),
GetIdPath(sym), 0.0f);
};
}
// Allows signature changes (uses |Name()| rather than |FullName()|)
MatchFunc SectionAndNameAndPath() {
return [this](const caspian::Symbol& sym) {
std::string_view name = sym.Name();
if (!name.empty() && name[0] == '*') {
name = NormalizeStarSymbols(name);
}
return SymbolMatchIndex(sym.section_id_, name, GetIdPath(sym), 0.0f);
};
}
// Match on full name, but without path (to account for file moves)
MatchFunc SectionAndFullName() {
return [](const caspian::Symbol& sym) {
if (!sym.IsNameUnique()) {
return SymbolMatchIndex();
}
return SymbolMatchIndex(sym.section_id_, sym.full_name_, "", 0.0f);
};
}
void ClearStrings() { tmp_strings_.clear(); }
private:
// Holds strings created during number stripping/star symbol normalization.
std::deque<std::string> tmp_strings_;
};
} // namespace } // namespace
namespace caspian { namespace caspian {
...@@ -149,13 +209,19 @@ DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) { ...@@ -149,13 +209,19 @@ DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) {
// Attempt several rounds to use increasingly loose matching on unmatched // Attempt several rounds to use increasingly loose matching on unmatched
// symbols. Any symbols still unmatched are tried in the next round. // symbols. Any symbols still unmatched are tried in the next round.
int step = 0; int step = 0;
auto key_funcs = {SectionAndFullNameAndPathAndSize, SectionAndFullNameAndPath, DiffHelper helper;
SectionAndNameAndPath, SectionAndFullName}; std::vector<DiffHelper::MatchFunc>
key_funcs = {helper.SectionAndFullNameAndPathAndSize(),
helper.SectionAndFullNameAndPath(),
helper.SectionAndNameAndPath(), helper.SectionAndFullName()};
std::unordered_map<SectionId, float> padding_by_section_name;
for (const auto& key_function : key_funcs) { for (const auto& key_function : key_funcs) {
int n_matched_symbols = MatchSymbols(key_function, &ret.delta_symbols, int n_matched_symbols =
&unmatched_before, &unmatched_after); MatchSymbols(key_function, &ret.delta_symbols, &unmatched_before,
&unmatched_after, &padding_by_section_name);
std::cout << "Matched " << n_matched_symbols << " symbols in matching pass " std::cout << "Matched " << n_matched_symbols << " symbols in matching pass "
<< ++step << std::endl; << ++step << std::endl;
helper.ClearStrings();
} }
// Add removals or deletions for any unmatched symbols. // Add removals or deletions for any unmatched symbols.
...@@ -165,6 +231,24 @@ DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) { ...@@ -165,6 +231,24 @@ DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) {
for (const Symbol* before_sym : unmatched_before) { for (const Symbol* before_sym : unmatched_before) {
ret.delta_symbols.push_back(DeltaSymbol(before_sym, nullptr)); ret.delta_symbols.push_back(DeltaSymbol(before_sym, nullptr));
} }
// Create a DeltaSymbol to represent the zeroed out padding of matched
// symbols.
for (const auto& pair : padding_by_section_name) {
SectionId section_id = pair.first;
float padding = pair.second;
if (padding != 0.0f) {
ret.owned_symbols.emplace_back();
Symbol& after_sym = ret.owned_symbols.back();
after_sym.section_id_ = section_id;
after_sym.size_ = padding;
after_sym.padding_ = padding;
after_sym.full_name_ = "Overhead: aggregate padding of diff'ed symbols";
after_sym.template_name_ = after_sym.full_name_;
after_sym.name_ = after_sym.full_name_;
ret.delta_symbols.push_back(DeltaSymbol(nullptr, &after_sym));
}
}
return ret; return ret;
} }
} // namespace caspian } // namespace caspian
This diff is collapsed.
...@@ -358,6 +358,7 @@ void ParseSizeInfo(const char* gzipped, ...@@ -358,6 +358,7 @@ void ParseSizeInfo(const char* gzipped,
new_sym.component_ = info->components[cur_component_indices[i]]; new_sym.component_ = info->components[cur_component_indices[i]];
} }
new_sym.flags_ = flags; new_sym.flags_ = flags;
new_sym.size_info_ = info;
// When we encounter a symbol with an alias count, the next N symbols we // When we encounter a symbol with an alias count, the next N symbols we
// encounter should be placed in the same symbol group. // encounter should be placed in the same symbol group.
......
...@@ -39,7 +39,7 @@ void Symbol::DeriveNames() const { ...@@ -39,7 +39,7 @@ void Symbol::DeriveNames() const {
name_ = full_name_; name_ = full_name_;
} else if (IsDex()) { } else if (IsDex()) {
std::tuple<std::string_view, std::string_view, std::string_view> std::tuple<std::string_view, std::string_view, std::string_view>
parsed_names = ParseJava(full_name_, &size_info->owned_strings); parsed_names = ParseJava(full_name_, &size_info_->owned_strings);
template_name_ = std::get<1>(parsed_names); template_name_ = std::get<1>(parsed_names);
name_ = std::get<2>(parsed_names); name_ = std::get<2>(parsed_names);
} else if (IsStringLiteral()) { } else if (IsStringLiteral()) {
...@@ -47,7 +47,7 @@ void Symbol::DeriveNames() const { ...@@ -47,7 +47,7 @@ void Symbol::DeriveNames() const {
name_ = full_name_; name_ = full_name_;
} else if (IsNative()) { } else if (IsNative()) {
std::tuple<std::string_view, std::string_view, std::string_view> std::tuple<std::string_view, std::string_view, std::string_view>
parsed_names = ParseCpp(full_name_, &size_info->owned_strings); parsed_names = ParseCpp(full_name_, &size_info_->owned_strings);
template_name_ = std::get<1>(parsed_names); template_name_ = std::get<1>(parsed_names);
name_ = std::get<2>(parsed_names); name_ = std::get<2>(parsed_names);
} else { } else {
......
...@@ -5,8 +5,10 @@ ...@@ -5,8 +5,10 @@
#ifndef TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_ #ifndef TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_
#define TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_ #define TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_
#include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <array>
#include <deque> #include <deque>
#include <map> #include <map>
#include <string> #include <string>
...@@ -42,6 +44,13 @@ enum class SectionId : char { ...@@ -42,6 +44,13 @@ enum class SectionId : char {
kPakTranslations = 'p', kPakTranslations = 'p',
}; };
enum class DiffStatus : uint8_t {
kUnchanged = 0,
kChanged = 1,
kAdded = 2,
kRemoved = 3,
};
class Symbol; class Symbol;
class BaseSymbol { class BaseSymbol {
...@@ -102,12 +111,15 @@ class BaseSymbol { ...@@ -102,12 +111,15 @@ class BaseSymbol {
section_id == SectionId::kText || section_id == SectionId::kRoData); section_id == SectionId::kText || section_id == SectionId::kRoData);
} }
bool IsStringLiteral() const { return FullName().substr(0, 1) == "\""; } bool IsStringLiteral() const {
std::string_view full_name = FullName();
return !full_name.empty() && full_name[0] == '"';
}
bool IsNameUnique() const { bool IsNameUnique() const {
return IsStringLiteral() || IsOverhead() || return !(IsStringLiteral() || IsOverhead() ||
FullName().substr(0, 1) == "*" || (!FullName().empty() && FullName()[0] == '*') ||
(IsNative() && FullName().find('.') != std::string::npos); (IsNative() && FullName().find('.') != std::string_view::npos));
} }
}; };
...@@ -162,7 +174,7 @@ class Symbol : public BaseSymbol { ...@@ -162,7 +174,7 @@ class Symbol : public BaseSymbol {
// The SizeInfo the symbol was constructed from. Primarily used for // The SizeInfo the symbol was constructed from. Primarily used for
// allocating commonly-reused strings in a context where they won't outlive // allocating commonly-reused strings in a context where they won't outlive
// the symbol. // the symbol.
BaseSizeInfo* size_info = nullptr; BaseSizeInfo* size_info_ = nullptr;
private: private:
void DeriveNames() const; void DeriveNames() const;
...@@ -193,6 +205,19 @@ class DeltaSymbol : public BaseSymbol { ...@@ -193,6 +205,19 @@ class DeltaSymbol : public BaseSymbol {
float PssWithoutPadding() const override; float PssWithoutPadding() const override;
float PaddingPss() const override; float PaddingPss() const override;
DiffStatus DiffStatus() const {
if (!before_) {
return DiffStatus::kAdded;
}
if (!after_) {
return DiffStatus::kRemoved;
}
if (Size() || Pss() != 0) {
return DiffStatus::kChanged;
}
return DiffStatus::kUnchanged;
}
private: private:
const Symbol* before_ = nullptr; const Symbol* before_ = nullptr;
const Symbol* after_ = nullptr; const Symbol* after_ = nullptr;
...@@ -234,9 +259,20 @@ struct DeltaSizeInfo : BaseSizeInfo { ...@@ -234,9 +259,20 @@ struct DeltaSizeInfo : BaseSizeInfo {
DeltaSizeInfo(const DeltaSizeInfo&); DeltaSizeInfo(const DeltaSizeInfo&);
DeltaSizeInfo& operator=(const DeltaSizeInfo&); DeltaSizeInfo& operator=(const DeltaSizeInfo&);
using Results = std::array<int32_t, 4>;
Results CountsByDiffStatus() const {
Results ret{0};
for (const DeltaSymbol& sym : delta_symbols) {
ret[static_cast<uint8_t>(sym.DiffStatus())]++;
}
return ret;
}
const SizeInfo* before = nullptr; const SizeInfo* before = nullptr;
const SizeInfo* after = nullptr; const SizeInfo* after = nullptr;
std::vector<DeltaSymbol> delta_symbols; std::vector<DeltaSymbol> delta_symbols;
// Symbols created during diffing, e.g. aggregated padding symbols.
std::deque<Symbol> owned_symbols;
}; };
struct Stat { struct Stat {
......
...@@ -30,13 +30,9 @@ size_t LastSeparatorIndex(std::string_view str, char sep, char othersep) { ...@@ -30,13 +30,9 @@ size_t LastSeparatorIndex(std::string_view str, char sep, char othersep) {
} }
std::string_view DirName(std::string_view path, char sep, char othersep) { std::string_view DirName(std::string_view path, char sep, char othersep) {
auto end = LastSeparatorIndex(path, sep, othersep); size_t sep_idx = LastSeparatorIndex(path, sep, othersep);
if (end != std::string_view::npos) { return sep_idx != std::string_view::npos ? path.substr(0, sep_idx)
std::string_view ret = path; : std::string_view();
ret.remove_suffix(path.size() - end);
return ret;
}
return "";
} }
} // namespace } // namespace
...@@ -61,6 +57,7 @@ void TreeBuilder::Build( ...@@ -61,6 +57,7 @@ void TreeBuilder::Build(
std::vector<std::function<bool(const BaseSymbol&)>> filters) { std::vector<std::function<bool(const BaseSymbol&)>> filters) {
group_by_component_ = group_by_component; group_by_component_ = group_by_component;
filters_ = filters; filters_ = filters;
sep_ = group_by_component ? kComponentSep : kPathSep;
// Initialize tree root. // Initialize tree root.
root_.container_type = ContainerType::kDirectory; root_.container_type = ContainerType::kDirectory;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment