Commit 9e94595a authored by Jasper Chapman-Black's avatar Jasper Chapman-Black Committed by Commit Bot

SuperSize: Caspian: Aggregate padding by section

Also adds diff_test.cc.

Bug: 1011921
Change-Id: I3d678738719531d1c74e620f4fc3605cab08b325
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1918027
Commit-Queue: Jasper Chapman-Black <jaspercb@chromium.org>
Reviewed-by: default avatarAndrew Grieve <agrieve@chromium.org>
Reviewed-by: default avatarSamuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#715841}
parent ab1e737d
......@@ -25,6 +25,7 @@ source_set("caspian-lib") {
"tree_builder.h",
]
deps = [
"//third_party/re2:re2",
"//third_party/zlib:zlib",
"//third_party/zlib/google:compression_utils_portable",
]
......@@ -35,6 +36,7 @@ source_set("caspian-lib") {
test("caspian_unittests") {
sources = [
"diff_test.cc",
"function_signature_test.cc",
]
deps = [
......@@ -52,6 +54,7 @@ executable("cli") {
deps = [
":caspian-lib",
]
cflags = [ "-Wall" ]
}
if (is_wasm) {
......
......@@ -34,6 +34,18 @@ void Diff(const char* before_filename, const char* after_filename) {
ParseSizeInfoFromFile(after_filename, &after);
caspian::DeltaSizeInfo diff = Diff(&before, &after);
float pss = 0.0f;
float size = 0.0f;
float padding = 0.0f;
for (const auto& sym : diff.delta_symbols) {
pss += sym.Pss();
size += sym.Size();
padding += sym.Padding();
}
std::cout << "Pss: " << pss << std::endl;
std::cout << "Size: " << size << std::endl;
std::cout << "Padding: " << padding << std::endl;
}
void Validate(const char* filename) {
......@@ -57,21 +69,25 @@ void Validate(const char* filename) {
std::cout << "Largest number of aliases: " << max_aliases << std::endl;
}
void PrintUsageAndExit() {
std::cerr << "Must have exactly one of:" << std::endl;
std::cerr << " validate, diff" << std::endl;
std::cerr << "Usage:" << std::endl;
std::cerr << " cli validate <size file>" << std::endl;
std::cerr << " cli diff <before_file> <after_file>" << std::endl;
exit(1);
}
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cerr << "Must have exactly one of:" << std::endl;
std::cerr << " validate, diff" << std::endl;
std::cerr << "Usage: " << std::endl;
std::cerr << " cli validate <size file>" << std::endl;
std::cerr << " cli diff <before_file> <after_file>" << std::endl;
exit(1);
PrintUsageAndExit();
}
if (std::string_view(argv[1]) == "diff") {
Diff(argv[2], argv[3]);
} else if (std::string_view(argv[1]) == "validate") {
Validate(argv[1]);
} else {
exit(1);
PrintUsageAndExit();
}
return 0;
}
......@@ -4,14 +4,18 @@
#include "tools/binary_size/libsupersize/caspian/diff.h"
#include <deque>
#include <functional>
#include <iostream>
#include <list>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
#include "third_party/re2/src/re2/re2.h"
namespace {
struct SymbolMatchIndex {
SymbolMatchIndex() {}
......@@ -68,34 +72,12 @@ std::string_view GetIdPath(const caspian::Symbol& sym) {
: sym.ObjectPath();
}
// |full_name| is costly enough to derive that we'd rather avoid it.
// Try to match on |raw_name| if possible.
SymbolMatchIndex SectionAndFullNameAndPathAndSize(const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
sym.Pss());
}
SymbolMatchIndex SectionAndFullNameAndPath(const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
0.0f);
}
// Allows signature changes (uses |Name()| rather than |FullName()|)
SymbolMatchIndex SectionAndNameAndPath(const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.Name(), GetIdPath(sym), 0.0f);
}
// Match on full name, but without path (to account for file moves)
SymbolMatchIndex SectionAndFullName(const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.full_name_, "", 0.0f);
}
int MatchSymbols(
std::function<SymbolMatchIndex(const caspian::Symbol&)> key_func,
std::vector<caspian::DeltaSymbol>* delta_symbols,
std::vector<const caspian::Symbol*>* unmatched_before,
std::vector<const caspian::Symbol*>* unmatched_after) {
// TODO(jaspercb): Accumulate added/dropped padding by section name.
std::vector<const caspian::Symbol*>* unmatched_after,
std::unordered_map<caspian::SectionId, float>* padding_by_section_name) {
int n_matched_symbols = 0;
std::unordered_map<SymbolMatchIndex,
std::list<std::reference_wrapper<const caspian::Symbol*>>>
......@@ -114,10 +96,16 @@ int MatchSymbols(
if (found != before_symbols_by_key.end() && found->second.size()) {
const caspian::Symbol*& before_sym = found->second.front().get();
found->second.pop_front();
// Padding tracked in aggregate, except for padding-only symbols.
if (before_sym->SizeWithoutPadding() != 0) {
(*padding_by_section_name)[before_sym->section_id_] +=
after_sym->PaddingPss() - before_sym->PaddingPss();
}
caspian::DeltaSymbol delta_sym(before_sym, after_sym);
if (delta_sym.Pss() != 0.0) {
delta_symbols->push_back(delta_sym);
}
// Null associated pointers in |unmatched_before|, |unmatched_after|.
before_sym = nullptr;
after_sym = nullptr;
n_matched_symbols++;
......@@ -125,10 +113,82 @@ int MatchSymbols(
}
}
// Compact out nulled entries.
Erase(*unmatched_before, nullptr);
Erase(*unmatched_after, nullptr);
return n_matched_symbols;
}
class DiffHelper {
public:
DiffHelper() = default;
std::string_view StripNumbers(std::string_view in) {
static const RE2 number_regex("\\d+");
re2::StringPiece piece(in.data(), in.size());
if (RE2::PartialMatch(piece, number_regex)) {
tmp_strings_.push_back(std::string(in));
RE2::GlobalReplace(&tmp_strings_.back(), number_regex, "");
return tmp_strings_.back();
}
return in;
}
std::string_view NormalizeStarSymbols(std::string_view in) {
// Used only for "*" symbols to strip suffixes "abc123" or "abc123 (any)".
static const RE2 normalize_star_symbols("\\s+\\d+(?: \\(.*\\))?$");
re2::StringPiece piece(in.data(), in.size());
if (RE2::PartialMatch(piece, normalize_star_symbols)) {
tmp_strings_.push_back(std::string(in));
RE2::Replace(&tmp_strings_.back(), normalize_star_symbols, "s");
return tmp_strings_.back();
}
return in;
}
using MatchFunc = std::function<SymbolMatchIndex(const caspian::Symbol&)>;
MatchFunc SectionAndFullNameAndPathAndSize() {
return [](const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
sym.Pss());
};
}
MatchFunc SectionAndFullNameAndPath() {
return [this](const caspian::Symbol& sym) {
return SymbolMatchIndex(sym.section_id_, StripNumbers(sym.full_name_),
GetIdPath(sym), 0.0f);
};
}
// Allows signature changes (uses |Name()| rather than |FullName()|)
MatchFunc SectionAndNameAndPath() {
return [this](const caspian::Symbol& sym) {
std::string_view name = sym.Name();
if (!name.empty() && name[0] == '*') {
name = NormalizeStarSymbols(name);
}
return SymbolMatchIndex(sym.section_id_, name, GetIdPath(sym), 0.0f);
};
}
// Match on full name, but without path (to account for file moves)
MatchFunc SectionAndFullName() {
return [](const caspian::Symbol& sym) {
if (!sym.IsNameUnique()) {
return SymbolMatchIndex();
}
return SymbolMatchIndex(sym.section_id_, sym.full_name_, "", 0.0f);
};
}
void ClearStrings() { tmp_strings_.clear(); }
private:
// Holds strings created during number stripping/star symbol normalization.
std::deque<std::string> tmp_strings_;
};
} // namespace
namespace caspian {
......@@ -149,13 +209,19 @@ DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) {
// Attempt several rounds to use increasingly loose matching on unmatched
// symbols. Any symbols still unmatched are tried in the next round.
int step = 0;
auto key_funcs = {SectionAndFullNameAndPathAndSize, SectionAndFullNameAndPath,
SectionAndNameAndPath, SectionAndFullName};
DiffHelper helper;
std::vector<DiffHelper::MatchFunc>
key_funcs = {helper.SectionAndFullNameAndPathAndSize(),
helper.SectionAndFullNameAndPath(),
helper.SectionAndNameAndPath(), helper.SectionAndFullName()};
std::unordered_map<SectionId, float> padding_by_section_name;
for (const auto& key_function : key_funcs) {
int n_matched_symbols = MatchSymbols(key_function, &ret.delta_symbols,
&unmatched_before, &unmatched_after);
int n_matched_symbols =
MatchSymbols(key_function, &ret.delta_symbols, &unmatched_before,
&unmatched_after, &padding_by_section_name);
std::cout << "Matched " << n_matched_symbols << " symbols in matching pass "
<< ++step << std::endl;
helper.ClearStrings();
}
// Add removals or deletions for any unmatched symbols.
......@@ -165,6 +231,24 @@ DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) {
for (const Symbol* before_sym : unmatched_before) {
ret.delta_symbols.push_back(DeltaSymbol(before_sym, nullptr));
}
// Create a DeltaSymbol to represent the zeroed out padding of matched
// symbols.
for (const auto& pair : padding_by_section_name) {
SectionId section_id = pair.first;
float padding = pair.second;
if (padding != 0.0f) {
ret.owned_symbols.emplace_back();
Symbol& after_sym = ret.owned_symbols.back();
after_sym.section_id_ = section_id;
after_sym.size_ = padding;
after_sym.padding_ = padding;
after_sym.full_name_ = "Overhead: aggregate padding of diff'ed symbols";
after_sym.template_name_ = after_sym.full_name_;
after_sym.name_ = after_sym.full_name_;
ret.delta_symbols.push_back(DeltaSymbol(nullptr, &after_sym));
}
}
return ret;
}
} // namespace caspian
This diff is collapsed.
......@@ -358,6 +358,7 @@ void ParseSizeInfo(const char* gzipped,
new_sym.component_ = info->components[cur_component_indices[i]];
}
new_sym.flags_ = flags;
new_sym.size_info_ = info;
// When we encounter a symbol with an alias count, the next N symbols we
// encounter should be placed in the same symbol group.
......
......@@ -39,7 +39,7 @@ void Symbol::DeriveNames() const {
name_ = full_name_;
} else if (IsDex()) {
std::tuple<std::string_view, std::string_view, std::string_view>
parsed_names = ParseJava(full_name_, &size_info->owned_strings);
parsed_names = ParseJava(full_name_, &size_info_->owned_strings);
template_name_ = std::get<1>(parsed_names);
name_ = std::get<2>(parsed_names);
} else if (IsStringLiteral()) {
......@@ -47,7 +47,7 @@ void Symbol::DeriveNames() const {
name_ = full_name_;
} else if (IsNative()) {
std::tuple<std::string_view, std::string_view, std::string_view>
parsed_names = ParseCpp(full_name_, &size_info->owned_strings);
parsed_names = ParseCpp(full_name_, &size_info_->owned_strings);
template_name_ = std::get<1>(parsed_names);
name_ = std::get<2>(parsed_names);
} else {
......
......@@ -5,8 +5,10 @@
#ifndef TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_
#define TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_
#include <stdint.h>
#include <stdlib.h>
#include <array>
#include <deque>
#include <map>
#include <string>
......@@ -42,6 +44,13 @@ enum class SectionId : char {
kPakTranslations = 'p',
};
enum class DiffStatus : uint8_t {
kUnchanged = 0,
kChanged = 1,
kAdded = 2,
kRemoved = 3,
};
class Symbol;
class BaseSymbol {
......@@ -102,12 +111,15 @@ class BaseSymbol {
section_id == SectionId::kText || section_id == SectionId::kRoData);
}
bool IsStringLiteral() const { return FullName().substr(0, 1) == "\""; }
bool IsStringLiteral() const {
std::string_view full_name = FullName();
return !full_name.empty() && full_name[0] == '"';
}
bool IsNameUnique() const {
return IsStringLiteral() || IsOverhead() ||
FullName().substr(0, 1) == "*" ||
(IsNative() && FullName().find('.') != std::string::npos);
return !(IsStringLiteral() || IsOverhead() ||
(!FullName().empty() && FullName()[0] == '*') ||
(IsNative() && FullName().find('.') != std::string_view::npos));
}
};
......@@ -162,7 +174,7 @@ class Symbol : public BaseSymbol {
// The SizeInfo the symbol was constructed from. Primarily used for
// allocating commonly-reused strings in a context where they won't outlive
// the symbol.
BaseSizeInfo* size_info = nullptr;
BaseSizeInfo* size_info_ = nullptr;
private:
void DeriveNames() const;
......@@ -193,6 +205,19 @@ class DeltaSymbol : public BaseSymbol {
float PssWithoutPadding() const override;
float PaddingPss() const override;
DiffStatus DiffStatus() const {
if (!before_) {
return DiffStatus::kAdded;
}
if (!after_) {
return DiffStatus::kRemoved;
}
if (Size() || Pss() != 0) {
return DiffStatus::kChanged;
}
return DiffStatus::kUnchanged;
}
private:
const Symbol* before_ = nullptr;
const Symbol* after_ = nullptr;
......@@ -234,9 +259,20 @@ struct DeltaSizeInfo : BaseSizeInfo {
DeltaSizeInfo(const DeltaSizeInfo&);
DeltaSizeInfo& operator=(const DeltaSizeInfo&);
using Results = std::array<int32_t, 4>;
Results CountsByDiffStatus() const {
Results ret{0};
for (const DeltaSymbol& sym : delta_symbols) {
ret[static_cast<uint8_t>(sym.DiffStatus())]++;
}
return ret;
}
const SizeInfo* before = nullptr;
const SizeInfo* after = nullptr;
std::vector<DeltaSymbol> delta_symbols;
// Symbols created during diffing, e.g. aggregated padding symbols.
std::deque<Symbol> owned_symbols;
};
struct Stat {
......
......@@ -30,13 +30,9 @@ size_t LastSeparatorIndex(std::string_view str, char sep, char othersep) {
}
std::string_view DirName(std::string_view path, char sep, char othersep) {
auto end = LastSeparatorIndex(path, sep, othersep);
if (end != std::string_view::npos) {
std::string_view ret = path;
ret.remove_suffix(path.size() - end);
return ret;
}
return "";
size_t sep_idx = LastSeparatorIndex(path, sep, othersep);
return sep_idx != std::string_view::npos ? path.substr(0, sep_idx)
: std::string_view();
}
} // namespace
......@@ -61,6 +57,7 @@ void TreeBuilder::Build(
std::vector<std::function<bool(const BaseSymbol&)>> filters) {
group_by_component_ = group_by_component;
filters_ = filters;
sep_ = group_by_component ? kComponentSep : kPathSep;
// Initialize tree root.
root_.container_type = ContainerType::kDirectory;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment