Commit 755ae1b5, authored by Jasper Chapman-Black; committed by Commit Bot

SuperSize: C++ implementation of .size format parser

Preliminary work as part of investigating converting the SuperSize Tiger
Viewer web worker to parse and serve .size files instead of .ndjson
files.

For now, for ease of development and testing, it's packaged as a binary.

Change-Id: Ie016922f121e78ae421f8f3720d427bf6d02b5ca
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1841742
Commit-Queue: Jasper Chapman-Black <jaspercb@chromium.org>
Reviewed-by: Samuel Huang <huangs@chromium.org>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Cr-Commit-Position: refs/heads/master@{#703882}
parent 15ed6e2e
......@@ -22,3 +22,9 @@ python_library("sizes_py") {
"//third_party/catapult/tracing:convert_chart_json",
]
}
# Convenience target: depending on this group builds the caspian "cli"
# executable using the host toolchain.
group("caspian") {
deps = [
"//tools/binary_size/libsupersize/caspian:cli($host_toolchain)",
]
}
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# The .size file parser (file_format.*) and the data model it fills in
# (model.*). Kept separate from the executable so the sources can be listed
# once and depended on by name.
source_set("caspian-lib") {
sources = [
"file_format.cc",
"file_format.h",
"model.cc",
"model.h",
]
# //base supplies JSON and string helpers; the zlib targets supply the gzip
# decompression used by file_format.cc.
deps = [
"//base",
"//third_party/zlib:zlib",
"//third_party/zlib/google:compression_utils",
]
}
# Command-line binary (cli.cc) that parses a .size file via :caspian-lib.
executable("cli") {
sources = [
"cli.cc",
]
deps = [
":caspian-lib",
]
}
# Include rules for this directory. file_format.cc includes
# third_party/zlib/google/compression_utils.h, which this entry covers.
include_rules = [
"+third_party/zlib",
]
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Command-line interface for checking the integrity of .size files.
// Usage: cli (path to .size file)
#include <stdlib.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include "tools/binary_size/libsupersize/caspian/file_format.h"
#include "tools/binary_size/libsupersize/caspian/model.h"
// Entry point for the .size integrity checker.
//
// Loads the file named by argv[1], parses it with caspian::ParseSizeInfo, and
// verifies that every member of an alias group points back at that same
// group. On success prints the size of the largest alias group and returns 0.
// Exits with status 1 on a usage error, an unopenable file, or an
// inconsistent alias group.
int main(int argc, char* argv[]) {
  if (argc < 2) {
    std::cerr << "Usage: cli (path to .size file)" << std::endl;
    exit(1);
  }

  // The parser gunzips this stream, so it must be read as raw bytes: text
  // mode may translate newline bytes on some platforms and corrupt the data.
  std::ifstream ifs(argv[1], std::ifstream::in | std::ifstream::binary);
  if (!ifs.good()) {
    std::cerr << "Unable to open file: " << argv[1] << std::endl;
    exit(1);
  }

  caspian::SizeInfo info;
  caspian::ParseSizeInfo(&ifs, &info);

  // size_t matches the type returned by std::vector::size(), so std::max
  // deduces a single template argument on every platform.
  size_t max_aliases = 0;
  for (const auto& s : info.raw_symbols) {
    if (s.aliases == nullptr)
      continue;
    max_aliases = std::max(max_aliases, s.aliases->size());
    // Each group is re-walked once per member, so this is quadratic in the
    // group size. Acceptable for an integrity check.
    for (const auto* ss : *s.aliases) {
      if (ss->aliases != s.aliases) {
        std::cerr << "Not all symbols in alias group had same alias count"
                  << std::endl;
        exit(1);
      }
    }
  }
  std::cout << "Largest number of aliases: " << max_aliases << std::endl;
  return 0;
}
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
/* C++ implementation of a .size file parser.
* The .size file spec is found in libsupersize/file_format.py
*/
#include "tools/binary_size/libsupersize/caspian/file_format.h"
#include <assert.h>
#include <iostream>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>
#include "base/json/json_reader.h"
#include "base/optional.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "third_party/zlib/google/compression_utils.h"
#include "tools/binary_size/libsupersize/caspian/model.h"
namespace {
// Version line expected as the second line of a decompressed .size file.
// A constexpr character array needs no dynamic initializer or destructor at
// namespace scope, and still compares directly against std::string.
constexpr char SERIALIZATION_VERSION[] = "Size File Format v1";
// Extracts one integer from |stream| and discards the single character that
// follows it (the newline terminating the line).
//
// Returns 0 when no integer can be extracted. |val| is zero-initialized
// because an already-failed stream leaves the extraction target untouched,
// which would otherwise return an indeterminate value.
int readLoneInt(std::stringstream* stream) {
  int val = 0;
  *stream >> val;
  stream->ignore();  // Drop newline.
  return val;
}
// Reads every remaining byte of |gzstream|, gunzips the buffer, and returns
// the decompressed text wrapped in a std::stringstream.
//
// Writes an error to stderr and exits with status 1 when decompression
// fails, matching the error handling used by the rest of this file. Without
// this check the parser would run on an empty buffer and report a misleading
// version error.
std::stringstream decompress(std::istream* gzstream) {
  const std::string gzbuf(std::istreambuf_iterator<char>(*gzstream), {});
  std::string uncompressed;
  if (!compression::GzipUncompress(gzbuf, &uncompressed)) {
    std::cerr << "Failed to decompress input as gzip." << std::endl;
    exit(1);
  }
  return std::stringstream(uncompressed);
}
// Consumes the next line of |stream| and splits it on |delimiter|.
// Whitespace inside fields is kept and empty fields are retained.
std::vector<std::string> ReadValuesFromLine(std::istream* stream,
                                            base::StringPiece delimiter) {
  std::string raw_line;
  std::getline(*stream, raw_line);
  std::vector<std::string> fields = base::SplitString(
      raw_line, delimiter, base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
  return fields;
}
// Reads |n| whitespace-separated integers of type T from |stream|.
// When |stored_as_delta| is true the values are treated as successive
// differences and are replaced in place by their running sums.
template <typename T>
std::vector<T> ReadIntList(std::istream* stream, int n, bool stored_as_delta) {
  std::vector<T> values(n);
  for (T& value : values)
    *stream >> value;
  if (!stored_as_delta)
    return values;
  // Convert deltas to absolute values.
  std::partial_sum(values.begin(), values.end(), values.begin());
  return values;
}
// Reads one integer list per section from |stream|. The i-th list holds
// section_counts[i] values and is decoded by ReadIntList with the same
// |stored_as_delta| setting. Lists are returned in section order.
template <typename T>
std::vector<std::vector<T>> ReadIntListForEachSection(
    std::istream* stream,
    const std::vector<int>& section_counts,
    bool stored_as_delta) {
  std::vector<std::vector<T>> per_section;
  per_section.reserve(section_counts.size());
  for (size_t idx = 0; idx < section_counts.size(); ++idx) {
    per_section.emplace_back(
        ReadIntList<T>(stream, section_counts[idx], stored_as_delta));
  }
  return per_section;
}
} // namespace
namespace caspian {
void ParseSizeInfo(std::istream* gzstream, ::caspian::SizeInfo* info) {
std::stringstream ss = decompress(gzstream);
// Ignore generated header
std::string line;
std::getline(ss, line);
// Serialization version
std::getline(ss, line);
if (line != SERIALIZATION_VERSION) {
std::cerr << "Serialization version: '" << line << "' not recognized."
<< std::endl;
exit(1);
}
// Metadata
int metadata_len = readLoneInt(&ss) + 1;
std::vector<char> metadata_str;
metadata_str.resize(metadata_len);
ss.get(metadata_str.data(), metadata_len, EOF);
base::Optional<base::Value> root =
base::JSONReader::Read(metadata_str.data());
if (!root) {
std::cerr << "Failed to parse JSON metadata:" << metadata_str.data()
<< std::endl;
exit(1);
} else {
std::swap(info->metadata, *root);
}
const bool has_components =
info->metadata.FindKey("has_components")->GetBool();
// List of paths: (object_path, [source_path])
int n_paths = readLoneInt(&ss);
info->object_paths.reserve(n_paths);
info->source_paths.reserve(n_paths);
for (int i = 0; i < n_paths; i++) {
const std::vector<std::string> paths = ReadValuesFromLine(&ss, "\t");
info->object_paths.push_back(paths[0]);
if (paths.size() == 2) {
info->source_paths.push_back(paths[1]);
} else if (paths.size() == 1) {
info->source_paths.push_back("");
} else {
std::cerr << "Could not extract paths from line" << std::endl;
exit(1);
}
}
// List of component names
int component_len;
ss >> component_len;
std::cout << "Reading " << component_len << " components" << std::endl;
ss.ignore(); // Eat newline
info->components.reserve(component_len);
for (int i = 0; i < component_len; i++) {
std::getline(ss, line);
info->components.push_back(line);
}
// Section names
info->section_names = ReadValuesFromLine(&ss, "\t");
int n_sections = info->section_names.size();
// Symbol counts for each section
std::vector<int> section_counts = ReadIntList<int>(&ss, n_sections, false);
std::cout << "Section counts:" << std::endl;
int total_symbols =
std::accumulate(section_counts.begin(), section_counts.end(), 0);
for (int section_idx = 0; section_idx < n_sections; section_idx++) {
std::cout << " " << info->section_names[section_idx] << '\t'
<< section_counts[section_idx] << std::endl;
}
std::vector<std::vector<int64_t>> addresses =
ReadIntListForEachSection<int64_t>(&ss, section_counts, true);
std::vector<std::vector<int>> sizes =
ReadIntListForEachSection<int>(&ss, section_counts, false);
std::vector<std::vector<int>> path_indices =
ReadIntListForEachSection<int>(&ss, section_counts, true);
std::vector<std::vector<int>> component_indices;
if (has_components) {
component_indices =
ReadIntListForEachSection<int>(&ss, section_counts, true);
} else {
component_indices.resize(addresses.size());
}
ss.ignore();
info->raw_symbols.reserve(total_symbols);
// Construct raw symbols
for (int section_idx = 0; section_idx < n_sections; section_idx++) {
const std::string* cur_section_name = &info->section_names[section_idx];
const int cur_section_count = section_counts[section_idx];
const std::vector<int64_t>& cur_addresses = addresses[section_idx];
const std::vector<int>& cur_sizes = sizes[section_idx];
const std::vector<int>& cur_path_indices = path_indices[section_idx];
const std::vector<int>& cur_component_indices =
component_indices[section_idx];
int32_t alias_counter = 0;
for (int i = 0; i < cur_section_count; i++) {
const std::vector<std::string> parts = ReadValuesFromLine(&ss, "\t");
if (parts.empty()) {
std::cout << "Row " << i << " of symbols is blank" << std::endl;
continue;
}
uint32_t flags = 0;
uint32_t num_aliases = 0;
if (parts.size() == 3) {
base::HexStringToUInt(parts[1], &num_aliases);
base::HexStringToUInt(parts[2], &flags);
} else if (parts.size() == 2) {
if (parts[1][0] == '0') {
// full_name aliases_part
base::HexStringToUInt(parts[1], &num_aliases);
} else {
// full_name flags_part
base::HexStringToUInt(parts[1], &flags);
}
}
info->raw_symbols.emplace_back();
caspian::Symbol& new_sym = info->raw_symbols.back();
new_sym.section_name = cur_section_name;
new_sym.full_name = parts[0];
new_sym.address = cur_addresses[i];
new_sym.size = cur_sizes[i];
new_sym.object_path = &info->object_paths[cur_path_indices[i]];
new_sym.source_path = &info->source_paths[cur_path_indices[i]];
if (has_components) {
new_sym.component = &info->components[cur_component_indices[i]];
} else {
new_sym.component = nullptr;
}
new_sym.flags = flags;
// Derived
new_sym.padding = 0;
new_sym.template_name = "";
new_sym.name = "";
// When we encounter a symbol with an alias count, the next N symbols we
// encounter should be placed in the same symbol group.
if (num_aliases) {
assert(alias_counter == 0);
info->alias_groups.emplace_back();
alias_counter = num_aliases;
}
if (alias_counter > 0) {
new_sym.aliases = &info->alias_groups.back();
new_sym.aliases->push_back(&new_sym);
alias_counter--;
} else {
new_sym.aliases = nullptr;
}
}
}
if (std::getline(ss, line)) {
int lines_remaining = 50;
do {
std::cerr << "Unparsed line: " << line << std::endl;
lines_remaining++;
} while (lines_remaining > 0 && std::getline(ss, line));
exit(1);
}
std::cout << "Parsed " << info->raw_symbols.size() << " symbols" << std::endl;
}
} // namespace caspian
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_FILE_FORMAT_H_
#define TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_FILE_FORMAT_H_
#include <istream>
namespace caspian {
// Declared here so this header does not need to include model.h.
struct SizeInfo; // forward declare
// Gunzips |gzstream|, parses the result as a .size file, and fills in |info|.
// On an unrecognized serialization version or malformed content, writes an
// error to stderr and exits the process with status 1.
void ParseSizeInfo(std::istream* gzstream, caspian::SizeInfo* info);
} // namespace caspian
#endif // TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_FILE_FORMAT_H_
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "tools/binary_size/libsupersize/caspian/model.h"
#include "tools/binary_size/libsupersize/caspian/file_format.h"
caspian::Symbol::Symbol() = default;
caspian::Symbol::Symbol(const Symbol& other) = default;
caspian::SizeInfo::SizeInfo() = default;
caspian::SizeInfo::~SizeInfo() = default;
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_
#define TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_
#include <stdlib.h>
#include <deque>
#include <string>
#include <vector>
#include "base/values.h"
// Copied from representation in tools/binary_size/libsupersize/models.py
namespace caspian {
// One symbol record parsed from a .size file. The pointer members do not own
// what they point to; ParseSizeInfo sets them to elements of the containers
// held by the SizeInfo that also holds this symbol.
struct Symbol {
Symbol();
Symbol(const Symbol& other);
// NOTE(review): ParseSizeInfo reads addresses as int64_t and assigns them
// here, so values outside the int32_t range are narrowed. Confirm this is
// intended.
int32_t address = 0;
int32_t size = 0;
int32_t flags = 0;
// ParseSizeInfo sets |padding| to 0 and |template_name| / |name| to empty
// strings ("Derived" fields); it does not compute them.
int32_t padding = 0;
std::string full_name;
std::string template_name;
std::string name;
// Point into SizeInfo::object_paths, section_names, source_paths and
// components respectively. |component| is null when the file has no
// component data.
const std::string* object_path = nullptr;
const std::string* section_name = nullptr;
const std::string* source_path = nullptr;
const std::string* component = nullptr;
// The alias group in SizeInfo::alias_groups that contains this symbol, or
// null when the symbol is not part of a group.
std::vector<Symbol*>* aliases = nullptr;
};
// Everything parsed from one .size file: the symbols plus the string tables
// and alias groups the symbols point into. Not copyable, since a copy's
// symbols would still point into the original's containers.
struct SizeInfo {
SizeInfo();
~SizeInfo();
SizeInfo(const SizeInfo& other) = delete;
SizeInfo& operator=(const SizeInfo& other) = delete;
// All symbols, in the order they were parsed. Alias groups hold pointers to
// these elements, so this vector must not reallocate once groups are built.
std::vector<caspian::Symbol> raw_symbols;
// The JSON metadata blob from the file.
base::Value metadata;
// Entries in |raw_symbols| hold pointers to this data.
// Appending to one will change their capacity and invalidate pointers.
std::vector<std::string> object_paths;
std::vector<std::string> source_paths;
std::vector<std::string> components;
std::vector<std::string> section_names;
// A container for each symbol group.
// Symbols store the address of their group; a std::deque keeps existing
// elements in place when new groups are appended at the end.
std::deque<std::vector<Symbol*>> alias_groups;
};
} // namespace caspian
#endif // TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_MODEL_H_
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment