Commit dd038a5c authored by Jasper Chapman-Black's avatar Jasper Chapman-Black Committed by Commit Bot

SuperSize: Caspian: Utility functions for parsing Java symbol names

Just adding utility functions for now, a future CL will hook them up.

This is required for size diffing, specifically fallback matching
(e.g. when a function's signature changes but its name stays the same).

The calculation of these derived names is costly in space and time, so
it's likely that these properties will need to be derived lazily (only
when generating a size file diff or searching derived full names by
regular expression).

Bug: 1011921
Change-Id: I5708705334bc7b250f4b8252dde9efa87b3cdae0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1898477
Commit-Queue: Jasper Chapman-Black <jaspercb@chromium.org>
Reviewed-by: Victor Costan <pwnall@chromium.org>
Reviewed-by: Samuel Huang <huangs@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Cr-Commit-Position: refs/heads/master@{#713024}
parent 03a0b04f
......@@ -2,6 +2,8 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import("//testing/test.gni")
# Enable wasm target that depends on foo.patch being applied.
if (!defined(is_wasm)) {
is_wasm = current_os == "wasm"
......@@ -13,6 +15,8 @@ source_set("caspian-lib") {
sources = [
"file_format.cc",
"file_format.h",
"function_signature.cc",
"function_signature.h",
"model.cc",
"model.h",
"tree_builder.cc",
......@@ -27,6 +31,17 @@ source_set("caspian-lib") {
]
}
# Unit-test binary for the code in :caspian-lib. Currently it only builds
# function_signature_test.cc; gtest_main supplies the main() entry point.
test("caspian_unittests") {
sources = [
"function_signature_test.cc",
]
deps = [
":caspian-lib",
"//testing/gtest",
"//testing/gtest:gtest_main",
]
}
executable("cli") {
sources = [
"cli.cc",
......
......@@ -2,4 +2,5 @@ include_rules = [
"+third_party/zlib",
"+third_party/jsoncpp",
"+third_party/re2",
"+testing/gtest/include/gtest",
]
......@@ -81,7 +81,8 @@ void BuildTree(bool group_by_component,
include_regex.reset(new RE2(include_regex_str));
if (include_regex->error_code() == RE2::NoError) {
filters.push_back([&include_regex](const Symbol& sym) -> bool {
return RE2::PartialMatch(sym.full_name, *include_regex);
re2::StringPiece piece(sym.full_name.data(), sym.full_name.size());
return RE2::PartialMatch(piece, *include_regex);
});
}
}
......@@ -91,7 +92,8 @@ void BuildTree(bool group_by_component,
exclude_regex.reset(new RE2(exclude_regex_str));
if (exclude_regex->error_code() == RE2::NoError) {
filters.push_back([&exclude_regex](const Symbol& sym) -> bool {
return !RE2::PartialMatch(sym.full_name, *exclude_regex);
re2::StringPiece piece(sym.full_name.data(), sym.full_name.size());
return !RE2::PartialMatch(piece, *exclude_regex);
});
}
}
......
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Much of this logic is duplicated at
// tools/binary_size/libsupersize/function_signature.py.
#include <stddef.h>
#include <deque>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>
#include "tools/binary_size/libsupersize/caspian/function_signature.h"
namespace caspian {
// Splits |str| at every occurrence of |delim|.
// Empty pieces are kept, so the result always holds (number of delimiters + 1)
// entries; an empty |str| yields a single empty piece. The returned views
// alias the buffer behind |str| and are only valid while it is alive.
std::vector<std::string_view> SplitBy(std::string_view str, char delim) {
  std::vector<std::string_view> pieces;
  size_t piece_begin = 0;
  for (size_t delim_idx = str.find(delim);
       delim_idx != std::string_view::npos;
       delim_idx = str.find(delim, piece_begin)) {
    pieces.push_back(str.substr(piece_begin, delim_idx - piece_begin));
    piece_begin = delim_idx + 1;
  }
  // Whatever follows the last delimiter (possibly nothing) is the final piece.
  pieces.push_back(str.substr(piece_begin));
  return pieces;
}
// Breaks a Java symbol |full_name| into (full_name, template_name, name):
//   * full_name     = "class_with_package#member(args): type"
//   * template_name = "class_with_package#member"
//   * name          = "class_without_package#member"
//
// Two input formats are accepted:
//   * raw:            "Class [returntype] member"
//   * already parsed: "Class#member: type" (i.e. a previous full_name output)
// When no member can be extracted (e.g. a bare class name) the result is
// (|full_name|, |full_name|, class_without_package) and nothing is allocated.
//
// Newly built strings are appended to |owned_strings|, which must be non-null
// and must outlive the returned views. Returned views may also alias the
// buffer behind the |full_name| argument.
std::tuple<std::string_view, std::string_view, std::string_view> ParseJava(
    std::string_view full_name,
    std::deque<std::string>* owned_strings) {
  constexpr size_t kNpos = std::string_view::npos;

  std::string_view full_class_name;
  std::string_view member;
  // Backing storage for a synthesized ": type" suffix. |member_type| may view
  // into it, so it has to stay alive until |member_type| has been copied.
  std::string synthesized_member_type;
  std::string_view member_type;

  const size_t hash_idx = full_name.find('#');
  if (hash_idx != kNpos) {
    // Parse an already parsed full_name.
    // Format: Class#symbol: type
    full_class_name = full_name.substr(0, hash_idx);
    // Only a ':' located after the '#' can introduce the type. Searching from
    // the start of the string would let a ':' inside the class part underflow
    // the member length and leak the class name into |member_type|.
    const size_t colon_idx = full_name.find(':', hash_idx + 1);
    if (colon_idx == kNpos) {
      member = full_name.substr(hash_idx + 1);
    } else {
      member = full_name.substr(hash_idx + 1, colon_idx - hash_idx - 1);
      member_type = full_name.substr(colon_idx);
    }
  } else {
    // Format: Class [returntype] functionName()
    // The class is the first space-separated token, the member is the last
    // one, and with three or more tokens the second one is the type.
    const size_t first_space = full_name.find(' ');
    full_class_name = full_name.substr(0, first_space);
    if (first_space != kNpos) {
      const size_t last_space = full_name.rfind(' ');
      member = full_name.substr(last_space + 1);
      if (last_space != first_space) {
        const size_t type_begin = first_space + 1;
        // Guaranteed to be found: |last_space| lies at or after |type_begin|.
        const size_t type_end = full_name.find(' ', type_begin);
        synthesized_member_type =
            ": " +
            std::string(full_name.substr(type_begin, type_end - type_begin));
        member_type = synthesized_member_type;
      }
    }
  }

  // Drop the package prefix: keep what follows the last '.', if there is one.
  const size_t last_dot = full_class_name.rfind('.');
  const std::string_view short_class_name =
      last_dot == kNpos ? full_class_name
                        : full_class_name.substr(last_dot + 1);

  if (member.empty()) {
    return std::make_tuple(full_name, full_name, short_class_name);
  }

  // Builds "<class_name>#<member_name><suffix>" directly inside
  // |owned_strings| and returns a view of it. Appending to a std::deque never
  // invalidates references to existing elements, so views handed out earlier
  // (including ones passed in as arguments here) stay valid.
  const auto intern = [owned_strings](std::string_view class_name,
                                      std::string_view member_name,
                                      std::string_view suffix) {
    std::string& joined = owned_strings->emplace_back();
    joined.reserve(class_name.size() + 1 + member_name.size() + suffix.size());
    joined.append(class_name);
    joined.push_back('#');
    joined.append(member_name);
    joined.append(suffix);
    return std::string_view(joined);
  };

  full_name = intern(full_class_name, member, member_type);
  // |template_name| and |name| omit the argument list.
  member = member.substr(0, member.find('('));
  const std::string_view name = intern(short_class_name, member, {});
  const std::string_view template_name = intern(full_class_name, member, {});
  return std::make_tuple(full_name, template_name, name);
}
} // namespace caspian
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_FUNCTION_SIGNATURE_H_
#define TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_FUNCTION_SIGNATURE_H_
#include <deque>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>
namespace caspian {
// Splits |str| at every occurrence of |delim|. Empty pieces are preserved, so
// the result always contains at least one element (e.g. "" -> {""} and
// "/" -> {"", ""} for delim '/'). The returned views point into the buffer
// behind |str| and must not outlive it.
std::vector<std::string_view> SplitBy(std::string_view str, char delim);
// Breaks Java |full_name| into parts.
// If needed, new strings are allocated into |owned_strings|.
// Returns: A tuple of (full_name, template_name, name), where:
// * full_name = "class_with_package#member(args): type"
// * template_name = "class_with_package#member"
// * name = "class_without_package#member"
std::tuple<std::string_view, std::string_view, std::string_view> ParseJava(
std::string_view full_name,
std::deque<std::string>* owned_strings);
} // namespace caspian
#endif // TOOLS_BINARY_SIZE_LIBSUPERSIZE_CASPIAN_FUNCTION_SIGNATURE_H_
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "tools/binary_size/libsupersize/caspian/function_signature.h"
#include <string>
#include <string_view>
#include <tuple>
#include "testing/gtest/include/gtest/gtest.h"
namespace {
// Copies each std::string_view of |tuple| into an owning std::string, giving a
// tuple that can be compared and printed independently of the viewed buffers.
std::tuple<std::string, std::string, std::string> PrettyDebug(
    std::tuple<std::string_view, std::string_view, std::string_view> tuple) {
  const auto& [first, second, third] = tuple;
  return std::tuple<std::string, std::string, std::string>(
      std::string(first), std::string(second), std::string(third));
}
// Checks that SplitBy() preserves empty pieces and always returns at least one
// piece.
TEST(AnalyzeTest, StringSplit) {
  using Pieces = std::vector<std::string_view>;

  // Consecutive delimiters yield an empty piece between them.
  EXPECT_EQ((Pieces{"a", "", "b", "cd"}), caspian::SplitBy("a//b/cd", '/'));

  // No delimiter at all: the whole input is the single piece.
  EXPECT_EQ((Pieces{"a"}), caspian::SplitBy("a", '/'));

  // Empty input still produces one (empty) piece.
  EXPECT_EQ((Pieces{""}), caspian::SplitBy("", '/'));

  // A lone delimiter separates two empty pieces.
  EXPECT_EQ((Pieces{"", ""}), caspian::SplitBy("/", '/'));
}
// Checks ParseJava() on methods, constructors, bare classes and fields, and
// that re-parsing its |full_name| output gives the same result.
TEST(AnalyzeTest, ParseJavaFunctionSignature) {
  std::deque<std::string> owned_strings;

  // Parses |sig|, compares all three outputs against the expectations, then
  // feeds the produced full name back in to verify the round trip.
  const auto expect_parse = [&owned_strings](
                                const std::string& sig,
                                const std::string& exp_full_name,
                                const std::string& exp_template_name,
                                const std::string& exp_name) {
    const auto parsed = caspian::ParseJava(sig, &owned_strings);
    const auto& [full_name, template_name, name] = parsed;
    EXPECT_EQ(exp_full_name, std::string(full_name));
    EXPECT_EQ(exp_template_name, std::string(template_name));
    EXPECT_EQ(exp_name, std::string(name));

    // Ensure that ParseJava() is idempotent w.r.t. |full_name| output.
    const auto reparsed = caspian::ParseJava(full_name, &owned_strings);
    EXPECT_EQ(PrettyDebug(parsed), PrettyDebug(reparsed));
  };

  // Java method with no args
  expect_parse("org.ClassName java.util.List getCameraInfo()",
               "org.ClassName#getCameraInfo(): java.util.List",
               "org.ClassName#getCameraInfo", "ClassName#getCameraInfo");

  // Java method with args
  expect_parse("org.ClassName int readShort(int,int)",
               "org.ClassName#readShort(int,int): int",
               "org.ClassName#readShort", "ClassName#readShort");

  // Java <init> method
  expect_parse("org.ClassName$Inner <init>(byte[])",
               "org.ClassName$Inner#<init>(byte[])",
               "org.ClassName$Inner#<init>", "ClassName$Inner#<init>");

  // Java Class
  expect_parse("org.ClassName", "org.ClassName", "org.ClassName", "ClassName");

  // Java field
  expect_parse("org.ClassName some.Type mField",
               "org.ClassName#mField: some.Type", "org.ClassName#mField",
               "ClassName#mField");
}
} // namespace
......@@ -9,6 +9,7 @@
#include <deque>
#include <map>
#include <string_view>
#include <vector>
#include "third_party/jsoncpp/source/include/json/json.h"
......@@ -55,7 +56,7 @@ struct Symbol {
int32_t padding = 0;
// Pointers into SizeInfo->raw_decompressed;
const char* section_name = nullptr;
const char* full_name = nullptr;
std::string_view full_name;
const char* object_path = nullptr;
const char* source_path = nullptr;
const char* component = nullptr;
......
......@@ -2,11 +2,14 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Logic for parsing a function signatures."""
"""Logic for parsing a function signatures.
Much of this logic is duplicated at
tools/binary_size/libsupersize/caspian/function_signature.cc."""
def _FindParameterListParen(name):
"""Finds index of the "(" that denotes the start of a paremeter list."""
"""Finds index of the "(" that denotes the start of a parameter list."""
# This loops from left-to-right, but the only reason (I think) that this
# is necessary (rather than reusing _FindLastCharOutsideOfBrackets), is
# to capture the outer-most function in the case where classes are nested.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment