Commit 72090733 authored by Jasper Chapman-Black, committed by Commit Bot

Supersize archive: Add --relocations

Sample output in viewer:
https://storage.googleapis.com/chrome-supersize/viewer.html?load_url=oneoffs%2Fsample-relocations2.ndjson&byteunit=B
(note the unit is 'bytes' when this is actually instruction count)

Using llvm-readelf directly on the above binary gives 392,984 R_ARM_RELATIVE
instructions, which differs by a whopping 300 instructions from the 392,689.16
instructions counted by the analysis above. I'm not sure where the difference
is coming from (although the floating-point values come from how size is
attributed across symbol aliases). I'd be interested in thoughts on this, but
accuracy to one part in a thousand is good enough for this use case.

Timing information: This post-processing step takes about six seconds on my
workstation (in addition to the previous runtime of supersize archive).
Doesn't seem worth optimizing.

Bug: 981592
Change-Id: I0c8ad5ec5c3e76ef51493ea43800900077f40110
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1811930
Commit-Queue: Jasper Chapman-Black <jaspercb@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Cr-Commit-Position: refs/heads/master@{#698603}
parent a90d4e2c
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
"""Main Python API for analyzing binary size.""" """Main Python API for analyzing binary size."""
import argparse import argparse
import bisect
import calendar import calendar
import collections import collections
import datetime import datetime
...@@ -119,6 +120,9 @@ class SectionSizeKnobs(object): ...@@ -119,6 +120,9 @@ class SectionSizeKnobs(object):
self.src_root = path_util.SRC_ROOT self.src_root = path_util.SRC_ROOT
# Whether to count number of relative relocations instead of binary size
self.relocations_mode = False
def _OpenMaybeGz(path): def _OpenMaybeGz(path):
"""Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
...@@ -1286,6 +1290,47 @@ def _CalculateElfOverhead(section_sizes, elf_path): ...@@ -1286,6 +1290,47 @@ def _CalculateElfOverhead(section_sizes, elf_path):
return 0 return 0
def _OverwriteSymbolSizesWithRelocationCount(raw_symbols, tool_prefix,
                                             elf_path):
  """Replaces symbol sizes with the number of relative relocations they own.

  Every symbol in |raw_symbols| has its |address|, |size| and |padding| reset
  to 0. Each R_ARM_RELATIVE relocation listed by `readelf --relocs` is then
  attributed to the native symbol with the largest start address that does not
  exceed the relocation's address, adding 1 to the |size| of that symbol (or of
  each of its aliases, when it has any). Finally, symbols that are neither
  native nor credited with a relocation are removed from |raw_symbols|.

  NOTE: Only relocations printed as R_ARM_RELATIVE are counted.

  Args:
    raw_symbols: List of symbols. Modified in place.
    tool_prefix: Passed to path_util.GetReadElfPath() to locate readelf.
    elf_path: Path of the ELF file whose relocations are read.

  Raises:
    AssertionError: If readelf reports no R_ARM_RELATIVE relocations.
  """
  logging.info('Overwriting symbol sizes with relocation count')
  # bisect needs ascending addresses. sorted() is stable, so this changes
  # nothing when the symbols already arrive in address order.
  native_symbols = sorted((sym for sym in raw_symbols if sym.IsNative()),
                          key=lambda sym: sym.address)
  symbol_addresses = [sym.address for sym in native_symbols]
  if native_symbols:
    # Last entry is the end of the last symbol, so we don't misattribute all
    # relros after the last symbol to that symbol.
    last_symbol = native_symbols[-1]
    symbol_addresses.append(last_symbol.address + last_symbol.size)

  for symbol in raw_symbols:
    symbol.address = 0
    symbol.size = 0
    symbol.padding = 0

  if not native_symbols:
    # Nothing can receive a relocation, and every remaining symbol is a
    # zero-sized non-native one, so the final filter would drop them all.
    logging.warning('No native symbols found; no relocations to attribute.')
    raw_symbols[:] = []
    return

  relocs_cmd = [path_util.GetReadElfPath(tool_prefix), '--relocs', elf_path]
  # Decode so that line handling works whether check_output() yields bytes or
  # str.
  readelf_output = subprocess.check_output(relocs_cmd).decode('utf-8')
  # Grab first column from (sample output) '02de6d5c 00000017 R_ARM_RELATIVE'
  relro_addresses = [
      int(line.split()[0], 16)
      for line in readelf_output.splitlines()
      if 'R_ARM_RELATIVE' in line
  ]
  # More likely for there to be a bug in supersize than for an ELF to have no
  # relative relocations.
  assert relro_addresses, (
      'No R_ARM_RELATIVE relocations found in %s' % elf_path)

  logging.info('Adding %d relocations', len(relro_addresses))
  for addr in relro_addresses:
    # Attribute relros to largest symbol start address that precede them.
    idx = bisect.bisect_right(symbol_addresses, addr) - 1
    # idx == -1: before the first symbol. idx == len(native_symbols): at or
    # past the end of the last symbol. Neither is attributed.
    if 0 <= idx < len(native_symbols):
      symbol = native_symbols[idx]
      for alias in symbol.aliases or [symbol]:
        alias.size += 1

  logging.info('Removing non-native symbols...')
  raw_symbols[:] = [sym for sym in raw_symbols if sym.size or sym.IsNative()]
def CreateSectionSizesAndSymbols(map_path=None, def CreateSectionSizesAndSymbols(map_path=None,
tool_prefix=None, tool_prefix=None,
output_directory=None, output_directory=None,
...@@ -1450,6 +1495,10 @@ def CreateSectionSizesAndSymbols(map_path=None, ...@@ -1450,6 +1495,10 @@ def CreateSectionSizesAndSymbols(map_path=None,
_CompactLargeAliasesIntoSharedSymbols(raw_symbols, knobs) _CompactLargeAliasesIntoSharedSymbols(raw_symbols, knobs)
logging.debug('Connecting nm aliases') logging.debug('Connecting nm aliases')
_ConnectNmAliases(raw_symbols) _ConnectNmAliases(raw_symbols)
if elf_path and knobs.relocations_mode:
_OverwriteSymbolSizesWithRelocationCount(raw_symbols, tool_prefix, elf_path)
return section_sizes, raw_symbols return section_sizes, raw_symbols
...@@ -1627,6 +1676,11 @@ def AddArguments(parser): ...@@ -1627,6 +1676,11 @@ def AddArguments(parser):
default=True, action='store_false', default=True, action='store_false',
help='Disable breaking down "** merge strings" into more ' help='Disable breaking down "** merge strings" into more '
'granular symbols.') 'granular symbols.')
parser.add_argument(
'--relocations',
action='store_true',
help='Instead of counting binary size, count number of relative'
'relocation instructions in ELF code.')
parser.add_argument('--source-directory', parser.add_argument('--source-directory',
help='Custom path to the root source directory.') help='Custom path to the root source directory.')
parser.add_argument( parser.add_argument(
...@@ -1782,6 +1836,10 @@ def _RunInternal(args, parser, extracted_minimal_apk_path): ...@@ -1782,6 +1836,10 @@ def _RunInternal(args, parser, extracted_minimal_apk_path):
if args.no_native: if args.no_native:
knobs.analyze_native = False knobs.analyze_native = False
if args.relocations:
knobs.relocations_mode = True
knobs.analyze_java = False
if not knobs.analyze_native: if not knobs.analyze_native:
map_path = None map_path = None
elf_path = None elf_path = None
...@@ -1790,7 +1848,6 @@ def _RunInternal(args, parser, extracted_minimal_apk_path): ...@@ -1790,7 +1848,6 @@ def _RunInternal(args, parser, extracted_minimal_apk_path):
metadata = CreateMetadata(map_path, elf_path, args.apk_file, metadata = CreateMetadata(map_path, elf_path, args.apk_file,
args.minimal_apks_file, tool_prefix, args.minimal_apks_file, tool_prefix,
output_directory, linker_name) output_directory, linker_name)
section_sizes, raw_symbols = CreateSectionSizesAndSymbols( section_sizes, raw_symbols = CreateSectionSizesAndSymbols(
map_path=map_path, map_path=map_path,
tool_prefix=tool_prefix, tool_prefix=tool_prefix,
......
...@@ -180,10 +180,6 @@ def GetObjDumpPath(tool_prefix): ...@@ -180,10 +180,6 @@ def GetObjDumpPath(tool_prefix):
def GetReadElfPath(tool_prefix): def GetReadElfPath(tool_prefix):
# Work-around for llvm-readobj bug where 'File: ...' info is not printed:
# https://bugs.llvm.org/show_bug.cgi?id=35351
if tool_prefix[-5:] == 'llvm-':
return 'readelf'
return tool_prefix + 'readelf' return tool_prefix + 'readelf'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment