Commit 2a179dff authored by Samuel Huang; committed by Commit Bot

[SuperSize] Fix section size accounting for ELF files.

SuperSize-archive calls _AddUnattributedSectionSymbols() to create
symbols (in .other section) to account for sections that are not covered
by symbols. However, previously this was done only for ELF files inside an
APK, and not for stand-alone ELF files. Also, the accounting was off,
resulting in SuperSize-console
  Print(size_info)
producing file sizes that are off. This CL fixes the above problems.
Details:
* Add models.ClassifySections() to classify sections into one of three
  groups:
  (1) Unsummed sections: These don't add to binary size. E.g., .bss.
  (2) Explicitly summed sections: These add to binary size, and have
      symbols that are extracted by the "usual method" via .map file
      parsing, nm, etc. E.g., .text.
  (3) Implicitly summed sections: These add to binary size, but need
      to be extracted via _AddUnattributedSectionSymbols(), and counted
      as part of the .other section. E.g., .dynsym.
  * The function is used by archive.py and describe.py, to eliminate
    previous size mismatches.
  * Add BaseSizeInfo.ClassifySections() to wrap this.
* archive.py: Change main flow so that _AddUnattributedSectionSymbols()
  gets called when an ELF file is the main input (even without APK).
* describe.py: When describing section sizes, add note to make it clear
  that a section is counted in the .other section.

This CL leads to extensive .golden file changes.

Bug: 1084580
Change-Id: If663eb9acce89b32f857a0056b6f5ec39db35d8e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2217861
Reviewed-by: Samuel Huang <huangs@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Commit-Queue: Samuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#773342}
parent cf85d1a3
......@@ -1160,8 +1160,9 @@ def _ParseApkElfSectionRanges(section_ranges, metadata, apk_elf_result):
# hasn't been used since switching from gold -> lld.
apk_section_ranges['%s (unpacked)' %
packed_section_name] = unpacked_range
return apk_section_ranges, elf_overhead_size
return section_ranges, 0
else:
_, apk_section_ranges, elf_overhead_size = apk_elf_result.get()
return apk_section_ranges, elf_overhead_size
class _ResourcePathDeobfuscator(object):
......@@ -1357,10 +1358,9 @@ def _OverwriteSymbolSizesWithRelocationCount(raw_symbols, tool_prefix,
raw_symbols[:] = [sym for sym in raw_symbols if sym.size or sym.IsNative()]
def _AddUnattributedSectionSymbols(raw_symbols, section_ranges, elf_result):
def _AddUnattributedSectionSymbols(raw_symbols, section_ranges):
# Create symbols for ELF sections not covered by existing symbols.
logging.info('Searching for symbol gaps...')
_, section_ranges, _ = elf_result.get()
last_symbol_ends = collections.defaultdict(int)
for sym in raw_symbols:
if sym.end_address > last_symbol_ends[sym.section_name]:
......@@ -1383,11 +1383,15 @@ def _AddUnattributedSectionSymbols(raw_symbols, section_ranges, elf_result):
logging.info('Last symbol in %s does not reach end of section, gap=%d',
section_name, overhead)
# Sections that should not bundle into ".other".
unsummed_sections, summed_sections = models.ClassifySections(
section_ranges.keys())
# Sort keys to ensure consistent order (> 1 sections may have address = 0).
for section_name in sorted(section_ranges.keys()):
# Handle sections that don't appear in |raw_symbols|.
if section_name not in last_symbol_ends:
address, section_size = section_ranges[section_name]
address, section_size = section_ranges[section_name]
if (section_name not in unsummed_sections
and section_name not in summed_sections):
logging.info('All bytes in %s are unattributed, gap=%d', section_name,
overhead)
raw_symbols.append(
......@@ -1449,6 +1453,8 @@ def CreateSectionSizesAndSymbols(knobs=None,
# Extraction takes around 1 second, so do it in parallel.
apk_elf_result = parallel.ForkAndCall(_ElfInfoFromApk,
(apk_path, apk_so_path, tool_prefix))
else:
apk_elf_result = None
outdir_context = None
source_mapper = None
......@@ -1509,16 +1515,16 @@ def CreateSectionSizesAndSymbols(knobs=None,
else:
section_ranges, raw_symbols, object_paths_by_name = {}, [], None
elf_overhead_size = _CalculateElfOverhead(section_ranges, elf_path)
if apk_elf_result:
section_ranges, elf_overhead_size = _ParseApkElfSectionRanges(
section_ranges, metadata, apk_elf_result)
else:
elf_overhead_size = _CalculateElfOverhead(section_ranges, elf_path)
if elf_path:
_AddUnattributedSectionSymbols(raw_symbols, section_ranges)
pak_symbols_by_id = None
if apk_path and size_info_prefix:
if elf_path:
section_ranges, elf_overhead_size = _ParseApkElfSectionRanges(
section_ranges, metadata, apk_elf_result)
_AddUnattributedSectionSymbols(raw_symbols, section_ranges,
apk_elf_result)
# Can modify |section_ranges|.
pak_symbols_by_id = _FindPakSymbolsFromApk(opts, section_ranges, apk_path,
size_info_prefix)
......
......@@ -51,23 +51,21 @@ def _Divide(a, b):
return float(a) / b if b else 0
def _IncludeInTotals(section_name):
  """Returns whether |section_name| counts toward the section size total.

  A section is left out of the total when it is listed in
  models.BSS_SECTIONS or when its name contains a '(' character.
  """
  if section_name in models.BSS_SECTIONS:
    return False
  return '(' not in section_name
def _GetSectionSizeInfo(unsummed_sections, summed_sections, section_sizes):
sizes = [v for k, v in section_sizes.items() if k in summed_sections]
total_bytes = sum(sizes)
max_bytes = max(sizes)
def _GetSectionSizeInfo(section_sizes):
total_bytes = sum(v for k, v in section_sizes.items() if _IncludeInTotals(k))
max_bytes = max(
abs(v) for k, v in section_sizes.items() if _IncludeInTotals(k))
maybe_significant_sections = unsummed_sections | summed_sections
def is_significant_section(name, size):
# Show all sections containing symbols, plus relocations.
# As a catch-all, also include any section that comprises > 4% of the
# largest section. Use largest section rather than total so that it still
# works out when showing a diff containing +100, -100 (total=0).
return (name in list(models.SECTION_TO_SECTION_NAME.values())
or name in ('.rela.dyn', '.rel.dyn')
or _IncludeInTotals(name) and abs(_Divide(size, max_bytes)) > .04)
return (name in maybe_significant_sections
or name in ['.rela.dyn', '.rel.dyn']
or abs(_Divide(size, max_bytes)) > .04)
section_names = sorted(
k for k, v in section_sizes.items() if is_significant_section(k, v))
......@@ -179,20 +177,26 @@ class DescriberText(Describer):
self.recursive = recursive
self.summarize = summarize
def _DescribeSectionSizes(self, section_sizes):
total_bytes, section_names = _GetSectionSizeInfo(section_sizes)
def _DescribeSectionSizes(self, unsummed_sections, summed_sections,
section_sizes):
total_bytes, section_names = _GetSectionSizeInfo(unsummed_sections,
summed_sections,
section_sizes)
yield ''
yield 'Section Sizes (Total={} ({} bytes)):'.format(
_PrettySize(total_bytes), total_bytes)
for name in section_names:
size = section_sizes[name]
if not _IncludeInTotals(name):
if name in unsummed_sections:
yield ' {}: {} ({} bytes) (not included in totals)'.format(
name, _PrettySize(size), size)
else:
notes = ''
if name not in summed_sections:
notes = ' (counted in .other)'
percent = _Divide(size, total_bytes)
yield ' {}: {} ({} bytes) ({:.1%})'.format(
name, _PrettySize(size), size, percent)
yield ' {}: {} ({} bytes) ({:.1%}){}'.format(name, _PrettySize(size),
size, percent, notes)
if self.verbose:
yield ''
......@@ -200,12 +204,14 @@ class DescriberText(Describer):
section_names = sorted(
k for k in section_sizes.keys() if k not in section_names)
for name in section_names:
not_included_part = ''
if not _IncludeInTotals(name):
not_included_part = ' (not included in totals)'
yield ' {}: {} ({} bytes){}'.format(
name, _PrettySize(section_sizes[name]), section_sizes[name],
not_included_part)
notes = ''
if name in unsummed_sections:
notes = ' (not included in totals)'
elif name not in summed_sections:
notes = ' (counted in .other)'
yield ' {}: {} ({} bytes){}'.format(name,
_PrettySize(section_sizes[name]),
section_sizes[name], notes)
def _DescribeSymbol(self, sym, single_line=False):
address = 'Group' if sym.IsGroup() else hex(sym.address)
......@@ -504,7 +510,10 @@ class DescriberText(Describer):
(' %s' % line for line in DescribeMetadata(before_metadata)),
('New Metadata:',),
(' %s' % line for line in DescribeMetadata(after_metadata)))
section_desc = self._DescribeSectionSizes(diff.section_sizes)
unsummed_sections, summed_sections = diff.ClassifySections()
section_desc = self._DescribeSectionSizes(unsummed_sections,
summed_sections,
diff.section_sizes)
group_desc = self.GenerateLines(diff.symbols)
return itertools.chain(metadata_desc, section_desc, ('',), group_desc)
......@@ -512,7 +521,10 @@ class DescriberText(Describer):
metadata_desc = itertools.chain(
('Metadata:',),
(' %s' % line for line in DescribeMetadata(size_info.metadata)))
section_desc = self._DescribeSectionSizes(size_info.section_sizes)
unsummed_sections, summed_sections = size_info.ClassifySections()
section_desc = self._DescribeSectionSizes(unsummed_sections,
summed_sections,
size_info.section_sizes)
coverage_desc = ()
if self.verbose:
coverage_desc = itertools.chain(
......@@ -628,9 +640,11 @@ class DescriberCsv(Describer):
self.csv_writer.writerow(data)
return self.stringio.getvalue().rstrip()
def _DescribeSectionSizes(self, section_sizes):
significant_section_names = _GetSectionSizeInfo(section_sizes)[1]
def _DescribeSectionSizes(self, unsummed_sections, summed_section,
section_sizes):
_, significant_section_names = _GetSectionSizeInfo(unsummed_sections,
summed_section,
section_sizes)
if self.verbose:
significant_set = set(significant_section_names)
section_names = sorted(section_sizes.keys())
......@@ -645,12 +659,18 @@ class DescriberCsv(Describer):
yield self._RenderCsv([name, size])
def _DescribeDeltaSizeInfo(self, diff):
section_desc = self._DescribeSectionSizes(diff.section_sizes)
unsummed_sections, summed_sections = diff.ClassifySections()
section_desc = self._DescribeSectionSizes(unsummed_sections,
summed_sections,
diff.section_sizes)
group_desc = self.GenerateLines(diff.symbols)
return itertools.chain(section_desc, ('',), group_desc)
def _DescribeSizeInfo(self, size_info):
section_desc = self._DescribeSectionSizes(size_info.section_sizes)
unsummed_sections, summed_sections = size_info.ClassifySections()
section_desc = self._DescribeSectionSizes(unsummed_sections,
summed_sections,
size_info.section_sizes)
group_desc = self.GenerateLines(size_info.symbols)
return itertools.chain(section_desc, ('',), group_desc)
......
......@@ -170,6 +170,27 @@ DIFF_COUNT_DELTA = [0, 0, 1, -1]
STRING_LITERAL_NAME = 'string literal'
def ClassifySections(section_names):
  """Partitions section names by how they contribute to binary size.

  Args:
    section_names: A list of existing section names.

  Returns:
    Tuple (unsummed_sections, summed_sections) of frozensets.
    |unsummed_sections| holds the names that are in BSS_SECTIONS or contain
    '('; these don't contribute to binary size. |summed_sections| holds the
    remaining names that are keys of SECTION_NAME_TO_SECTION; these
    *explicitly* contribute to binary size. Names that land in neither set
    *implicitly* contribute to binary size -- these get lumped into the
    .other section.
  """
  unsummed_sections = frozenset(
      name for name in section_names
      if name in BSS_SECTIONS or '(' in name)
  # Second pass over |section_names|: keep only names with a known section
  # mapping that were not already classified as unsummed.
  known_names = set(SECTION_NAME_TO_SECTION.keys())
  summed_sections = frozenset(
      name for name in section_names
      if name in known_names and name not in unsummed_sections)
  return unsummed_sections, summed_sections
class BaseSizeInfo(object):
"""Base class for SizeInfo and DeltaSizeInfo.
......@@ -189,6 +210,7 @@ class BaseSizeInfo(object):
'_symbols',
'_native_symbols',
'_pak_symbols',
'_classified_sections',
)
def __init__(self, section_sizes, raw_symbols, symbols=None):
......@@ -199,6 +221,7 @@ class BaseSizeInfo(object):
self._symbols = symbols
self._native_symbols = None
self._pak_symbols = None
self._classified_sections = None
@property
def symbols(self):
......@@ -226,6 +249,11 @@ class BaseSizeInfo(object):
self._pak_symbols = self.raw_symbols.WhereIsPak()
return self._pak_symbols
def ClassifySections(self):
  """Returns the cached classification of this object's section names.

  On the first call, the module-level ClassifySections() is applied to the
  keys of |self.section_sizes| and its result is stored in
  |self._classified_sections|; later calls return the stored value.

  Returns:
    Whatever the module-level ClassifySections() returned for the keys of
    |self.section_sizes|.
  """
  # |_classified_sections| uses None as its "not yet computed" sentinel, so
  # test identity against None rather than relying on truthiness.
  if self._classified_sections is None:
    self._classified_sections = ClassifySections(self.section_sizes.keys())
  return self._classified_sections
class SizeInfo(BaseSizeInfo):
"""Represents all size information for a single binary.
......
Name,Size
.ARM.exidx,1536456
.bss,1300456
.data,101768
.data.rel.ro,1065224
.other,33902635
.data.rel.ro.local,790024
.other,90351129
.rel.dyn,2655384
.rodata,5927652
.strtab,34841854
......@@ -16,15 +16,39 @@ GroupCount,Address,SizeWithoutPadding,Padding,NumAliases,PSS,Section,Name
,0x2de7008,152,0,1,152.0,d,base::android::kBaseRegisteredMethods
,0x2de70a0,4,0,1,4.0,d,base::android::g_renderer_histogram_code
,0x2de70a4,4,0,1,4.0,d,base::android::g_library_version_number
,0x2de70a8,101600,0,1,101600.0,d,** .data (unattributed)
,0x2cd8500,56,0,1,56.0,R,ChromeMainDelegateAndroid [vtable]
,0x2cd8538,24,0,1,24.0,R,mojo::MessageReceiver [vtable]
,0x2cd8550,12,0,1,12.0,R,kMethodsAnimationFrameTimeHistogram
,0x2cd855c,1065132,0,1,1065132.0,R,** .data.rel.ro (unattributed)
,0x2c176f0,56,0,1,56.0,R,ChromeMainDelegate [vtable]
,0x2c17728,24,0,1,24.0,R,chrome::mojom::FieldTrialRecorder [vtable]
,0x2c17740,789904,0,1,789904.0,R,chrome::mojom::FieldTrialRecorderProxy [vtable]
,0x2cd84e0,16,16,1,32.0,R,.Lswitch.table.45
,0x2cd84f0,8,0,1,8.0,R,kSystemClassPrefixes
,0x0,60,0,1,60.0,o,** ELF Section: .ARM.attributes
,0x0,28,0,1,28.0,o,** ELF Section: .note.gnu.gold-version
,0x0,436,0,1,436.0,o,** ELF Section: .shstrtab
,0x0,34841854,0,1,34841854.0,o,** ELF Section: .strtab
,0x0,17166112,0,1,17166112.0,o,** ELF Section: .symtab
,0x0,0,33902635,1,33902635.0,o,Overhead: ELF file
,0x154,19,0,1,19.0,o,** ELF Section: .interp
,0x168,36,0,1,36.0,o,** ELF Section: .note.gnu.build-id
,0x18c,6496,0,1,6496.0,o,** ELF Section: .dynsym
,0x1b0c,4025,0,1,4025.0,o,** ELF Section: .dynstr
,0x2ad4,2684,0,1,2684.0,o,** ELF Section: .hash
,0x3558,812,0,1,812.0,o,** ELF Section: .gnu.version
,0x3888,28,0,1,28.0,o,** ELF Section: .gnu.version_d
,0x38a4,96,0,1,96.0,o,** ELF Section: .gnu.version_r
,0x3904,2655384,0,1,2655384.0,o,** ELF Section: .rel.dyn
,0x29fbec,2816,0,1,2816.0,o,** ELF Section: .rel.plt
,0x2a06ec,4244,0,1,4244.0,o,** ELF Section: .plt
,0x2bd3d10,1536456,0,1,1536456.0,o,** ELF Section: .ARM.exidx
,0x2bd5858,183632,0,1,183632.0,o,** ELF Section: .ARM.extab
,0x2ddc608,8,0,1,8.0,o,** ELF Section: .init_array
,0x2ddc6f4,8,0,1,8.0,o,** ELF Section: .fini_array
,0x2ddc6fc,304,0,1,304.0,o,** ELF Section: .dynamic
,0x2ddc834,42956,0,1,42956.0,o,** ELF Section: .got
,0x266e600,5,0,2,2.5,r,"""Str1"""
,0x266e600,5,0,2,2.5,r,"""Str1"""
,0x266e605,16,0,1,16.0,r,"""String literal2"""
......@@ -36,6 +60,7 @@ GroupCount,Address,SizeWithoutPadding,Padding,NumAliases,PSS,Section,Name
,0x284e398,32,0,1,32.0,r,chrome::mojom::FilePatcher::Name_
,0x28f3450,48,675992,1,676040.0,r,kAnimationFrameTimeHistogramClassPath
,0x28f3480,4,0,1,4.0,r,blink::CSSValueKeywordsHash::findValueImpl::value_word_list
,0x28f3484,3286096,0,1,3286096.0,r,** .rodata (unattributed)
,0x28d900,16,0,1,16.0,t,_GLOBAL__sub_I_page_allocator.cc
,0x28d910,56,0,1,56.0,t,_GLOBAL__sub_I_bbr_sender.cc
,0x28d948,28,0,1,28.0,t,_GLOBAL__sub_I_pacing_sender.cc
......@@ -56,6 +81,7 @@ GroupCount,Address,SizeWithoutPadding,Padding,NumAliases,PSS,Section,Name
,0x2a2000,32,4002,1,4034.0,t,** outlined function
,0x2a2020,48,0,2,24.0,t,** outlined function * 2
,0x2a2020,48,0,2,24.0,t,aliasedWithOutlinedFunction
,0x2a2050,35898456,0,1,35898456.0,t,** .text (unattributed)
,0x0,262144,0,1,262144.0,b,ff_cos_131072
,0x0,131072,0,1,131072.0,b,ff_cos_131072_fixed
,0x0,131072,0,1,131072.0,b,ff_cos_65536
......
......@@ -10,35 +10,35 @@ Section Sizes (Total=0 bytes (0 bytes)):
.bss: 0 bytes (0 bytes) (not included in totals)
.data: 0 bytes (0 bytes) (0.0%)
.data.rel.ro: 0 bytes (0 bytes) (0.0%)
.data.rel.ro.local: 0 bytes (0 bytes) (0.0%)
.pak.nontranslated: 0 bytes (0 bytes) (0.0%)
.pak.translations: 0 bytes (0 bytes) (0.0%)
.rel.dyn: 0 bytes (0 bytes) (0.0%)
.rel.dyn: 0 bytes (0 bytes) (0.0%) (counted in .other)
.rodata: 0 bytes (0 bytes) (0.0%)
.text: 0 bytes (0 bytes) (0.0%)
Other section sizes:
.ARM.attributes: 0 bytes (0 bytes)
.ARM.exidx: 0 bytes (0 bytes)
.ARM.extab: 0 bytes (0 bytes)
.data.rel.ro.local: 0 bytes (0 bytes)
.dynamic: 0 bytes (0 bytes)
.dynstr: 0 bytes (0 bytes)
.dynsym: 0 bytes (0 bytes)
.fini_array: 0 bytes (0 bytes)
.gnu.version: 0 bytes (0 bytes)
.gnu.version_d: 0 bytes (0 bytes)
.gnu.version_r: 0 bytes (0 bytes)
.got: 0 bytes (0 bytes)
.hash: 0 bytes (0 bytes)
.init_array: 0 bytes (0 bytes)
.interp: 0 bytes (0 bytes)
.note.gnu.build-id: 0 bytes (0 bytes)
.note.gnu.gold-version: 0 bytes (0 bytes)
.plt: 0 bytes (0 bytes)
.rel.plt: 0 bytes (0 bytes)
.shstrtab: 0 bytes (0 bytes)
.strtab: 0 bytes (0 bytes)
.symtab: 0 bytes (0 bytes)
.ARM.attributes: 0 bytes (0 bytes) (counted in .other)
.ARM.exidx: 0 bytes (0 bytes) (counted in .other)
.ARM.extab: 0 bytes (0 bytes) (counted in .other)
.dynamic: 0 bytes (0 bytes) (counted in .other)
.dynstr: 0 bytes (0 bytes) (counted in .other)
.dynsym: 0 bytes (0 bytes) (counted in .other)
.fini_array: 0 bytes (0 bytes) (counted in .other)
.gnu.version: 0 bytes (0 bytes) (counted in .other)
.gnu.version_d: 0 bytes (0 bytes) (counted in .other)
.gnu.version_r: 0 bytes (0 bytes) (counted in .other)
.got: 0 bytes (0 bytes) (counted in .other)
.hash: 0 bytes (0 bytes) (counted in .other)
.init_array: 0 bytes (0 bytes) (counted in .other)
.interp: 0 bytes (0 bytes) (counted in .other)
.note.gnu.build-id: 0 bytes (0 bytes) (counted in .other)
.note.gnu.gold-version: 0 bytes (0 bytes) (counted in .other)
.plt: 0 bytes (0 bytes) (counted in .other)
.rel.plt: 0 bytes (0 bytes) (counted in .other)
.shstrtab: 0 bytes (0 bytes) (counted in .other)
.strtab: 0 bytes (0 bytes) (counted in .other)
.symtab: 0 bytes (0 bytes) (counted in .other)
2 symbols added (+), 2 changed (~), 3 removed (-), 243 unchanged (not shown)
Added/Removed by section: .data: +2 .pak.translations: -3
......
......@@ -16,15 +16,16 @@ Section Sizes (Total=0 bytes (0 bytes)):
.bss: 0 bytes (0 bytes) (not included in totals)
.data: 0 bytes (0 bytes) (0.0%)
.data.rel.ro: 0 bytes (0 bytes) (0.0%)
.data.rel.ro.local: 0 bytes (0 bytes) (0.0%)
.other: 0 bytes (0 bytes) (0.0%)
.rel.dyn: 0 bytes (0 bytes) (0.0%)
.rel.dyn: 0 bytes (0 bytes) (0.0%) (counted in .other)
.rodata: 0 bytes (0 bytes) (0.0%)
.text: 0 bytes (0 bytes) (0.0%)
0 symbols added (+), 0 changed (~), 0 removed (-), 51 unchanged (not shown)
0 symbols added (+), 0 changed (~), 0 removed (-), 77 unchanged (not shown)
Added/Removed by section:
Of changed symbols, 0 grew, 0 shrank
Number of unique symbols 46 -> 46 (+0)
Number of unique symbols 72 -> 72 (+0)
0 paths added, 0 removed, 0 changed
Showing 0 symbols (0 -> 0 unique) with total pss: 0 bytes
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment