Commit 3c5b0b54, authored by Andrew Grieve, committed by Commit Bot

SuperSize: Improve logic for when to omit symbols in html_report.py

It was using a hardcoded MAX_SYMBOLS. It now will keep adding symbols
until 95% of the size is attributed.

This increases the size of ndjson files for a monochrome .size file, but
should reduce the size of diffs.

Change-Id: I56146b20af43b0a47209943ea03bce98d562281f
Reviewed-on: https://chromium-review.googlesource.com/1176177
Commit-Queue: agrieve <agrieve@chromium.org>
Reviewed-by: Samuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#583496}
parent 45f3faa7
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import codecs import codecs
import collections import collections
import itertools
import json import json
import logging import logging
import os import os
...@@ -13,6 +14,7 @@ import os ...@@ -13,6 +14,7 @@ import os
import archive import archive
import diff import diff
import models import models
import path_util
_SYMBOL_TYPE_VTABLE = 'v' _SYMBOL_TYPE_VTABLE = 'v'
...@@ -42,7 +44,14 @@ _SMALL_SYMBOL_DESCRIPTIONS = { ...@@ -42,7 +44,14 @@ _SMALL_SYMBOL_DESCRIPTIONS = {
'o': 'Other small entries', 'o': 'Other small entries',
} }
_DEFAULT_SYMBOL_COUNT = 250000 # Always emit this many distinct symbols (if present), even when small.
# No need to optimize file size at this point.
_MIN_SYMBOL_COUNT = 1000
# Small symbols grouped into "other" symbols may not comprise more than this
# fraction of total size.
_MAX_OTHER_SYMBOL_COVERAGE = .05
# Don't insert "other" symbols smaller than this (just noise at this point).
_MIN_OTHER_PSS = 1
def _GetSymbolType(symbol): def _GetSymbolType(symbol):
...@@ -54,11 +63,10 @@ def _GetSymbolType(symbol): ...@@ -54,11 +63,10 @@ def _GetSymbolType(symbol):
return symbol_type return symbol_type
def _GetOrAddFileNode(symbol, file_nodes, components): def _GetOrAddFileNode(path, component, file_nodes, components):
path = symbol.source_path or symbol.object_path
file_node = file_nodes.get(path) file_node = file_nodes.get(path)
if file_node is None: if file_node is None:
component_index = components.GetOrAdd(symbol.component) component_index = components.GetOrAdd(component)
file_node = { file_node = {
_COMPACT_FILE_PATH_KEY: path, _COMPACT_FILE_PATH_KEY: path,
_COMPACT_FILE_COMPONENT_INDEX_KEY: component_index, _COMPACT_FILE_COMPONENT_INDEX_KEY: component_index,
...@@ -88,6 +96,29 @@ class IndexedSet(object): ...@@ -88,6 +96,29 @@ class IndexedSet(object):
return index return index
def _PartitionSymbols(symbols):
  """Splits |symbols| into those emitted individually and those to be bundled.

  All dex symbols are always kept. The remaining symbols are taken in the
  order produced by Sorted() (presumably largest first -- confirm against
  models.py) until their accumulated abs(pss) exceeds
  (1 - _MAX_OTHER_SYMBOL_COVERAGE) of the non-dex total AND at least
  _MIN_SYMBOL_COUNT of them have been taken.

  Args:
    symbols: A symbol group supporting WhereIsDex(); the resulting group must
        support Inverted(), Sorted(), len(), iteration and slicing.

  Returns:
    A tuple (main_symbols, extra_symbols). main_symbols is a single-pass
    iterator (itertools.chain) over the dex symbols followed by the kept
    non-dex symbols. extra_symbols is the remaining slice of the ordered
    non-dex symbols.
  """
  # Dex methods (type "m") are whitelisted for the method_count mode on the
  # UI. It's important to see details on all the methods.
  dex_symbols = symbols.WhereIsDex()
  ordered_symbols = dex_symbols.Inverted().Sorted()
  abs_pss_target = (1 - _MAX_OTHER_SYMBOL_COVERAGE) * sum(
      abs(s.pss) for s in ordered_symbols)

  running_abs_pss = 0
  # Number of leading entries of |ordered_symbols| to keep. This is a count,
  # not an index: the symbol that pushes the running total past the target is
  # itself kept, and when the loop runs to completion every symbol is kept
  # (nothing is silently demoted to "other").
  ordered_count = 0
  for s in ordered_symbols:
    if running_abs_pss > abs_pss_target and ordered_count >= _MIN_SYMBOL_COUNT:
      break
    running_abs_pss += abs(s.pss)
    ordered_count += 1

  main_symbols = itertools.chain(dex_symbols, ordered_symbols[:ordered_count])
  extra_symbols = ordered_symbols[ordered_count:]

  logging.info('Found %d large symbols, %s small symbols',
               len(dex_symbols) + ordered_count, len(extra_symbols))
  return main_symbols, extra_symbols
def _MakeTreeViewList(symbols, include_all_symbols): def _MakeTreeViewList(symbols, include_all_symbols):
"""Builds JSON data of the symbols for the tree view HTML report. """Builds JSON data of the symbols for the tree view HTML report.
...@@ -100,27 +131,18 @@ def _MakeTreeViewList(symbols, include_all_symbols): ...@@ -100,27 +131,18 @@ def _MakeTreeViewList(symbols, include_all_symbols):
""" """
file_nodes = {} file_nodes = {}
components = IndexedSet() components = IndexedSet()
# Dict of path -> type -> accumulated pss.
small_symbol_pss = collections.defaultdict(
lambda: collections.defaultdict(float))
# Build a container for symbols smaller than min_symbol_size
small_symbols = collections.defaultdict(dict)
# Dex methods (type "m") are whitelisted for the method_count mode on the
# UI. It's important to see details on all the methods.
dex_symbols = symbols.WhereIsDex()
ordered_symbols = dex_symbols.Inverted().Sorted()
if include_all_symbols: if include_all_symbols:
symbol_count = len(ordered_symbols) main_symbols, extra_symbols = symbols, []
else: else:
symbol_count = max(_DEFAULT_SYMBOL_COUNT - len(dex_symbols), 0) logging.info('Partitioning symbols...')
main_symbols, extra_symbols = _PartitionSymbols(symbols)
main_symbols = dex_symbols + ordered_symbols[:symbol_count] # Bundle symbols by the file they belong to.
extra_symbols = ordered_symbols[symbol_count:] # Add all the file buckets into file_nodes.
logging.info('Found %d large symbols, %s small symbols',
len(main_symbols), len(extra_symbols))
# Bundle symbols by the file they belong to,
# and add all the file buckets into file_nodes
for symbol in main_symbols: for symbol in main_symbols:
symbol_type = _GetSymbolType(symbol) symbol_type = _GetSymbolType(symbol)
symbol_size = round(symbol.pss, 2) symbol_size = round(symbol.pss, 2)
...@@ -130,7 +152,9 @@ def _MakeTreeViewList(symbols, include_all_symbols): ...@@ -130,7 +152,9 @@ def _MakeTreeViewList(symbols, include_all_symbols):
if symbol.IsDelta() and symbol.diff_status == models.DIFF_STATUS_REMOVED: if symbol.IsDelta() and symbol.diff_status == models.DIFF_STATUS_REMOVED:
symbol_count = -1 symbol_count = -1
file_node = _GetOrAddFileNode(symbol, file_nodes, components) path = symbol.source_path or symbol.object_path
file_node = _GetOrAddFileNode(
path, symbol.component, file_nodes, components)
is_dex_method = symbol_type == _SYMBOL_TYPE_DEX_METHOD is_dex_method = symbol_type == _SYMBOL_TYPE_DEX_METHOD
symbol_entry = { symbol_entry = {
...@@ -149,23 +173,37 @@ def _MakeTreeViewList(symbols, include_all_symbols): ...@@ -149,23 +173,37 @@ def _MakeTreeViewList(symbols, include_all_symbols):
symbol_entry[_COMPACT_SYMBOL_FLAGS_KEY] = symbol.flags symbol_entry[_COMPACT_SYMBOL_FLAGS_KEY] = symbol.flags
file_node[_COMPACT_FILE_SYMBOLS_KEY].append(symbol_entry) file_node[_COMPACT_FILE_SYMBOLS_KEY].append(symbol_entry)
# Collect small symbols into a per-path dict.
for symbol in extra_symbols: for symbol in extra_symbols:
symbol_type = _GetSymbolType(symbol) symbol_type = _GetSymbolType(symbol)
path = symbol.source_path or symbol.object_path
file_node = _GetOrAddFileNode(symbol, file_nodes, components) tup = (path, symbol.component)
path = file_node[_COMPACT_FILE_PATH_KEY] small_symbol_pss[tup][symbol_type] += symbol.pss
small_type_symbol = small_symbols[path].get(symbol_type) # Insert small symbols.
if small_type_symbol is None: inserted_smalls_count = 0
small_type_symbol = { inserted_smalls_abs_pss = 0
skipped_smalls_count = 0
skipped_smalls_abs_pss = 0
for tup, type_to_pss in small_symbol_pss.iteritems():
path, component = tup
for symbol_type, pss in type_to_pss.iteritems():
if abs(pss) < _MIN_OTHER_PSS:
skipped_smalls_count += 1
skipped_smalls_abs_pss += abs(pss)
else:
inserted_smalls_count += 1
inserted_smalls_abs_pss += abs(pss)
file_node = _GetOrAddFileNode(path, component, file_nodes, components)
file_node[_COMPACT_FILE_SYMBOLS_KEY].append({
_COMPACT_SYMBOL_NAME_KEY: _SMALL_SYMBOL_DESCRIPTIONS[symbol_type], _COMPACT_SYMBOL_NAME_KEY: _SMALL_SYMBOL_DESCRIPTIONS[symbol_type],
_COMPACT_SYMBOL_TYPE_KEY: symbol_type, _COMPACT_SYMBOL_TYPE_KEY: symbol_type,
_COMPACT_SYMBOL_BYTE_SIZE_KEY: 0, _COMPACT_SYMBOL_BYTE_SIZE_KEY: pss,
} })
small_symbols[path][symbol_type] = small_type_symbol logging.debug(
file_node[_COMPACT_FILE_SYMBOLS_KEY].append(small_type_symbol) 'Created %d "other" symbols with PSS=%.1f. Omitted %d with PSS=%.1f',
inserted_smalls_count, inserted_smalls_abs_pss, skipped_smalls_count,
small_type_symbol[_COMPACT_SYMBOL_BYTE_SIZE_KEY] += symbol.pss skipped_smalls_abs_pss)
meta = { meta = {
'components': components.value_list, 'components': components.value_list,
...@@ -200,7 +238,6 @@ def BuildReport(out_file, size_file, before_size_file=(None, None), ...@@ -200,7 +238,6 @@ def BuildReport(out_file, size_file, before_size_file=(None, None),
else: else:
symbols = size_info.raw_symbols symbols = size_info.raw_symbols
logging.info('Creating JSON objects')
meta, tree_nodes = _MakeTreeViewList(symbols, all_symbols) meta, tree_nodes = _MakeTreeViewList(symbols, all_symbols)
meta.update({ meta.update({
'diff_mode': diff_mode, 'diff_mode': diff_mode,
...@@ -242,9 +279,9 @@ def _MakeDirIfDoesNotExist(rel_path): ...@@ -242,9 +279,9 @@ def _MakeDirIfDoesNotExist(rel_path):
def AddArguments(parser): def AddArguments(parser):
parser.add_argument('input_file', parser.add_argument('input_size_file',
help='Path to input .size file.') help='Path to input .size file.')
parser.add_argument('--report-file', metavar='PATH', required=True, parser.add_argument('output_report_file',
help='Write generated data to the specified ' help='Write generated data to the specified '
'.ndjson file.') '.ndjson file.')
parser.add_argument('--all-symbols', action='store_true', parser.add_argument('--all-symbols', action='store_true',
...@@ -255,22 +292,23 @@ def AddArguments(parser): ...@@ -255,22 +292,23 @@ def AddArguments(parser):
def Run(args, parser): def Run(args, parser):
if not args.input_file.endswith('.size'): if not args.input_size_file.endswith('.size'):
parser.error('Input must end with ".size"') parser.error('Input must end with ".size"')
if args.diff_with and not args.diff_with.endswith('.size'): if args.diff_with and not args.diff_with.endswith('.size'):
parser.error('Diff input must end with ".size"') parser.error('Diff input must end with ".size"')
if not args.report_file.endswith('.ndjson'): if not args.output_report_file.endswith('.ndjson'):
parser.error('Output must end with ".ndjson"') parser.error('Output must end with ".ndjson"')
with codecs.open(args.report_file, 'w', encoding='ascii') as out_file: with codecs.open(args.output_report_file, 'w', encoding='ascii') as out_file:
BuildReport( BuildReport(
out_file, out_file,
size_file=(args.input_file, None), size_file=(args.input_size_file, None),
before_size_file=(args.diff_with, None), before_size_file=(args.diff_with, None),
all_symbols=args.all_symbols all_symbols=args.all_symbols
) )
logging.warning('Report saved to %s', args.report_file) logging.warning('Report saved to %s', args.output_report_file)
logging.warning('Open server by running: \n' supersize_path = os.path.relpath(os.path.join(
'tools/binary_size/supersize start_server %s', path_util.SRC_ROOT, 'tools', 'binary_size', 'supersize'))
args.report_file) logging.warning('Open server by running: \n %s start_server %s',
supersize_path, args.output_report_file)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment