Commit d71f8940 authored by Jasper Chapman-Black, committed by Commit Bot

Supersize: Include string literal prefix in name

Instead of naming string literals 'string literal', name ASCII-encodable
string literals using the first few characters of the literal (e.g.
"string contents" or "long string contents[...]").

Non-ASCII-encodable string literals are still called 'string literal',
and this change should be backwards-compatible.

Sample (filtered) output at
https://storage.googleapis.com/chrome-supersize/viewer.html?load_url=oneoffs%2Fsample-string-literals2.ndjson&byteunit=B&include=%22

This increased a sample .size filesize by 640K, about a 5% increase.

Bug: 939221
Change-Id: Iaf6a0200b1bf5ed9420f506377eb7b4d422d56cc
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1815881
Commit-Queue: Jasper Chapman-Black <jaspercb@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Cr-Commit-Position: refs/heads/master@{#699097}
parent 3bc67512
......@@ -15,6 +15,7 @@ import logging
import os
import posixpath
import re
import string
import subprocess
import sys
import tempfile
......@@ -34,6 +35,7 @@ import ninja_parser
import nm
import obj_analyzer
import path_util
import string_extract
sys.path.insert(1, os.path.join(path_util.SRC_ROOT, 'tools', 'grit'))
from grit.format import data_pack
......@@ -159,6 +161,10 @@ def _NormalizeNames(raw_symbols):
elif symbol.IsDex():
symbol.full_name, symbol.template_name, symbol.name = (
function_signature.ParseJava(full_name))
elif symbol.IsStringLiteral():
symbol.full_name = full_name
symbol.template_name = full_name
symbol.name = full_name
elif symbol.IsNative():
# Remove [clone] suffix, and set flag accordingly.
# Search from left-to-right, as multiple [clone]s can exist.
......@@ -844,6 +850,24 @@ def _DeduceObjectPathForSwitchTables(raw_symbols, object_paths_by_name):
num_deduced, num_arbitrations, num_unassigned)
def _NameStringLiterals(raw_symbols, elf_path, tool_prefix):
  """Renames string literal symbols after a prefix of their contents.

  For each (symbol, contents) pair produced by
  string_extract.ReadStringLiterals(), sets symbol.full_name to:
    * '"<contents>"' when the contents are printable and at most
      STRING_LENGTH_CUTOFF characters long,
    * '"<first STRING_LENGTH_CUTOFF characters>[...]"' when printable but
      longer,
    * models.STRING_LITERAL_NAME when any character is not in
      string.printable.

  Args:
    raw_symbols: Iterable of symbols; forwarded to ReadStringLiterals().
    elf_path: Path to the ELF file; forwarded to ReadStringLiterals().
    tool_prefix: Forwarded to ReadStringLiterals().
  """
  STRING_LENGTH_CUTOFF = 30
  # Build the lookup set once instead of linearly scanning string.printable
  # for every character of every literal.
  printable_chars = frozenset(string.printable)
  for sym, name in string_extract.ReadStringLiterals(raw_symbols, elf_path,
                                                     tool_prefix):
    # Newlines and tabs are used as delimiters in file_format.py, so they are
    # removed from the name.
    # At this point, names still have a terminating null byte. Only trailing
    # nulls are stripped: a literal that *begins* with a null byte must stay
    # non-printable rather than being silently named after its remainder.
    # NOTE(review): translate(None, '\t\n') relies on Python 2 str semantics;
    # this needs revisiting if the file is ever run under Python 3.
    # NOTE(review): '\r', '\x0b' and '\x0c' are in string.printable and are
    # kept in names - confirm file_format.py tolerates them.
    name = name.translate(None, '\t\n').rstrip('\00')
    if not printable_chars.issuperset(name):
      sym.full_name = models.STRING_LITERAL_NAME
    elif len(name) > STRING_LENGTH_CUTOFF:
      sym.full_name = '"{}[...]"'.format(name[:STRING_LENGTH_CUTOFF])
    else:
      sym.full_name = '"{}"'.format(name)
def _ParseElfInfo(map_path, elf_path, tool_prefix, track_string_literals,
outdir_context=None, linker_name=None):
"""Adds ELF section sizes and symbols."""
......@@ -956,6 +980,9 @@ def _ParseElfInfo(map_path, elf_path, tool_prefix, track_string_literals,
linker_map_parser.DeduceObjectPathsFromThinMap(raw_symbols, linker_map_extras)
if elf_path:
_NameStringLiterals(raw_symbols, elf_path, tool_prefix)
# If we have an ELF file, use its sizes as the source of truth, since some
# sections can differ from the .map.
return (elf_section_sizes if elf_path else map_section_sizes, raw_symbols,
......@@ -1521,7 +1548,7 @@ def CreateSizeInfo(
# Do not call _NormalizeNames() during archive since that method tends to need
# tweaks over time. Calling it only when loading .size files allows for more
# flexability.
# flexibility.
if normalize_names:
_NormalizeNames(raw_symbols)
......
......@@ -126,26 +126,8 @@ class _Session(object):
elf_path = self._ElfPathForSymbol(
size_info, tool_prefix, elf_path)
address, offset, _ = string_extract.LookupElfRodataInfo(
elf_path, tool_prefix)
adjust = offset - address
ret = []
with open(elf_path, 'rb') as f:
for symbol in thing:
if symbol.section != 'r' or (
not all_rodata and not symbol.IsStringLiteral()):
continue
f.seek(symbol.address + adjust)
data = f.read(symbol.size_without_padding)
# As of Oct 2017, there are ~90 symbols name .L.str(.##). These appear
# in the linker map file explicitly, and there doesn't seem to be a
# pattern as to which variables lose their kConstant name (the more
# common case), or which string literals don't get moved to
# ** merge strings (less common).
if symbol.IsStringLiteral() or (
all_rodata and data and data[-1] == '\0'):
ret.append((symbol, data))
return ret
return string_extract.ReadStringLiterals(
thing, elf_path, tool_prefix, all_rodata=all_rodata)
def _DiffFunc(self, before=None, after=None, sort=True):
"""Diffs two SizeInfo objects. Returns a DeltaSizeInfo.
......
......@@ -168,7 +168,6 @@ DIFF_COUNT_DELTA = [0, 0, 1, -1]
STRING_LITERAL_NAME = 'string literal'
class BaseSizeInfo(object):
"""Base class for SizeInfo and DeltaSizeInfo.
......@@ -359,7 +358,10 @@ class BaseSymbol(object):
self.name.endswith(']') and not self.name.endswith('[]'))
def IsStringLiteral(self):
  """Returns True if this symbol's full_name marks it as a string literal.

  A symbol counts as a string literal when its full_name starts with a double
  quote or equals STRING_LITERAL_NAME.
  """
  # String literals have names like "string" or "very_long_str[...]", while
  # non-ASCII strings are named STRING_LITERAL_NAME.
  # The earlier unconditional `return self.full_name == STRING_LITERAL_NAME`
  # is dropped: it made the quoted-name check below unreachable.
  return (self.full_name.startswith('"')
          or self.full_name == STRING_LITERAL_NAME)
# Used for diffs to know whether or not it is accurate to consider two symbols
# with the same name as being the same.
......
......@@ -10,6 +10,9 @@ LookupElfRodataInfo():
ReadFileChunks():
Reads raw data from a file, given a list of ranges in the file.
ReadStringLiterals():
Reads the ELF file to find the string contents of a list of string literals.
ResolveStringPiecesIndirect():
BulkForkAndCall() target: Given {path: [string addresses]} and
[raw_string_data for each string_section]:
......@@ -290,3 +293,29 @@ def ResolveStringPieces(encoded_strings_by_path, string_data):
ret = _AnnotateStringData(string_data, GeneratePathAndValues())
return [concurrent.EncodeDictOfLists(x) for x in ret]
def ReadStringLiterals(symbols, elf_path, tool_prefix, all_rodata=False):
  r"""Yields (symbol, string) for all string literal symbols.

  Only symbols whose section is 'r' are considered. The string is read from
  |elf_path| at the symbol's address, adjusted by the offset/address pair
  returned by LookupElfRodataInfo(), and is symbol.size_without_padding bytes
  long.

  Args:
    symbols: An iterable of Symbols
    elf_path: Path to the executable containing the symbols.
    tool_prefix: Passed through to LookupElfRodataInfo().
    all_rodata: Assume every symbol within .rodata that ends with a \0 is a
        string literal.

  Yields:
    (symbol, data) tuples, where data is the raw content read from the file.
  """
  address, offset, _ = LookupElfRodataInfo(elf_path, tool_prefix)
  adjust = offset - address
  with open(elf_path, 'rb') as f:
    for symbol in symbols:
      if symbol.section != 'r':
        continue
      is_literal = symbol.IsStringLiteral()
      # Without all_rodata only string literals can be yielded, so skip the
      # seek/read for every other .rodata symbol.
      if not (all_rodata or is_literal):
        continue
      f.seek(symbol.address + adjust)
      data = f.read(symbol.size_without_padding)
      # As of Oct 2017, there are ~90 symbols name .L.str(.##). These appear
      # in the linker map file explicitly, and there doesn't seem to be a
      # pattern as to which variables lose their kConstant name (the more
      # common case), or which string literals don't get moved to
      # ** merge strings (less common).
      # endswith(b'\0') is used instead of data[-1] == '\0': indexing bytes
      # yields an int on Python 3, so the old comparison could never be true
      # there. It also handles empty data.
      if is_literal or data.endswith(b'\0'):
        yield symbol, data
......@@ -288,9 +288,9 @@ Section .other: has 100.0% of 39228839 bytes accounted for from 5 symbols. 0 byt
.other@0(size_without_padding=1024,padding=0,full_name=res/drawable-v13/test.xml,object_path=,source_path=chrome/android/res/drawable/test.xml,flags={},num_aliases=1,component=)
.other@0(size_without_padding=0,padding=764,full_name=Overhead: APK file,object_path=,source_path=,flags={},num_aliases=1,component=)
.other@0(size_without_padding=0,padding=33984171,full_name=Overhead: ELF file,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal)
.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=2,component=Internal>Android)
.rodata@266e605(size_without_padding=16,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android)
.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal)
.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=2,component=Internal>Android)
.rodata@266e605(size_without_padding=16,padding=0,full_name="String literal2",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android)
.rodata@266e630(size_without_padding=16,padding=27,full_name=** merge strings,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@284d600(size_without_padding=3425,padding=1961920,full_name=** merge constants,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@284e364(size_without_padding=0,padding=3,full_name=** symbol gap 0,object_path=,source_path=,flags={},num_aliases=1,component=)
......
......@@ -85,9 +85,9 @@ Section .other: has 100.0% of 33984171 bytes accounted for from 1 symbols. 0 byt
.data.rel.ro.local@2cd84e0(size_without_padding=16,padding=16,full_name=.Lswitch.table.45,object_path=third_party/gvr-android-sdk/libgvr_shim_static_arm.a/libcontroller_api_impl.a_controller_api_impl.o,source_path=,flags={},num_aliases=1,component=)
.data.rel.ro.local@2cd84f0(size_without_padding=8,padding=0,full_name=kSystemClassPrefixes,object_path=third_party/gvr-android-sdk/libgvr_shim_static_arm.a/libport_android_jni.a_jni_utils.o,source_path=,flags={anon},num_aliases=1,component=)
.other@0(size_without_padding=0,padding=33984171,full_name=Overhead: ELF file,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal)
.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=2,component=Internal>Android)
.rodata@266e605(size_without_padding=16,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android)
.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal)
.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=2,component=Internal>Android)
.rodata@266e605(size_without_padding=16,padding=0,full_name="String literal2",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android)
.rodata@266e630(size_without_padding=16,padding=27,full_name=** merge strings,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@284d600(size_without_padding=3425,padding=1961920,full_name=** merge constants,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@284e364(size_without_padding=0,padding=3,full_name=** symbol gap 0,object_path=,source_path=,flags={},num_aliases=1,component=)
......
......@@ -289,9 +289,9 @@ Section .other: has 100.0% of 39228839 bytes accounted for from 5 symbols. 0 byt
.other@0(size_without_padding=1024,padding=0,full_name=res/drawable-v13/test.xml,object_path=,source_path=chrome/android/res/drawable/test.xml,flags={},num_aliases=1,component=)
.other@0(size_without_padding=0,padding=764,full_name=Overhead: APK file,object_path=,source_path=,flags={},num_aliases=1,component=)
.other@0(size_without_padding=0,padding=33984171,full_name=Overhead: ELF file,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal)
.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=2,component=Internal>Android)
.rodata@266e605(size_without_padding=16,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android)
.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal)
.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=2,component=Internal>Android)
.rodata@266e605(size_without_padding=16,padding=0,full_name="String literal2",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android)
.rodata@266e630(size_without_padding=16,padding=27,full_name=** merge strings,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@284d600(size_without_padding=3425,padding=1961920,full_name=** merge constants,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@284e364(size_without_padding=0,padding=3,full_name=** symbol gap 0,object_path=,source_path=,flags={},num_aliases=1,component=)
......
......@@ -87,9 +87,9 @@ Section .other: has 100.0% of 33984171 bytes accounted for from 1 symbols. 0 byt
.data.rel.ro.local@2cd84e0(size_without_padding=16,padding=16,full_name=.Lswitch.table.45,object_path=third_party/gvr-android-sdk/libgvr_shim_static_arm.a/libcontroller_api_impl.a_controller_api_impl.o,source_path=,flags={},num_aliases=1,component=)
.data.rel.ro.local@2cd84f0(size_without_padding=8,padding=0,full_name=kSystemClassPrefixes,object_path=third_party/gvr-android-sdk/libgvr_shim_static_arm.a/libport_android_jni.a_jni_utils.o,source_path=,flags={anon},num_aliases=1,component=)
.other@0(size_without_padding=0,padding=33984171,full_name=Overhead: ELF file,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal)
.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=2,component=Internal>Android)
.rodata@266e605(size_without_padding=16,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android)
.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal)
.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=2,component=Internal>Android)
.rodata@266e605(size_without_padding=16,padding=0,full_name="String literal2",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android)
.rodata@266e630(size_without_padding=16,padding=27,full_name=** merge strings,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@284d600(size_without_padding=3425,padding=1961920,full_name=** merge constants,object_path=,source_path=,flags={},num_aliases=1,component=)
.rodata@284e364(size_without_padding=0,padding=3,full_name=** symbol gap 0,object_path=,source_path=,flags={},num_aliases=1,component=)
......
......@@ -60,8 +60,8 @@ Print(c[-1].GroupedByPath(depth=2).Sorted())
# For even more inspiration, look at canned_queries.py
# (and feel free to add your own!).
0: (.rodata@266e600(size_without_padding=5,padding=0,full_name=string literal,object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal), 'Str1\x00')
1: (.rodata@266e605(size_without_padding=16,padding=0,full_name=string literal,object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android), 'String literal2\x00')
0: (.rodata@266e600(size_without_padding=5,padding=0,full_name="Str1",object_path=base/base/page_allocator.o,source_path=base/page_allocator.cc,flags={},num_aliases=2,component=Blink>Internal), 'Str1\x00')
1: (.rodata@266e605(size_without_padding=16,padding=0,full_name="String literal2",object_path=third_party/icu/icuuc/ucnv_ext.o,source_path=third_party/icu/ucnv_ext.c,flags={gen},num_aliases=1,component=Internal>Android), 'String literal2\x00')
Metadata:
elf_arch=arm
elf_build_id=WhatAnAmazingBuildId
......@@ -126,11 +126,11 @@ Index | Running Total | Section@Address | PSS | Path
13) 34774455 (92.7%) o@0x0 33984171 {no path}
Overhead: ELF file
14) 34774457 (92.7%) r@0x266e600 2.5 (size=5) base/page_allocator.cc
string literal (num_aliases=2)
"Str1" (num_aliases=2)
15) 34774460 (92.7%) r@0x266e600 2.5 (size=5) $root_gen_dir/third_party/icu/ucnv_ext.c
string literal (num_aliases=2)
"Str1" (num_aliases=2)
16) 34774476 (92.7%) r@0x266e605 16 $root_gen_dir/third_party/icu/ucnv_ext.c
string literal
"String literal2"
17) 34774519 (92.7%) r@0x266e630 43 {no path}
** merge strings
18) 36739864 (98.0%) r@0x284d600 1965345 {no path}
......
......@@ -25,9 +25,9 @@ GroupCount,Address,SizeWithoutPadding,Padding,NumAliases,PSS,Section,Name
,0x2cd84e0,16,16,1,32.0,R,.Lswitch.table.45
,0x2cd84f0,8,0,1,8.0,R,kSystemClassPrefixes
,0x0,0,33984171,1,33984171.0,o,Overhead: ELF file
,0x266e600,5,0,2,2.5,r,string literal
,0x266e600,5,0,2,2.5,r,string literal
,0x266e605,16,0,1,16.0,r,string literal
,0x266e600,5,0,2,2.5,r,"""Str1"""
,0x266e600,5,0,2,2.5,r,"""Str1"""
,0x266e605,16,0,1,16.0,r,"""String literal2"""
,0x266e630,16,27,1,43.0,r,** merge strings
,0x284d600,3425,1961920,1,1965345.0,r,** merge constants
,0x284e364,0,3,1,3.0,r,** symbol gap 0
......
......@@ -163,13 +163,13 @@ Index | Running Total | Section@Address | ...
flags={} name=Overhead: ELF file
14) 34774457 (92.7%) r@0x266e600 pss=2.5 (size=5) padding=0 num_aliases=2
source_path=base/page_allocator.cc object_path=base/base/page_allocator.o
flags={} name=string literal
flags={} name="Str1"
15) 34774460 (92.7%) r@0x266e600 pss=2.5 (size=5) padding=0 num_aliases=2
source_path=third_party/icu/ucnv_ext.c object_path=third_party/icu/icuuc/ucnv_ext.o
flags={gen} name=string literal
flags={gen} name="Str1"
16) 34774476 (92.7%) r@0x266e605 pss=16 padding=0 num_aliases=1
source_path=third_party/icu/ucnv_ext.c object_path=third_party/icu/icuuc/ucnv_ext.o
flags={gen} name=string literal
flags={gen} name="String literal2"
17) 34774519 (92.7%) r@0x266e630 pss=43 padding=27 num_aliases=1
source_path= object_path=
flags={} name=** merge strings
......@@ -344,13 +344,13 @@ Index | Running Total | Section@Address | ...
flags={} name=Overhead: ELF file
14) 34774457 (92.7%) r@0x266e600 pss=2.5 (size=5) padding=0 num_aliases=2
source_path=base/page_allocator.cc object_path=base/base/page_allocator.o
flags={} name=string literal
flags={} name="Str1"
15) 34774460 (92.7%) r@0x266e600 pss=2.5 (size=5) padding=0 num_aliases=2
source_path=third_party/icu/ucnv_ext.c object_path=third_party/icu/icuuc/ucnv_ext.o
flags={gen} name=string literal
flags={gen} name="Str1"
16) 34774476 (92.7%) r@0x266e605 pss=16 padding=0 num_aliases=1
source_path=third_party/icu/ucnv_ext.c object_path=third_party/icu/icuuc/ucnv_ext.o
flags={gen} name=string literal
flags={gen} name="String literal2"
17) 34774519 (92.7%) r@0x266e630 pss=43 padding=27 num_aliases=1
source_path= object_path=
flags={} name=** merge strings
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment