Commit b08bfffd authored by Samuel Huang, committed by Commit Bot

[Supersize] Add LLD-LTO string literal support.

This is the last in a series of CLs that add LLD-LTO string literal
support to Supersize by parsing BC .o files when LLD-LTO is enabled.
Details:
- Add ResolveStringPieces() (alongside ResolveStringPiecesIndirect()) to
  process strings extracted from BC files.
- Check each .o file processed to decide whether it's a BC file (see the
  detection sketch after this list).
- nm is still used to process all .o files (including BC files) to
  extract symbols.
- Any BC .o files found are processed by the new code that calls
  llvm-bcanalyzer. This provides flexibility since some .o files may
  still be ELF.
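
For context, BC files can be recognized by their magic number, which is how
a check like bcanalyzer.IsBitcodeFile() can work. A minimal sketch, assuming
nothing about the real implementation (the helper name below is invented):

    import struct

    _BITCODE_MAGIC = b'BC\xc0\xde'  # Magic of a raw LLVM bitcode file.
    # Magic of the optional bitcode wrapper header (0x0B17C0DE, little-endian).
    _WRAPPER_MAGIC = struct.pack('<I', 0x0B17C0DE)

    def _IsBitcodeFileSketch(path):
      # The first 4 bytes are enough to identify both variants.
      with open(path, 'rb') as f:
        magic = f.read(4)
      return magic in (_BITCODE_MAGIC, _WRAPPER_MAGIC)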

Bug: 723798
Change-Id: I5b7d1db9f91a2cd6749fe4c57595ac46eee664a8
Reviewed-on: https://chromium-review.googlesource.com/1130006
Reviewed-by: Samuel Huang <huangs@chromium.org>
Reviewed-by: agrieve <agrieve@chromium.org>
Commit-Queue: Samuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#577622}
parent 6735d97d
@@ -50,6 +50,7 @@ import sys
 import threading
 import traceback
 
+import bcanalyzer
 import concurrent
 import demangle
 import nm
@@ -81,9 +82,10 @@ def _MakeToolPrefixAbsolute(tool_prefix):
 
 
 class _PathsByType:
-  def __init__(self, arch, obj):
+  def __init__(self, arch, obj, bc):
     self.arch = arch
     self.obj = obj
+    self.bc = bc
 
 
 class _BulkObjectFileAnalyzerWorker(object):
@@ -93,6 +95,7 @@ class _BulkObjectFileAnalyzerWorker(object):
     self._list_of_encoded_elf_string_ranges_by_path = None
     self._paths_by_name = collections.defaultdict(list)
     self._encoded_string_addresses_by_path_chunks = []
+    self._encoded_strings_by_path_chunks = []
 
   def _ClassifyPaths(self, paths):
     """Classifies |paths| (.o and .a files) by file type into separate lists.
@@ -102,12 +105,19 @@ class _BulkObjectFileAnalyzerWorker(object):
     """
     arch_paths = []
     obj_paths = []
+    bc_paths = []
     for path in paths:
       if path.endswith('.a'):
+        # .a files are typically system libraries containing .o files that are
+        # ELF files (and never BC files).
         arch_paths.append(path)
+      elif bcanalyzer.IsBitcodeFile(os.path.join(self._output_directory, path)):
+        # Chromium build tools create BC files with .o extension. As a result,
+        # IsBitcodeFile() is needed to distinguish BC files from ELF .o files.
+        bc_paths.append(path)
       else:
         obj_paths.append(path)
-    return _PathsByType(arch=arch_paths, obj=obj_paths)
+    return _PathsByType(arch=arch_paths, obj=obj_paths, bc=bc_paths)
 
   def _MakeBatches(self, paths, size=None):
     if size is None:
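
To make the classification concrete, a hypothetical result (paths invented
for illustration):

    paths = ['third_party/libfoo.a', 'obj/plain.o', 'obj/lto.o']
    # If obj/lto.o starts with the bitcode magic, _ClassifyPaths(paths)
    # would return:
    #   _PathsByType(arch=['third_party/libfoo.a'],
    #                obj=['obj/plain.o'],
    #                bc=['obj/lto.o'])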
@@ -124,12 +134,13 @@ class _BulkObjectFileAnalyzerWorker(object):
         output_directory=self._output_directory)
 
   def _RunNm(self, paths_by_type):
-    """Calls nm to get symbols and string addresses."""
+    """Calls nm to get symbols and (for non-BC files) string addresses."""
     # Downstream functions rely upon .a not being grouped.
     batches = self._MakeBatches(paths_by_type.arch, None)
-    # Combine object files and Bitcode files for nm
+    # Combine object files and Bitcode files for nm.
     BATCH_SIZE = 50  # Arbitrarily chosen.
-    batches.extend(self._MakeBatches(paths_by_type.obj, BATCH_SIZE))
+    batches.extend(
+        self._MakeBatches(paths_by_type.obj + paths_by_type.bc, BATCH_SIZE))
     results = self._DoBulkFork(nm.RunNmOnIntermediates, batches)
 
     # Names are still mangled.
@@ -143,12 +154,24 @@ class _BulkObjectFileAnalyzerWorker(object):
       if encoded_strs != concurrent.EMPTY_ENCODED_DICT:
         self._encoded_string_addresses_by_path_chunks.append(encoded_strs)
 
+  def _RunLlvmBcAnalyzer(self, paths_by_type):
+    """Calls llvm-bcanalyzer to extract string data (for LLD-LTO)."""
+    BATCH_SIZE = 50  # Arbitrarily chosen.
+    batches = self._MakeBatches(paths_by_type.bc, BATCH_SIZE)
+    results = self._DoBulkFork(
+        bcanalyzer.RunBcAnalyzerOnIntermediates, batches)
+    for encoded_strs in results:
+      if encoded_strs != concurrent.EMPTY_ENCODED_DICT:
+        self._encoded_strings_by_path_chunks.append(encoded_strs)
+
   def AnalyzePaths(self, paths):
     logging.debug('worker: AnalyzePaths() started.')
     paths_by_type = self._ClassifyPaths(paths)
-    logging.info('File counts: {\'arch\': %d, \'obj\': %d}',
-                 len(paths_by_type.arch), len(paths_by_type.obj))
+    logging.info('File counts: {\'arch\': %d, \'obj\': %d, \'bc\': %d}',
+                 len(paths_by_type.arch), len(paths_by_type.obj),
+                 len(paths_by_type.bc))
     self._RunNm(paths_by_type)
+    self._RunLlvmBcAnalyzer(paths_by_type)
     logging.debug('worker: AnalyzePaths() completed.')
 
   def SortPaths(self):
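
Both _RunNm() and _RunLlvmBcAnalyzer() fan work out through _DoBulkFork().
As a rough stand-in for that pattern (an assumption; the real helper also
encodes results for cheap transfer between processes), the idea is:

    from multiprocessing import Pool

    def _bulk_fork_and_call(func, params_list):
      # Run func(*params) once per batch in a worker pool; yield each result.
      pool = Pool()
      async_results = [pool.apply_async(func, params) for params in params_list]
      pool.close()
      for r in async_results:
        yield r.get()
      pool.join()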
@@ -179,6 +202,14 @@ class _BulkObjectFileAnalyzerWorker(object):
         output_directory=self._output_directory)
     return list(results)
 
+  def _GetEncodedRangesFromStrings(self, string_data):
+    params = ((chunk,) for chunk in self._encoded_strings_by_path_chunks)
+    # Order of the jobs doesn't matter since each job owns independent paths,
+    # and our output is a dict where paths are the key.
+    results = concurrent.BulkForkAndCall(
+        string_extract.ResolveStringPieces, params, string_data=string_data)
+    return list(results)
+
   def AnalyzeStringLiterals(self, elf_path, elf_string_ranges):
     logging.debug('worker: AnalyzeStringLiterals() started.')
     string_data = self._ReadElfStringData(elf_path, elf_string_ranges)
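
The "order of the jobs doesn't matter" comment holds because each forked job
returns ranges only for the paths it owns, so the per-section dicts can be
merged in any order (values below are invented):

    job_a = {'obj/a.o': [(0, 4)]}           # Ranges from one fork.
    job_b = {'obj/b.o': [(8, 2), (12, 6)]}  # Ranges from another fork.
    merged = {}
    for result in (job_b, job_a):  # Any iteration order yields the same dict.
      merged.update(result)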
@@ -186,6 +217,7 @@ class _BulkObjectFileAnalyzerWorker(object):
     # [source_idx][batch_idx][section_idx] -> Encoded {path: [string_ranges]}.
     encoded_ranges_sources = [
         self._GetEncodedRangesFromStringAddresses(string_data),
+        self._GetEncodedRangesFromStrings(string_data),
     ]
     # [section_idx] -> {path: [string_ranges]}.
     self._list_of_encoded_elf_string_ranges_by_path = []
......
@@ -18,8 +18,17 @@ ResolveStringPiecesIndirect():
     each string_section. If found, translates to string_range and annotates it
     to the string_section.
   - Returns [{path: [string_ranges]} for each string_section].
+
+ResolveStringPieces():
+  BulkForkAndCall() target: Given {path: [strings]} and
+  [raw_string_data for each string_section]:
+  - For each path, searches for src_strings in at most 1 raw_string_data over
+    each string_section. If found, translates to string_range and annotates it
+    to the string_section.
+  - Returns [{path: [string_ranges]} for each string_section].
 """
 
+import ast
 import collections
 import itertools
 import logging
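
The "translates to string_range" step described above amounts to locating
each string inside a section's raw bytes. A sketch, assuming a string_range
is an (offset, length) pair (the actual logic lives in _AnnotateStringData()):

    def _find_string_range(raw_string_data, value):
      # Return (offset, length) of value within the section, or None if absent.
      idx = raw_string_data.find(value)
      if idx < 0:
        return None
      return (idx, len(value))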
@@ -266,3 +275,18 @@ def ResolveStringPiecesIndirect(encoded_string_addresses_by_path, string_data,
 
   ret = _AnnotateStringData(string_data, GeneratePathAndValues())
   return [concurrent.EncodeDictOfLists(x) for x in ret]
+
+
+# This is a target for BulkForkAndCall().
+def ResolveStringPieces(encoded_strings_by_path, string_data):
+  # ast.literal_eval() undoes repr() applied to strings.
+  strings_by_path = concurrent.DecodeDictOfLists(
+      encoded_strings_by_path, value_transform=ast.literal_eval)
+
+  def GeneratePathAndValues():
+    for path, strings in strings_by_path.iteritems():
+      for value in strings:
+        yield path, value
+
+  ret = _AnnotateStringData(string_data, GeneratePathAndValues())
+  return [concurrent.EncodeDictOfLists(x) for x in ret]
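
The repr()/ast.literal_eval() round-trip noted in the comment is a standard
way to pass arbitrary (even non-printable) string values through a text
channel:

    import ast

    original = 'hello\x00world\n'
    encoded = repr(original)  # "'hello\\x00world\\n'"
    assert ast.literal_eval(encoded) == original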
@@ -42,6 +42,7 @@
 libsupersize/apkanalyzer.py
 libsupersize/ar.py
 libsupersize/archive.py
+libsupersize/bcanalyzer.py
 libsupersize/canned_queries.py
 libsupersize/concurrent.py
 libsupersize/console.py