[Supersize] Split nm.py into {nm.py, obj_analyzer.py, string_extract.py}.

This is a code movement CL with minimal changes. Details: - Add obj_analyzer.py: New home for BulkObjectFileAnalyzer, along with helper functions. Also inherits main() from nm.py for testing. - Add string_extract.py: New home for {LookupElfRodataInfo(), ReadFileChunks(), and ResolveStringPieces()}. Add top-level comments. - nm.py: Content has been moved to the new files. Also exposes RunNmOnIntermediates(), to be called from obj_analyzer.py. - Update archive.py and console.py to adapt to new code locations. Bug: 723798 Change-Id: I1d1670f04549a416f06de1da03c1a2b03c378461 Reviewed-on: https://chromium-review.googlesource.com/1136943 Commit-Queue: Samuel Huang <huangs@chromium.org> Reviewed-by: agrieve <agrieve@chromium.org> Cr-Commit-Position: refs/heads/master@{#575062}

[Supersize] Split nm.py into {nm.py, obj_analyzer.py, string_extract.py}.
This is a code movement CL with minimal changes. Details: - Add obj_analyzer.py: New home for BulkObjectFileAnalyzer, along with helper functions. Also inherits main() from nm.py for testing. - Add string_extract.py: New home for {LookupElfRodataInfo(), ReadFileChunks(), and ResolveStringPieces()}. Add top-level comments. - nm.py: Content has been moved to the new files. Also exposes RunNmOnIntermediates(), to be called from obj_analyzer.py. - Update archive.py and console.py to adapt to new code locations. Bug: 723798 Change-Id: I1d1670f04549a416f06de1da03c1a2b03c378461 Reviewed-on: https://chromium-review.googlesource.com/1136943 Commit-Queue: Samuel Huang <huangs@chromium.org> Reviewed-by: agrieve <agrieve@chromium.org> Cr-Commit-Position: refs/heads/master@{#575062}
5fbc1731 · Samuel Huang · Commit Bot · af58eea6 · 5fbc1731 · 5fbc1731
Commit 5fbc1731 authored Jul 13, 2018 by Samuel Huang Committed by Commit Bot Jul 13, 2018
6 changed files
--- a/tools/binary_size/libsupersize/archive.py
+++ b/tools/binary_size/libsupersize/archive.py
@@ -30,6 +30,7 @@ import linker_map_parser
 import models
 import ninja_parser
 import nm
+import obj_analyzer
 import path_util
 sys.path.insert(1, os.path.join(path_util.SRC_ROOT, 'tools', 'grit'))
@@ -766,7 +767,7 @@ def _ParseElfInfo(map_path, elf_path, tool_prefix, track_string_literals,
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if outdir_context:
-      bulk_analyzer = nm.BulkObjectFileAnalyzer(
+      bulk_analyzer = obj_analyzer.BulkObjectFileAnalyzer(
          tool_prefix, outdir_context.output_directory)
      bulk_analyzer.AnalyzePaths(outdir_context.elf_object_paths)

--- a/tools/binary_size/libsupersize/console.py
+++ b/tools/binary_size/libsupersize/console.py
@@ -23,8 +23,8 @@ import diff
 import file_format
 import match_util
 import models
-import nm
 import path_util
+import string_extract
 # Number of lines before using less for Print().
@@ -122,7 +122,8 @@ class _Session(object):
    elf_path = self._ElfPathForSymbol(
        size_info, tool_prefix, elf_path)
-    address, offset, _ = nm.LookupElfRodataInfo(elf_path, tool_prefix)
+    address, offset, _ = string_extract.LookupElfRodataInfo(
+        elf_path, tool_prefix)
    adjust = offset - address
    ret = []
    with open(elf_path, 'rb') as f:

--- a/tools/binary_size/libsupersize/nm.py
+++ b/tools/binary_size/libsupersize/nm.py
-#!/usr/bin/env python
 # Copyright 2017 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
@@ -7,85 +6,27 @@
 The design of this file is entirely to work around Python's lack of concurrency.
-CollectAliasesByAddress:
+CollectAliasesByAddress():
  Runs "nm" on the elf to collect all symbol names. This reveals symbol names of
  identical-code-folded functions.
-CollectAliasesByAddressAsync:
+CollectAliasesByAddressAsync():
  Runs CollectAliasesByAddress in a subprocess and returns a promise.
-_BulkObjectFileAnalyzerMaster:
+RunNmOnIntermediates():
-  Creates a subprocess and sends IPCs to it asking it to do work.
+  BulkForkAndCall() target: Runs nm on a .a file or a list of .o files, parses
+  the output, extracts symbol information, and (if available) extracts string
-_BulkObjectFileAnalyzerSlave:
+  offset information.
-  Receives IPCs and delegates logic to _BulkObjectFileAnalyzerWorker.
-  Runs _BulkObjectFileAnalyzerWorker on a background thread in order to stay
-  responsive to IPCs.
-_BulkObjectFileAnalyzerWorker:
-  Performs the actual work. Uses Process Pools to shard out per-object-file
-  work and then aggregates results.
-BulkObjectFileAnalyzer:
-  Alias for _BulkObjectFileAnalyzerMaster, but when SUPERSIZE_DISABLE_ASYNC=1,
-  alias for _BulkObjectFileAnalyzerWorker.
-  * AnalyzePaths: Run "nm" on all .o files to collect symbol names that exist
-    within each. Does not work with thin archives (expand them first).
-  * SortPaths: Sort results of AnalyzePaths().
-  * AnalyzeStringLiterals: Must be run after AnalyzePaths() has completed.
-    Extracts string literals from .o files, and then locates them within the
-    "** merge strings" sections within an ELF's .rodata section.
-This file can also be run stand-alone in order to test out the logic on smaller
-sample sizes.
 """
-from __future__ import print_function
-import argparse
-import atexit
 import collections
-import errno
-import itertools
-import logging
 import os
-import multiprocessing
-import Queue
-import signal
 import subprocess
-import sys
-import threading
-import traceback
-import ar
 import concurrent
 import demangle
-import models
 import path_util
-_MSG_ANALYZE_PATHS = 1
-_MSG_SORT_PATHS = 2
-_MSG_ANALYZE_STRINGS = 3
-_MSG_GET_SYMBOL_NAMES = 4
-_MSG_GET_STRINGS = 5
-_active_pids = None
-def _DecodePosition(x):
-  # Encoded as "123:123"
-  sep_idx = x.index(':')
-  return (int(x[:sep_idx]), int(x[sep_idx + 1:]))
-def _MakeToolPrefixAbsolute(tool_prefix):
-  # Ensure tool_prefix is absolute so that CWD does not affect it
-  if os.path.sep in tool_prefix:
-    # Use abspath() on the dirname to avoid it stripping a trailing /.
-    dirname = os.path.dirname(tool_prefix)
-    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]
-  return tool_prefix
 def _IsRelevantNmName(name):
  # Skip lines like:
@@ -184,86 +125,6 @@ def CollectAliasesByAddressAsync(elf_path, tool_prefix):
      decode_func=decode)
-def _LookupStringSectionPositions(target, tool_prefix, output_directory):
-  """Returns a dict of object_path -> [(offset, size)...] of .rodata sections.
-  Args:
-    target: An archive path string (e.g., "foo.a") or a list of object paths.
-  """
-  is_archive = isinstance(target, basestring)
-  args = [path_util.GetReadElfPath(tool_prefix), '-S', '--wide']
-  if is_archive:
-    args.append(target)
-  else:
-    # Assign path for when len(target) == 1, (no File: line exists).
-    path = target[0]
-    args.extend(target)
-  output = subprocess.check_output(args, cwd=output_directory)
-  lines = output.splitlines()
-  section_positions_by_path = {}
-  cur_offsets = []
-  for line in lines:
-    # File: base/third_party/libevent/libevent.a(buffer.o)
-    # [Nr] Name              Type        Addr     Off    Size   ES Flg Lk Inf Al
-    # [11] .rodata.str1.1    PROGBITS    00000000 0000b4 000004 01 AMS  0   0  1
-    # [11] .rodata.str4.4    PROGBITS    00000000 0000b4 000004 01 AMS  0   0  4
-    # [11] .rodata.str8.8    PROGBITS    00000000 0000b4 000004 01 AMS  0   0  8
-    # [80] .rodata..L.str    PROGBITS    00000000 000530 000002 00   A  0   0  1
-    # The various string sections differ by alignment.
-    # The presence of a wchar_t literal (L"asdf") seems to make a str4 section.
-    # When multiple sections exist, nm gives us no indication as to which
-    # section each string corresponds to.
-    if line.startswith('File: '):
-      if cur_offsets:
-        section_positions_by_path[path] = cur_offsets
-        cur_offsets = []
-      path = line[6:]
-    elif '.rodata.' in line:
-      progbits_idx = line.find('PROGBITS ')
-      if progbits_idx != -1:
-        fields = line[progbits_idx:].split()
-        position = (int(fields[2], 16), int(fields[3], 16))
-        # The heuristics in _IterStringLiterals rely on str1 coming first.
-        if fields[-1] == '1':
-          cur_offsets.insert(0, position)
-        else:
-          cur_offsets.append(position)
-  if cur_offsets:
-    section_positions_by_path[path] = cur_offsets
-  return section_positions_by_path
-def LookupElfRodataInfo(elf_path, tool_prefix):
-  """Returns (address, offset, size) for the .rodata section."""
-  args = [path_util.GetReadElfPath(tool_prefix), '-S', '--wide', elf_path]
-  output = subprocess.check_output(args)
-  lines = output.splitlines()
-  for line in lines:
-    # [Nr] Name           Type        Addr     Off     Size   ES Flg Lk Inf Al
-    # [07] .rodata        PROGBITS    025e7000 237c000 5ec4f6 00   A  0   0 256
-    if '.rodata ' in line:
-      fields = line[line.index(models.SECTION_RODATA):].split()
-      return int(fields[2], 16), int(fields[3], 16), int(fields[4], 16)
-  raise AssertionError('No .rodata for command: ' + repr(args))
-def _ReadFileChunks(path, positions):
-  """Returns a list of strings corresponding to |positions|.
-  Args:
-    positions: List of (offset, size).
-  """
-  ret = []
-  if not positions:
-    return ret
-  with open(path, 'rb') as f:
-    for offset, size in positions:
-      f.seek(offset)
-      ret.append(f.read(size))
-  return ret
 def _ParseOneObjectFileNmOutput(lines):
  # Constructors are often repeated because they have the same unmangled
  # name, but multiple mangled names. See:
@@ -288,145 +149,8 @@ def _ParseOneObjectFileNmOutput(lines):
  return string_addresses, symbol_names
-def _ReadStringSections(target, output_directory, positions_by_path):
-  """Returns a dict of object_path -> [string...] of .rodata chunks.
-  Args:
-    target: An archive path string (e.g., "foo.a") or a list of object paths.
-    positions_by_path: A dict of object_path -> [(offset, size)...]
-  """
-  is_archive = isinstance(target, basestring)
-  string_sections_by_path = {}
-  if is_archive:
-    for subpath, chunk in ar.IterArchiveChunks(
-        os.path.join(output_directory, target)):
-      path = '{}({})'.format(target, subpath)
-      positions = positions_by_path.get(path)
-      # No positions if file has no string literals.
-      if positions:
-        string_sections_by_path[path] = (
-            [chunk[offset:offset + size] for offset, size in positions])
-  else:
-    for path in target:
-      positions = positions_by_path.get(path)
-      # We already log a warning about this in _IterStringLiterals().
-      if positions:
-        string_sections_by_path[path] = _ReadFileChunks(
-            os.path.join(output_directory, path), positions)
-  return string_sections_by_path
-def _ExtractArchivePath(path):
-  # E.g. foo/bar.a(baz.o)
-  if path.endswith(')'):
-    start_idx = path.index('(')
-    return path[:start_idx]
-  return None
-def _IterStringLiterals(path, addresses, obj_sections):
-  """Yields all string literals (including \0) for the given object path.
-  Args:
-    path: Object file path.
-    addresses: List of string offsets encoded as hex strings.
-    obj_sections: List of contents of .rodata.str sections read from the given
-        object file.
-  """
-  next_offsets = sorted(int(a, 16) for a in addresses)
-  if not obj_sections:
-    # Happens when there is an address for a symbol which is not actually a
-    # string literal, or when string_sections_by_path is missing an entry.
-    logging.warning('Object has %d strings but no string sections: %s',
-                    len(addresses), path)
-    return
-  for section_data in obj_sections:
-    cur_offsets = next_offsets
-    # Always assume first element is 0. I'm not entirely sure why this is
-    # necessary, but strings get missed without it.
-    next_offsets = [0]
-    prev_offset = 0
-    # TODO(agrieve): Switch to using nm --print-size in order to capture the
-    #     address+size of each string rather than just the address.
-    for offset in cur_offsets[1:]:
-      if offset >= len(section_data):
-        # Remaining offsets are for next section.
-        next_offsets.append(offset)
-        continue
-      # Figure out which offsets apply to this section via heuristic of them
-      # all ending with a null character.
-      if offset == prev_offset or section_data[offset - 1] != '\0':
-        next_offsets.append(offset)
-        continue
-      yield section_data[prev_offset:offset]
-      prev_offset = offset
-    if prev_offset < len(section_data):
-      yield section_data[prev_offset:]
-# This is a target for BulkForkAndCall().
-def _ResolveStringPieces(encoded_string_addresses_by_path, string_data,
-                         tool_prefix, output_directory):
-  string_addresses_by_path = concurrent.DecodeDictOfLists(
-      encoded_string_addresses_by_path)
-  # Assign |target| as archive path, or a list of object paths.
-  any_path = next(string_addresses_by_path.iterkeys())
-  target = _ExtractArchivePath(any_path)
-  if not target:
-    target = string_addresses_by_path.keys()
-  # Run readelf to find location of .rodata within the .o files.
-  section_positions_by_path = _LookupStringSectionPositions(
-      target, tool_prefix, output_directory)
-  # Load the .rodata sections (from object files) as strings.
-  string_sections_by_path = _ReadStringSections(
-      target, output_directory, section_positions_by_path)
-  # list of elf_positions_by_path.
-  ret = [collections.defaultdict(list) for _ in string_data]
-  # Brute-force search of strings within ** merge strings sections.
-  # This is by far the slowest part of AnalyzeStringLiterals().
-  # TODO(agrieve): Pre-process string_data into a dict of literal->address (at
-  #     least for ascii strings).
-  for path, object_addresses in string_addresses_by_path.iteritems():
-    for value in _IterStringLiterals(
-        path, object_addresses, string_sections_by_path.get(path)):
-      first_match = -1
-      first_match_dict = None
-      for target_dict, data in itertools.izip(ret, string_data):
-        # Set offset so that it will be 0 when len(value) is added to it below.
-        offset = -len(value)
-        while True:
-          offset = data.find(value, offset + len(value))
-          if offset == -1:
-            break
-          # Preferring exact matches (those following \0) over substring matches
-          # significantly increases accuracy (although shows that linker isn't
-          # being optimal).
-          if offset == 0 or data[offset - 1] == '\0':
-            break
-          if first_match == -1:
-            first_match = offset
-            first_match_dict = target_dict
-        if offset != -1:
-          break
-      if offset == -1:
-        # Exact match not found, so take suffix match if it exists.
-        offset = first_match
-        target_dict = first_match_dict
-      # Missing strings happen when optimization make them unused.
-      if offset != -1:
-        # Encode tuple as a string for easier mashalling.
-        target_dict[path].append(
-            str(offset) + ':' + str(len(value)))
-  return [concurrent.EncodeDictOfLists(x) for x in ret]
 # This is a target for BulkForkAndCall().
-def _RunNmOnIntermediates(target, tool_prefix, output_directory):
+def RunNmOnIntermediates(target, tool_prefix, output_directory):
  """Returns encoded_symbol_names_by_path, encoded_string_addresses_by_path.
  Args:
@@ -471,295 +195,3 @@ def _RunNmOnIntermediates(target, tool_prefix, output_directory):
  #     down on marshalling overhead.
  return (concurrent.EncodeDictOfLists(symbol_names_by_path),
          concurrent.EncodeDictOfLists(string_addresses_by_path))
-class _BulkObjectFileAnalyzerWorker(object):
-  def __init__(self, tool_prefix, output_directory):
-    self._tool_prefix = _MakeToolPrefixAbsolute(tool_prefix)
-    self._output_directory = output_directory
-    self._paths_by_name = collections.defaultdict(list)
-    self._encoded_string_addresses_by_path_chunks = []
-    self._list_of_encoded_elf_string_positions_by_path = None
-  def AnalyzePaths(self, paths):
-    def iter_job_params():
-      object_paths = []
-      for path in paths:
-        # Note: _ResolveStringPieces relies upon .a not being grouped.
-        if path.endswith('.a'):
-          yield (path,)
-        else:
-          object_paths.append(path)
-      BATCH_SIZE = 50  # Chosen arbitrarily.
-      for i in xrange(0, len(object_paths), BATCH_SIZE):
-        batch = object_paths[i:i + BATCH_SIZE]
-        yield (batch,)
-    params = list(iter_job_params())
-    # Order of the jobs doesn't matter since each job owns independent paths,
-    # and our output is a dict where paths are the key.
-    results = concurrent.BulkForkAndCall(
-        _RunNmOnIntermediates, params, tool_prefix=self._tool_prefix,
-        output_directory=self._output_directory)
-    # Names are still mangled.
-    all_paths_by_name = self._paths_by_name
-    for encoded_syms, encoded_strs in results:
-      symbol_names_by_path = concurrent.DecodeDictOfLists(encoded_syms)
-      for path, names in symbol_names_by_path.iteritems():
-        for name in names:
-          all_paths_by_name[name].append(path)
-      if encoded_strs != concurrent.EMPTY_ENCODED_DICT:
-        self._encoded_string_addresses_by_path_chunks.append(encoded_strs)
-    logging.debug('worker: AnalyzePaths() completed.')
-  def SortPaths(self):
-    # Finally, demangle all names, which can result in some merging of lists.
-    self._paths_by_name = demangle.DemangleKeysAndMergeLists(
-        self._paths_by_name, self._tool_prefix)
-    # Sort and uniquefy.
-    for key in self._paths_by_name.iterkeys():
-      self._paths_by_name[key] = sorted(set(self._paths_by_name[key]))
-  def AnalyzeStringLiterals(self, elf_path, elf_string_positions):
-    logging.debug('worker: AnalyzeStringLiterals() started.')
-    # Read string_data from elf_path, to be shared by forked processes.
-    address, offset, _ = LookupElfRodataInfo(elf_path, self._tool_prefix)
-    adjust = address - offset
-    abs_string_positions = (
-        (addr - adjust, s) for addr, s in elf_string_positions)
-    string_data = _ReadFileChunks(elf_path, abs_string_positions)
-    params = ((chunk,)
-        for chunk in self._encoded_string_addresses_by_path_chunks)
-    # Order of the jobs doesn't matter since each job owns independent paths,
-    # and our output is a dict where paths are the key.
-    results = concurrent.BulkForkAndCall(
-        _ResolveStringPieces, params, string_data=string_data,
-        tool_prefix=self._tool_prefix, output_directory=self._output_directory)
-    results = list(results)
-    final_result = []
-    for i in xrange(len(elf_string_positions)):
-      final_result.append(
-          concurrent.JoinEncodedDictOfLists([r[i] for r in results]))
-    self._list_of_encoded_elf_string_positions_by_path = final_result
-    logging.debug('worker: AnalyzeStringLiterals() completed.')
-  def GetSymbolNames(self):
-    return self._paths_by_name
-  def GetStringPositions(self):
-    return [concurrent.DecodeDictOfLists(x, value_transform=_DecodePosition)
-            for x in self._list_of_encoded_elf_string_positions_by_path]
-  def GetEncodedStringPositions(self):
-    return self._list_of_encoded_elf_string_positions_by_path
-  def Close(self):
-    pass
-def _TerminateSubprocesses():
-  global _active_pids
-  if _active_pids:
-    for pid in _active_pids:
-      os.kill(pid, signal.SIGKILL)
-    _active_pids = []
-class _BulkObjectFileAnalyzerMaster(object):
-  """Runs BulkObjectFileAnalyzer in a subprocess."""
-  def __init__(self, tool_prefix, output_directory):
-    self._child_pid = None
-    self._pipe = None
-    self._tool_prefix = tool_prefix
-    self._output_directory = output_directory
-  def _Spawn(self):
-    global _active_pids
-    parent_conn, child_conn = multiprocessing.Pipe()
-    self._child_pid = os.fork()
-    if self._child_pid:
-      # We are the parent process.
-      if _active_pids is None:
-        _active_pids = []
-        atexit.register(_TerminateSubprocesses)
-      _active_pids.append(self._child_pid)
-      self._pipe = parent_conn
-    else:
-      # We are the child process.
-      logging.root.handlers[0].setFormatter(logging.Formatter(
-          'nm: %(levelname).1s %(relativeCreated)6d %(message)s'))
-      worker_analyzer = _BulkObjectFileAnalyzerWorker(
-          self._tool_prefix, self._output_directory)
-      slave = _BulkObjectFileAnalyzerSlave(worker_analyzer, child_conn)
-      slave.Run()
-  def AnalyzePaths(self, paths):
-    if self._child_pid is None:
-      self._Spawn()
-    logging.debug('Sending batch of %d paths to subprocess', len(paths))
-    payload = '\x01'.join(paths)
-    self._pipe.send((_MSG_ANALYZE_PATHS, payload))
-  def SortPaths(self):
-    self._pipe.send((_MSG_SORT_PATHS,))
-  def AnalyzeStringLiterals(self, elf_path, string_positions):
-    self._pipe.send((_MSG_ANALYZE_STRINGS, elf_path, string_positions))
-  def GetSymbolNames(self):
-    self._pipe.send((_MSG_GET_SYMBOL_NAMES,))
-    self._pipe.recv()  # None
-    logging.debug('Decoding nm results from forked process')
-    encoded_paths_by_name = self._pipe.recv()
-    return concurrent.DecodeDictOfLists(encoded_paths_by_name)
-  def GetStringPositions(self):
-    self._pipe.send((_MSG_GET_STRINGS,))
-    self._pipe.recv()  # None
-    logging.debug('Decoding string symbol results from forked process')
-    result = self._pipe.recv()
-    return [concurrent.DecodeDictOfLists(x, value_transform=_DecodePosition)
-            for x in result]
-  def Close(self):
-    self._pipe.close()
-    # Child process should terminate gracefully at this point, but leave it in
-    # _active_pids to be killed just in case.
-class _BulkObjectFileAnalyzerSlave(object):
-  """The subprocess entry point."""
-  def __init__(self, worker_analyzer, pipe):
-    self._worker_analyzer = worker_analyzer
-    self._pipe = pipe
-    # Use a worker thread so that AnalyzeStringLiterals() is non-blocking. The
-    # thread allows the main thread to process a call to GetSymbolNames() while
-    # AnalyzeStringLiterals() is in progress.
-    self._job_queue = Queue.Queue()
-    self._worker_thread = threading.Thread(target=self._WorkerThreadMain)
-    self._allow_analyze_paths = True
-  def _WorkerThreadMain(self):
-    while True:
-      # Handle exceptions so test failure will be explicit and not block.
-      try:
-        func = self._job_queue.get()
-        func()
-      except Exception:
-        traceback.print_exc()
-      self._job_queue.task_done()
-  def _WaitForAnalyzePathJobs(self):
-    if self._allow_analyze_paths:
-      self._job_queue.join()
-      self._allow_analyze_paths = False
-  # Handle messages in a function outside the event loop, so local variables are
-  # independent across messages, and can be bound to jobs by lambdas using
-  # closures instead of functools.partial().
-  def _HandleMessage(self, message):
-    if message[0] == _MSG_ANALYZE_PATHS:
-      assert self._allow_analyze_paths, (
-          'Cannot call AnalyzePaths() after AnalyzeStringLiterals()s.')
-      paths = message[1].split('\x01')
-      self._job_queue.put(lambda: self._worker_analyzer.AnalyzePaths(paths))
-    elif message[0] == _MSG_SORT_PATHS:
-      assert self._allow_analyze_paths, (
-          'Cannot call SortPaths() after AnalyzeStringLiterals()s.')
-      self._job_queue.put(self._worker_analyzer.SortPaths)
-    elif message[0] == _MSG_ANALYZE_STRINGS:
-      self._WaitForAnalyzePathJobs()
-      elf_path, string_positions = message[1:]
-      self._job_queue.put(
-          lambda: self._worker_analyzer.AnalyzeStringLiterals(
-              elf_path, string_positions))
-    elif message[0] == _MSG_GET_SYMBOL_NAMES:
-      self._WaitForAnalyzePathJobs()
-      self._pipe.send(None)
-      paths_by_name = self._worker_analyzer.GetSymbolNames()
-      self._pipe.send(concurrent.EncodeDictOfLists(paths_by_name))
-    elif message[0] == _MSG_GET_STRINGS:
-      self._job_queue.join()
-      # Send a None packet so that other side can measure IPC transfer time.
-      self._pipe.send(None)
-      self._pipe.send(self._worker_analyzer.GetEncodedStringPositions())
-  def Run(self):
-    try:
-      self._worker_thread.start()
-      while True:
-        self._HandleMessage(self._pipe.recv())
-    except EOFError:
-      pass
-    except EnvironmentError, e:
-      # Parent process exited so don't log.
-      if e.errno in (errno.EPIPE, errno.ECONNRESET):
-        sys.exit(1)
-    logging.debug('nm bulk subprocess finished.')
-    sys.exit(0)
-BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
-if concurrent.DISABLE_ASYNC:
-  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker
-def main():
-  parser = argparse.ArgumentParser()
-  parser.add_argument('--multiprocess', action='store_true')
-  parser.add_argument('--tool-prefix', required=True)
-  parser.add_argument('--output-directory', required=True)
-  parser.add_argument('--elf-file', type=os.path.realpath)
-  parser.add_argument('--show-names', action='store_true')
-  parser.add_argument('--show-strings', action='store_true')
-  parser.add_argument('objects', type=os.path.realpath, nargs='+')
-  args = parser.parse_args()
-  logging.basicConfig(level=logging.DEBUG,
-                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
-  if args.multiprocess:
-    bulk_analyzer = _BulkObjectFileAnalyzerMaster(
-        args.tool_prefix, args.output_directory)
-  else:
-    concurrent.DISABLE_ASYNC = True
-    bulk_analyzer = _BulkObjectFileAnalyzerWorker(
-        args.tool_prefix, args.output_directory)
-  # Pass individually to test multiple calls.
-  for path in args.objects:
-    bulk_analyzer.AnalyzePaths([path])
-  bulk_analyzer.SortPaths()
-  names_to_paths = bulk_analyzer.GetSymbolNames()
-  print('Found {} names'.format(len(names_to_paths)))
-  if args.show_names:
-    for name, paths in names_to_paths.iteritems():
-      print('{}: {!r}'.format(name, paths))
-  if args.elf_file:
-    address, offset, size = LookupElfRodataInfo(
-        args.elf_file, args.tool_prefix)
-    bulk_analyzer.AnalyzeStringLiterals(args.elf_file, ((address, size),))
-    positions_by_path = bulk_analyzer.GetStringPositions()[0]
-    print('Found {} string literals'.format(sum(
-        len(v) for v in positions_by_path.itervalues())))
-    if args.show_strings:
-      logging.debug('.rodata adjust=%d', address - offset)
-      for path, positions in positions_by_path.iteritems():
-        strs = _ReadFileChunks(
-            args.elf_file, ((offset + addr, size) for addr, size in positions))
-        print('{}: {!r}'.format(
-            path, [s if len(s) < 20 else s[:20] + '...' for s in strs]))
-if __name__ == '__main__':
-  main()
--- a/tools/binary_size/libsupersize/obj_analyzer.py
+++ b/tools/binary_size/libsupersize/obj_analyzer.py
+#!/usr/bin/env python
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Analyzer for Object Files.
+This file works around Python's lack of concurrency.
+_BulkObjectFileAnalyzerMaster:
+  Creates a subprocess and sends IPCs to it asking it to do work.
+_BulkObjectFileAnalyzerSlave:
+  Receives IPCs and delegates logic to _BulkObjectFileAnalyzerWorker.
+  Runs _BulkObjectFileAnalyzerWorker on a background thread in order to stay
+  responsive to IPCs.
+_BulkObjectFileAnalyzerWorker:
+  Performs the actual work. Uses Process Pools to shard out per-object-file
+  work and then aggregates results.
+BulkObjectFileAnalyzer:
+  Alias for _BulkObjectFileAnalyzerMaster, but when SUPERSIZE_DISABLE_ASYNC=1,
+  alias for _BulkObjectFileAnalyzerWorker.
+  * AnalyzePaths: Run "nm" on all .o files to collect symbol names that exist
+    within each. Does not work with thin archives (expand them first).
+  * SortPaths: Sort results of AnalyzePaths().
+  * AnalyzeStringLiterals: Must be run after AnalyzePaths() has completed.
+    Extracts string literals from .o files, and then locates them within the
+    "** merge strings" sections within an ELF's .rodata section.
+This file can also be run stand-alone in order to test out the logic on smaller
+sample sizes.
+"""
+from __future__ import print_function
+import argparse
+import atexit
+import collections
+import errno
+import logging
+import os
+import multiprocessing
+import Queue
+import signal
+import sys
+import threading
+import traceback
+import concurrent
+import demangle
+import nm
+import string_extract
+_MSG_ANALYZE_PATHS = 1
+_MSG_SORT_PATHS = 2
+_MSG_ANALYZE_STRINGS = 3
+_MSG_GET_SYMBOL_NAMES = 4
+_MSG_GET_STRINGS = 5
+_active_pids = None
+def _DecodePosition(x):
+  # Encoded as "123:123"
+  sep_idx = x.index(':')
+  return (int(x[:sep_idx]), int(x[sep_idx + 1:]))
+def _MakeToolPrefixAbsolute(tool_prefix):
+  # Ensure tool_prefix is absolute so that CWD does not affect it
+  if os.path.sep in tool_prefix:
+    # Use abspath() on the dirname to avoid it stripping a trailing /.
+    dirname = os.path.dirname(tool_prefix)
+    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]
+  return tool_prefix
+class _BulkObjectFileAnalyzerWorker(object):
+  def __init__(self, tool_prefix, output_directory):
+    self._tool_prefix = _MakeToolPrefixAbsolute(tool_prefix)
+    self._output_directory = output_directory
+    self._paths_by_name = collections.defaultdict(list)
+    self._encoded_string_addresses_by_path_chunks = []
+    self._list_of_encoded_elf_string_positions_by_path = None
+  def AnalyzePaths(self, paths):
+    def iter_job_params():
+      object_paths = []
+      for path in paths:
+        # Note: ResolveStringPieces() relies upon .a not being grouped.
+        if path.endswith('.a'):
+          yield (path,)
+        else:
+          object_paths.append(path)
+      BATCH_SIZE = 50  # Chosen arbitrarily.
+      for i in xrange(0, len(object_paths), BATCH_SIZE):
+        batch = object_paths[i:i + BATCH_SIZE]
+        yield (batch,)
+    params = list(iter_job_params())
+    # Order of the jobs doesn't matter since each job owns independent paths,
+    # and our output is a dict where paths are the key.
+    results = concurrent.BulkForkAndCall(
+        nm.RunNmOnIntermediates, params, tool_prefix=self._tool_prefix,
+        output_directory=self._output_directory)
+    # Names are still mangled.
+    all_paths_by_name = self._paths_by_name
+    for encoded_syms, encoded_strs in results:
+      symbol_names_by_path = concurrent.DecodeDictOfLists(encoded_syms)
+      for path, names in symbol_names_by_path.iteritems():
+        for name in names:
+          all_paths_by_name[name].append(path)
+      if encoded_strs != concurrent.EMPTY_ENCODED_DICT:
+        self._encoded_string_addresses_by_path_chunks.append(encoded_strs)
+    logging.debug('worker: AnalyzePaths() completed.')
+  def SortPaths(self):
+    # Finally, demangle all names, which can result in some merging of lists.
+    self._paths_by_name = demangle.DemangleKeysAndMergeLists(
+        self._paths_by_name, self._tool_prefix)
+    # Sort and uniquefy.
+    for key in self._paths_by_name.iterkeys():
+      self._paths_by_name[key] = sorted(set(self._paths_by_name[key]))
+  def AnalyzeStringLiterals(self, elf_path, elf_string_positions):
+    logging.debug('worker: AnalyzeStringLiterals() started.')
+    # Read string_data from elf_path, to be shared by forked processes.
+    address, offset, _ = string_extract.LookupElfRodataInfo(
+        elf_path, self._tool_prefix)
+    adjust = address - offset
+    abs_string_positions = (
+        (addr - adjust, s) for addr, s in elf_string_positions)
+    string_data = string_extract.ReadFileChunks(elf_path, abs_string_positions)
+    params = ((chunk,)
+        for chunk in self._encoded_string_addresses_by_path_chunks)
+    # Order of the jobs doesn't matter since each job owns independent paths,
+    # and our output is a dict where paths are the key.
+    results = concurrent.BulkForkAndCall(
+        string_extract.ResolveStringPieces, params, string_data=string_data,
+        tool_prefix=self._tool_prefix, output_directory=self._output_directory)
+    results = list(results)
+    final_result = []
+    for i in xrange(len(elf_string_positions)):
+      final_result.append(
+          concurrent.JoinEncodedDictOfLists([r[i] for r in results]))
+    self._list_of_encoded_elf_string_positions_by_path = final_result
+    logging.debug('worker: AnalyzeStringLiterals() completed.')
+  def GetSymbolNames(self):
+    """Returns the dict of symbol name -> list of paths collected so far."""
+    return self._paths_by_name
+  def GetStringPositions(self):
+    return [concurrent.DecodeDictOfLists(x, value_transform=_DecodePosition)
+            for x in self._list_of_encoded_elf_string_positions_by_path]
+  def GetEncodedStringPositions(self):
+    """Returns the AnalyzeStringLiterals() results, still in encoded form."""
+    return self._list_of_encoded_elf_string_positions_by_path
+  def Close(self):
+    """No-op. Defined so this class offers the same methods as the master."""
+    pass
+def _TerminateSubprocesses():
+  global _active_pids
+  if _active_pids:
+    for pid in _active_pids:
+      os.kill(pid, signal.SIGKILL)
+    _active_pids = []
+class _BulkObjectFileAnalyzerMaster(object):
+  """Runs BulkObjectFileAnalyzer in a subprocess."""
+  def __init__(self, tool_prefix, output_directory):
+    self._child_pid = None
+    self._pipe = None
+    self._tool_prefix = tool_prefix
+    self._output_directory = output_directory
+  def _Spawn(self):
+    global _active_pids
+    parent_conn, child_conn = multiprocessing.Pipe()
+    self._child_pid = os.fork()
+    if self._child_pid:
+      # We are the parent process.
+      if _active_pids is None:
+        _active_pids = []
+        atexit.register(_TerminateSubprocesses)
+      _active_pids.append(self._child_pid)
+      self._pipe = parent_conn
+    else:
+      # We are the child process.
+      logging.root.handlers[0].setFormatter(logging.Formatter(
+          'nm: %(levelname).1s %(relativeCreated)6d %(message)s'))
+      worker_analyzer = _BulkObjectFileAnalyzerWorker(
+          self._tool_prefix, self._output_directory)
+      slave = _BulkObjectFileAnalyzerSlave(worker_analyzer, child_conn)
+      slave.Run()
+  def AnalyzePaths(self, paths):
+    if self._child_pid is None:
+      self._Spawn()
+    logging.debug('Sending batch of %d paths to subprocess', len(paths))
+    payload = '\x01'.join(paths)
+    self._pipe.send((_MSG_ANALYZE_PATHS, payload))
+  def SortPaths(self):
+    self._pipe.send((_MSG_SORT_PATHS,))
+  def AnalyzeStringLiterals(self, elf_path, string_positions):
+    self._pipe.send((_MSG_ANALYZE_STRINGS, elf_path, string_positions))
+  def GetSymbolNames(self):
+    self._pipe.send((_MSG_GET_SYMBOL_NAMES,))
+    self._pipe.recv()  # None
+    logging.debug('Decoding nm results from forked process')
+    encoded_paths_by_name = self._pipe.recv()
+    return concurrent.DecodeDictOfLists(encoded_paths_by_name)
+  def GetStringPositions(self):
+    self._pipe.send((_MSG_GET_STRINGS,))
+    self._pipe.recv()  # None
+    logging.debug('Decoding string symbol results from forked process')
+    result = self._pipe.recv()
+    return [concurrent.DecodeDictOfLists(x, value_transform=_DecodePosition)
+            for x in result]
+  def Close(self):
+    self._pipe.close()
+    # Child process should terminate gracefully at this point, but leave it in
+    # _active_pids to be killed just in case.
+class _BulkObjectFileAnalyzerSlave(object):
+  """The subprocess entry point."""
+  def __init__(self, worker_analyzer, pipe):
+    self._worker_analyzer = worker_analyzer
+    self._pipe = pipe
+    # Use a worker thread so that AnalyzeStringLiterals() is non-blocking. The
+    # thread allows the main thread to process a call to GetSymbolNames() while
+    # AnalyzeStringLiterals() is in progress.
+    self._job_queue = Queue.Queue()
+    self._worker_thread = threading.Thread(target=self._WorkerThreadMain)
+    self._allow_analyze_paths = True
+  def _WorkerThreadMain(self):
+    while True:
+      # Handle exceptions so test failure will be explicit and not block.
+      try:
+        func = self._job_queue.get()
+        func()
+      except Exception:
+        traceback.print_exc()
+      self._job_queue.task_done()
+  def _WaitForAnalyzePathJobs(self):
+    if self._allow_analyze_paths:
+      self._job_queue.join()
+      self._allow_analyze_paths = False
+  # Handle messages in a function outside the event loop, so local variables are
+  # independent across messages, and can be bound to jobs by lambdas using
+  # closures instead of functools.partial().
+  def _HandleMessage(self, message):
+    if message[0] == _MSG_ANALYZE_PATHS:
+      assert self._allow_analyze_paths, (
+          'Cannot call AnalyzePaths() after AnalyzeStringLiterals()s.')
+      paths = message[1].split('\x01')
+      self._job_queue.put(lambda: self._worker_analyzer.AnalyzePaths(paths))
+    elif message[0] == _MSG_SORT_PATHS:
+      assert self._allow_analyze_paths, (
+          'Cannot call SortPaths() after AnalyzeStringLiterals()s.')
+      self._job_queue.put(self._worker_analyzer.SortPaths)
+    elif message[0] == _MSG_ANALYZE_STRINGS:
+      self._WaitForAnalyzePathJobs()
+      elf_path, string_positions = message[1:]
+      self._job_queue.put(
+          lambda: self._worker_analyzer.AnalyzeStringLiterals(
+              elf_path, string_positions))
+    elif message[0] == _MSG_GET_SYMBOL_NAMES:
+      self._WaitForAnalyzePathJobs()
+      self._pipe.send(None)
+      paths_by_name = self._worker_analyzer.GetSymbolNames()
+      self._pipe.send(concurrent.EncodeDictOfLists(paths_by_name))
+    elif message[0] == _MSG_GET_STRINGS:
+      self._job_queue.join()
+      # Send a None packet so that other side can measure IPC transfer time.
+      self._pipe.send(None)
+      self._pipe.send(self._worker_analyzer.GetEncodedStringPositions())
+  def Run(self):
+    try:
+      self._worker_thread.start()
+      while True:
+        self._HandleMessage(self._pipe.recv())
+    except EOFError:
+      pass
+    except EnvironmentError, e:
+      # Parent process exited so don't log.
+      if e.errno in (errno.EPIPE, errno.ECONNRESET):
+        sys.exit(1)
+    logging.debug('nm bulk subprocess finished.')
+    sys.exit(0)
+BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
+if concurrent.DISABLE_ASYNC:
+  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--multiprocess', action='store_true')
+  parser.add_argument('--tool-prefix', required=True)
+  parser.add_argument('--output-directory', required=True)
+  parser.add_argument('--elf-file', type=os.path.realpath)
+  parser.add_argument('--show-names', action='store_true')
+  parser.add_argument('--show-strings', action='store_true')
+  parser.add_argument('objects', type=os.path.realpath, nargs='+')
+  args = parser.parse_args()
+  logging.basicConfig(level=logging.DEBUG,
+                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
+  if args.multiprocess:
+    bulk_analyzer = _BulkObjectFileAnalyzerMaster(
+        args.tool_prefix, args.output_directory)
+  else:
+    concurrent.DISABLE_ASYNC = True
+    bulk_analyzer = _BulkObjectFileAnalyzerWorker(
+        args.tool_prefix, args.output_directory)
+  # Pass individually to test multiple calls.
+  for path in args.objects:
+    bulk_analyzer.AnalyzePaths([path])
+  bulk_analyzer.SortPaths()
+  names_to_paths = bulk_analyzer.GetSymbolNames()
+  print('Found {} names'.format(len(names_to_paths)))
+  if args.show_names:
+    for name, paths in names_to_paths.iteritems():
+      print('{}: {!r}'.format(name, paths))
+  if args.elf_file:
+    address, offset, size = string_extract.LookupElfRodataInfo(
+        args.elf_file, args.tool_prefix)
+    bulk_analyzer.AnalyzeStringLiterals(args.elf_file, ((address, size),))
+    positions_by_path = bulk_analyzer.GetStringPositions()[0]
+    print('Found {} string literals'.format(sum(
+        len(v) for v in positions_by_path.itervalues())))
+    if args.show_strings:
+      logging.debug('.rodata adjust=%d', address - offset)
+      for path, positions in positions_by_path.iteritems():
+        strs = string_extract.ReadFileChunks(
+            args.elf_file, ((offset + addr, size) for addr, size in positions))
+        print('{}: {!r}'.format(
+            path, [s if len(s) < 20 else s[:20] + '...' for s in strs]))
+# Run the test driver only when executed as a script, not when imported.
+if __name__ == '__main__':
+  main()
--- a/tools/binary_size/libsupersize/string_extract.py
+++ b/tools/binary_size/libsupersize/string_extract.py
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Utilities to extract string literals from object files.
+LookupElfRodataInfo():
+  Runs readelf to extract and return .rodata section spec of an ELF file.
+ReadFileChunks():
+  Reads raw data from a file, given a list of ranges in the file.
+ResolveStringPieces():
+  BulkForkAndCall() target: Given {path: [string addresses]} and
+  [raw_string_data for each string_section]:
+  - Reads {path: [src_strings]}.
+  - For each path, searches for src_strings in at most 1 raw_string_data over
+    each string_section. If found, translates to string_range and annotates it
+    to the string_section.
+  - Returns [{path: [string_ranges]} for each string_section].
+"""
+import collections
+import itertools
+import logging
+import os
+import subprocess
+import ar
+import concurrent
+import models
+import path_util
+def LookupElfRodataInfo(elf_path, tool_prefix):
+  """Returns (address, offset, size) for the .rodata section."""
+  args = [path_util.GetReadElfPath(tool_prefix), '-S', '--wide', elf_path]
+  output = subprocess.check_output(args)
+  lines = output.splitlines()
+  for line in lines:
+    # [Nr] Name           Type        Addr     Off     Size   ES Flg Lk Inf Al
+    # [07] .rodata        PROGBITS    025e7000 237c000 5ec4f6 00   A  0   0 256
+    if '.rodata ' in line:
+      fields = line[line.index(models.SECTION_RODATA):].split()
+      return int(fields[2], 16), int(fields[3], 16), int(fields[4], 16)
+  raise AssertionError('No .rodata for command: ' + repr(args))
+def ReadFileChunks(path, positions):
+  """Returns a list of strings corresponding to |positions|.
+  Args:
+    positions: List of (offset, size).
+  """
+  ret = []
+  if not positions:
+    return ret
+  with open(path, 'rb') as f:
+    for offset, size in positions:
+      f.seek(offset)
+      ret.append(f.read(size))
+  return ret
+def _ExtractArchivePath(path):
+  # E.g. foo/bar.a(baz.o)
+  if path.endswith(')'):
+    start_idx = path.index('(')
+    return path[:start_idx]
+  return None
+def _LookupStringSectionPositions(target, tool_prefix, output_directory):
+  """Returns a dict of object_path -> [(offset, size)...] of .rodata sections.
+  Runs readelf with |output_directory| as the working directory and parses
+  its section listing. Paths without any matching section are left out.
+  Args:
+    target: An archive path string (e.g., "foo.a") or a list of object paths.
+    tool_prefix: Passed to path_util.GetReadElfPath() to locate readelf.
+    output_directory: Working directory for the readelf invocation.
+  """
+  is_archive = isinstance(target, basestring)
+  args = [path_util.GetReadElfPath(tool_prefix), '-S', '--wide']
+  if is_archive:
+    args.append(target)
+  else:
+    # Assign path for when len(target) == 1, (no File: line exists).
+    path = target[0]
+    args.extend(target)
+  output = subprocess.check_output(args, cwd=output_directory)
+  lines = output.splitlines()
+  section_positions_by_path = {}
+  # Positions gathered for the file named by the most recent "File:" line.
+  cur_offsets = []
+  for line in lines:
+    # File: base/third_party/libevent/libevent.a(buffer.o)
+    # [Nr] Name              Type        Addr     Off    Size   ES Flg Lk Inf Al
+    # [11] .rodata.str1.1    PROGBITS    00000000 0000b4 000004 01 AMS  0   0  1
+    # [11] .rodata.str4.4    PROGBITS    00000000 0000b4 000004 01 AMS  0   0  4
+    # [11] .rodata.str8.8    PROGBITS    00000000 0000b4 000004 01 AMS  0   0  8
+    # [80] .rodata..L.str    PROGBITS    00000000 000530 000002 00   A  0   0  1
+    # The various string sections differ by alignment.
+    # The presence of a wchar_t literal (L"asdf") seems to make a str4 section.
+    # When multiple sections exist, nm gives us no indication as to which
+    # section each string corresponds to.
+    if line.startswith('File: '):
+      # A new file begins: store what was gathered for the previous one.
+      if cur_offsets:
+        section_positions_by_path[path] = cur_offsets
+        cur_offsets = []
+      # Strip the 6-character "File: " prefix.
+      path = line[6:]
+    elif '.rodata.' in line:
+      progbits_idx = line.find('PROGBITS ')
+      if progbits_idx != -1:
+        # Counting from "PROGBITS": fields[2] is Off and fields[3] is Size.
+        fields = line[progbits_idx:].split()
+        position = (int(fields[2], 16), int(fields[3], 16))
+        # The heuristics in _IterStringLiterals rely on str1 coming first.
+        # The last column is the alignment (Al).
+        if fields[-1] == '1':
+          cur_offsets.insert(0, position)
+        else:
+          cur_offsets.append(position)
+  # Store the positions of the last file in the listing.
+  if cur_offsets:
+    section_positions_by_path[path] = cur_offsets
+  return section_positions_by_path
+def _ReadStringSections(target, output_directory, positions_by_path):
+  """Returns a dict of object_path -> [string...] of .rodata chunks.
+  Args:
+    target: An archive path string (e.g., "foo.a") or a list of object paths.
+    positions_by_path: A dict of object_path -> [(offset, size)...]
+  """
+  is_archive = isinstance(target, basestring)
+  string_sections_by_path = {}
+  if is_archive:
+    for subpath, chunk in ar.IterArchiveChunks(
+        os.path.join(output_directory, target)):
+      path = '{}({})'.format(target, subpath)
+      positions = positions_by_path.get(path)
+      # No positions if file has no string literals.
+      if positions:
+        string_sections_by_path[path] = (
+            [chunk[offset:offset + size] for offset, size in positions])
+  else:
+    for path in target:
+      positions = positions_by_path.get(path)
+      # We already log a warning about this in _IterStringLiterals().
+      if positions:
+        string_sections_by_path[path] = ReadFileChunks(
+            os.path.join(output_directory, path), positions)
+  return string_sections_by_path
+def _IterStringLiterals(path, addresses, obj_sections):
+  """Yields all string literals (including \0) for the given object path.
+  Sections are processed in order. Offsets that do not fit the current
+  section are carried over and retried against the next one.
+  Args:
+    path: Object file path.
+    addresses: List of string offsets encoded as hex strings.
+    obj_sections: List of contents of .rodata.str sections read from the given
+        object file.
+  Yields:
+    Slices of the section contents, each running from one accepted offset to
+    the next, plus the remainder of each section after the last one.
+  """
+  next_offsets = sorted(int(a, 16) for a in addresses)
+  if not obj_sections:
+    # Happens when there is an address for a symbol which is not actually a
+    # string literal, or when string_sections_by_path is missing an entry.
+    logging.warning('Object has %d strings but no string sections: %s',
+                    len(addresses), path)
+    return
+  for section_data in obj_sections:
+    # Offsets rejected by the previous section are the candidates for this one.
+    cur_offsets = next_offsets
+    # Always assume first element is 0. I'm not entirely sure why this is
+    # necessary, but strings get missed without it.
+    next_offsets = [0]
+    prev_offset = 0
+    # TODO(agrieve): Switch to using nm --print-size in order to capture the
+    #     address+size of each string rather than just the address.
+    # The first candidate is skipped: offset 0 is always the implicit start.
+    for offset in cur_offsets[1:]:
+      if offset >= len(section_data):
+        # Remaining offsets are for next section.
+        next_offsets.append(offset)
+        continue
+      # Figure out which offsets apply to this section via heuristic of them
+      # all ending with a null character.
+      if offset == prev_offset or section_data[offset - 1] != '\0':
+        next_offsets.append(offset)
+        continue
+      yield section_data[prev_offset:offset]
+      prev_offset = offset
+    # Whatever follows the last accepted offset is the final literal.
+    if prev_offset < len(section_data):
+      yield section_data[prev_offset:]
+# This is a target for BulkForkAndCall().
+def ResolveStringPieces(encoded_string_addresses_by_path, string_data,
+                        tool_prefix, output_directory):
+  """Locates the string literals of object files within |string_data|.
+  Args:
+    encoded_string_addresses_by_path: Encoded dict of path -> list of string
+        addresses, as decoded by concurrent.DecodeDictOfLists(). All paths
+        are handled as members of one archive if the first path looked at
+        names an archive member; otherwise as plain object files.
+    string_data: List of raw data chunks to search, one per string section.
+    tool_prefix: Used to locate readelf.
+    output_directory: Directory that the paths are relative to.
+  Returns:
+    A list parallel to |string_data|. Each element is an encoded dict of
+    path -> list of "offset:size" strings, where offset is the position of
+    the match within the corresponding |string_data| chunk.
+  """
+  string_addresses_by_path = concurrent.DecodeDictOfLists(
+      encoded_string_addresses_by_path)
+  # Assign |target| as archive path, or a list of object paths.
+  any_path = next(string_addresses_by_path.iterkeys())
+  target = _ExtractArchivePath(any_path)
+  if not target:
+    target = string_addresses_by_path.keys()
+  # Run readelf to find location of .rodata within the .o files.
+  section_positions_by_path = _LookupStringSectionPositions(
+      target, tool_prefix, output_directory)
+  # Load the .rodata sections (from object files) as strings.
+  string_sections_by_path = _ReadStringSections(
+      target, output_directory, section_positions_by_path)
+  # list of elf_positions_by_path.
+  ret = [collections.defaultdict(list) for _ in string_data]
+  # Brute-force search of strings within ** merge strings sections.
+  # This is by far the slowest part of AnalyzeStringLiterals().
+  # TODO(agrieve): Pre-process string_data into a dict of literal->address (at
+  #     least for ascii strings).
+  for path, object_addresses in string_addresses_by_path.iteritems():
+    for value in _IterStringLiterals(
+        path, object_addresses, string_sections_by_path.get(path)):
+      # First non-exact match seen in any chunk, kept as a fallback.
+      first_match = -1
+      first_match_dict = None
+      for target_dict, data in itertools.izip(ret, string_data):
+        # Set offset so that it will be 0 when len(value) is added to it below.
+        offset = -len(value)
+        while True:
+          offset = data.find(value, offset + len(value))
+          if offset == -1:
+            break
+          # Preferring exact matches (those following \0) over substring matches
+          # significantly increases accuracy (although shows that linker isn't
+          # being optimal).
+          if offset == 0 or data[offset - 1] == '\0':
+            break
+          if first_match == -1:
+            first_match = offset
+            first_match_dict = target_dict
+        # An exact match was found in this chunk; skip the remaining chunks.
+        if offset != -1:
+          break
+      if offset == -1:
+        # Exact match not found, so take suffix match if it exists.
+        offset = first_match
+        target_dict = first_match_dict
+      # Missing strings happen when optimization make them unused.
+      if offset != -1:
+        # Encode tuple as a string for easier marshalling.
+        target_dict[path].append(
+            str(offset) + ':' + str(len(value)))
+  return [concurrent.EncodeDictOfLists(x) for x in ret]
--- a/tools/binary_size/supersize.pydeps
+++ b/tools/binary_size/supersize.pydeps
@@ -57,4 +57,6 @@ libsupersize/match_util.py
 libsupersize/models.py
 libsupersize/ninja_parser.py
 libsupersize/nm.py
+libsupersize/obj_analyzer.py
 libsupersize/path_util.py
+libsupersize/string_extract.py