Commit c51effeb authored by Matthew Cary's avatar Matthew Cary Committed by Commit Bot

Orderfile: phased_orderfile updates

This performs the per-process phased offset processing used for the orderfile.

Bug: 758566
Change-Id: I5cbdb69c4834d95a52f2e43ad3f72e4845413883
Reviewed-on: https://chromium-review.googlesource.com/1144935
Commit-Queue: Matthew Cary <mattcary@chromium.org>
Reviewed-by: default avatarBenoit L <lizeb@chromium.org>
Reviewed-by: default avatarEgor Pasko <pasko@chromium.org>
Cr-Commit-Position: refs/heads/master@{#577537}
parent a14a69a7
......@@ -13,6 +13,13 @@ labeled 1, is called "interaction". These two phases are used to create an
orderfile with three parts: the code touched only in startup, the code
touched only during interaction, and code common to the two phases. We refer to
these parts as the orderfile phases.
Example invocation, with PROFILE_DIR the location of the profile data pulled
from a device and LIBTYPE either monochrome or chrome as appropriate.
./tools/cygprofile/phased_orderfile.py \
--profile-directory=PROFILE_DIR \
--instrumented-build-dir=out-android/Orderfile/ \
--library-name=libLIBTYPE.so --offset-output-base=PROFILE_DIR/offset
"""
import argparse
......@@ -26,7 +33,7 @@ import process_profiles
# Files matched when using this script to analyze directly (see main()).
PROFILE_GLOB = 'cygprofile-*.txt_*'
PROFILE_GLOB = 'profile-hitmap-*.txt_*'
OrderfilePhaseOffsets = collections.namedtuple(
......@@ -45,6 +52,9 @@ class PhasedAnalyzer(object):
COMMON_STABILITY_THRESHOLD = 1.75
INTERACTION_STABILITY_THRESHOLD = 2.5
# The process name of the browser as used in the profile dumps.
BROWSER = 'browser'
def __init__(self, profiles, processor):
"""Intialize.
......@@ -54,7 +64,11 @@ class PhasedAnalyzer(object):
"""
self._profiles = profiles
self._processor = processor
# These members cache various computed values.
self._phase_offsets = None
self._annotated_offsets = None
self._process_list = None
def IsStableProfile(self):
"""Verify that the profiling has been stable.
......@@ -65,7 +79,7 @@ class PhasedAnalyzer(object):
True if the profile was stable as described above.
"""
(startup_stability, common_stability,
interaction_stability) = self.ComputeStability()
interaction_stability) = [s[0] for s in self.ComputeStability()]
stable = True
if startup_stability > self.STARTUP_STABILITY_THRESHOLD:
......@@ -89,11 +103,46 @@ class PhasedAnalyzer(object):
they cover.
Returns:
(float, float, float) A heuristic stability metric for startup, common and
interaction orderfile phases, respectively.
((float, int), (float, int), (float, int)) A heuristic stability metric
for startup, common and interaction orderfile phases,
respectively. Each metric is a pair of the ratio of symbol sizes as
described above, and the size of the intersection.
"""
(startup_intersection, startup_union,
common_intersection, common_union,
interaction_intersection, interaction_union,
_, _) = self.GetCombinedOffsets()
startup_intersection_size = self._processor.OffsetsPrimarySize(
startup_intersection)
common_intersection_size = self._processor.OffsetsPrimarySize(
common_intersection)
interaction_intersection_size = self._processor.OffsetsPrimarySize(
interaction_intersection)
startup_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(startup_union),
startup_intersection_size)
common_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(common_union),
common_intersection_size)
interaction_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(interaction_union),
interaction_intersection_size)
return ((startup_stability, startup_intersection_size),
(common_stability, common_intersection_size),
(interaction_stability, interaction_intersection_size))
def GetCombinedOffsets(self):
"""Get offsets for the union and intersection of orderfile phases.
Returns:
([int] * 8) For each of startup, common, interaction and all, respectively
the intersection and union offsets, in that order.
"""
phase_offsets = self._GetOrderfilePhaseOffsets()
assert len(phase_offsets) > 1 # Otherwise the analysis is silly.
assert phase_offsets
if len(phase_offsets) == 1:
logging.error('Only one run set, the combined offset files will all be '
'identical')
startup_union = set(phase_offsets[0].startup)
startup_intersection = set(phase_offsets[0].startup)
......@@ -108,16 +157,172 @@ class PhasedAnalyzer(object):
common_intersection &= set(offsets.common)
interaction_union |= set(offsets.interaction)
interaction_intersection &= set(offsets.interaction)
startup_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(startup_union),
self._processor.OffsetsPrimarySize(startup_intersection))
common_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(common_union),
self._processor.OffsetsPrimarySize(common_intersection))
interaction_stability = self._SafeDiv(
self._processor.OffsetsPrimarySize(interaction_union),
self._processor.OffsetsPrimarySize(interaction_intersection))
return (startup_stability, common_stability, interaction_stability)
return (startup_intersection, startup_union,
common_intersection, common_union,
interaction_intersection, interaction_union,
(startup_union & common_union & interaction_union),
(startup_union | common_union | interaction_union))
def GetOffsetsForMemoryFootprint(self):
"""Get offsets organized to minimize the memory footprint.
The startup, common and interaction offsets are computed for each
process. Any symbols used by one process in startup or interaction that are
used in a different phase by another process are moved to the common
section. This should minimize the memory footprint by keeping startup- or
interaction-only pages clean, at the possible expense of startup time, as
more of the common section will need to be loaded. To mitigate that effect,
symbols moved from startup are placed at the beginning of the common
section, and those moved from interaction are placed at the end.
Browser startup symbols are placed at the beginning of the startup section
in the hope of working out with native library prefetching to minimize
startup time.
Returns:
OrderfilePhaseOffsets as described above.
"""
startup = []
# Symbols demoted from startup go at the head of the common section and
# symbols demoted from interaction at its tail, per the docstring above.
common_head = []
common = []
common_tail = []
interaction = []
process_offsets = {p: self._GetCombinedProcessOffsets(p)
for p in self._GetProcessList()}
# The browser gets special placement below, so it must be present.
assert self.BROWSER in process_offsets.keys()
# Union, over all processes, of the offsets seen in each phase.
any_startup = set()
any_interaction = set()
any_common = set()
for offsets in process_offsets.itervalues():
any_startup |= set(offsets.startup)
any_interaction |= set(offsets.interaction)
any_common |= set(offsets.common)
already_added = set()
# This helper function splits |offsets|, adding to |alternate| all offsets
# that are in |interfering| or are already known to be common, and otherwise
# adding to |target|.
def add_process_offsets(offsets, interfering, target, alternate):
for o in offsets:
if o in already_added:
continue
if o in interfering or o in any_common:
alternate.append(o)
else:
target.append(o)
already_added.add(o)
# This helper updates |common| with new members of |offsets|.
def add_common_offsets(offsets):
for o in offsets:
if o not in already_added:
common.append(o)
already_added.add(o)
# The browser is processed first so that its startup symbols lead the
# startup section.
add_process_offsets(process_offsets[self.BROWSER].startup,
any_interaction, startup, common_head)
add_process_offsets(process_offsets[self.BROWSER].interaction,
any_startup, interaction, common_tail)
add_common_offsets(process_offsets[self.BROWSER].common)
for p in process_offsets:
if p == self.BROWSER:
continue
add_process_offsets(process_offsets[p].startup,
any_interaction, startup, common_head)
add_process_offsets(process_offsets[p].interaction,
any_startup, interaction, common_tail)
add_common_offsets(process_offsets[p].common)
return OrderfilePhaseOffsets(
startup=startup,
common=(common_head + common + common_tail),
interaction=interaction)
def GetOffsetsForStartup(self):
  """Get offsets organized to minimize startup time.

  The startup, common and interaction offsets are computed for each
  process. Any symbol used by one process in interaction that appears in a
  different phase in another process is moved to common, but any symbol
  that appears in startup for *any* process stays in startup.

  This should maximize startup performance at the expense of increasing the
  memory footprint, as some startup symbols will not be able to page out.

  The startup symbols in the browser process appear first in the hope of
  working out with native library prefetching to minimize startup time.

  Returns:
    OrderfilePhaseOffsets as described above.
  """
  process_offsets = {p: self._GetCombinedProcessOffsets(p)
                     for p in self._GetProcessList()}
  # The browser's offsets are taken wholesale and come first in each phase.
  browser = process_offsets[self.BROWSER]
  startup = list(browser.startup)
  common = list(browser.common)
  interaction = list(browser.interaction)
  already_added = set(startup) | set(common) | set(interaction)
  for process, offsets in process_offsets.iteritems():
    if process == self.BROWSER:
      continue
    # For each phase, append offsets not placed yet, then mark the whole
    # phase list as seen before moving to the next phase.
    for source, destination in ((offsets.startup, startup),
                                (offsets.common, common),
                                (offsets.interaction, interaction)):
      destination.extend(o for o in source if o not in already_added)
      already_added |= set(source)
  return OrderfilePhaseOffsets(
      startup=startup, common=common, interaction=interaction)
def _GetCombinedProcessOffsets(self, process):
  """Combine offsets across runs for a particular process.

  Args:
    process (str) The process to combine.

  Returns:
    OrderfilePhaseOffsets with the startup, common and interaction offsets
    for |process|. The offsets are sorted arbitrarily.
  """
  assert self._profiles.GetPhases() == set([0,1]), 'Unexpected phases'
  startup, common, interaction = [], [], []
  for annotated in self._GetAnnotatedOffsets():
    seen_in_startup = bool(annotated.Count(0, process))
    seen_in_interaction = bool(annotated.Count(1, process))
    if not (seen_in_startup or seen_in_interaction):
      continue
    # An offset seen in both phases is common to them.
    if seen_in_startup and seen_in_interaction:
      bucket = common
    elif seen_in_startup:
      bucket = startup
    else:
      bucket = interaction
    bucket.append(annotated.Offset())
  return OrderfilePhaseOffsets(
      startup=startup, common=common, interaction=interaction)
def _GetAnnotatedOffsets(self):
  # Computes, translates to symbol offsets, and caches the annotated
  # offsets on first use.
  if self._annotated_offsets is None:
    offsets = self._profiles.GetAnnotatedOffsets()
    self._processor.TranslateAnnotatedSymbolOffsets(offsets)
    self._annotated_offsets = offsets
  return self._annotated_offsets
def _GetProcessList(self):
  # Lazily collects and caches the set of process names appearing anywhere
  # in the annotated offsets.
  if self._process_list is None:
    processes = set()
    for annotated in self._GetAnnotatedOffsets():
      processes |= annotated.Processes()
    self._process_list = processes
  return self._process_list
def _GetOrderfilePhaseOffsets(self):
"""Compute the phase offsets for each run.
......@@ -129,7 +334,8 @@ class PhasedAnalyzer(object):
if self._phase_offsets is not None:
return self._phase_offsets
assert self._profiles.GetPhases() == set([0, 1]), 'Unexpected phases'
assert self._profiles.GetPhases() == set([0, 1]), (
'Unexpected phases {}'.format(self._profiles.GetPhases()))
self._phase_offsets = []
for first, second in zip(self._profiles.GetRunGroupOffsets(phase=0),
self._profiles.GetRunGroupOffsets(phase=1)):
......@@ -175,10 +381,17 @@ def _CreateArgumentParser():
help=('Directory containing profile runs. Files '
'matching {} are used.'.format(PROFILE_GLOB)))
parser.add_argument('--instrumented-build-dir', type=str,
help='Path to the instrumented build', required=True)
help='Path to the instrumented build (eg, out/Orderfile)',
required=True)
parser.add_argument('--library-name', default='libchrome.so',
help=('Chrome shared library name (usually libchrome.so '
'or libmonochrome.so'))
parser.add_argument('--offset-output-base', default=None, type=str,
help=('If present, a base name to output offsets to. '
'No offsets are output if this is missing. The '
'base name is suffixed with _for_memory and '
'_for_startup, corresponding to the two sets of '
'offsets produced.'))
return parser
......@@ -186,12 +399,25 @@ def main():
logging.basicConfig(level=logging.INFO)
parser = _CreateArgumentParser()
args = parser.parse_args()
profiles = process_profiles.ProfileManager(
glob.glob(os.path.join(args.profile_directory, PROFILE_GLOB)))
profiles = process_profiles.ProfileManager(itertools.chain.from_iterable(
glob.glob(os.path.join(d, PROFILE_GLOB))
for d in args.profile_directory.split(',')))
processor = process_profiles.SymbolOffsetProcessor(os.path.join(
args.instrumented_build_dir, 'lib.unstripped', args.library_name))
phaser = PhasedAnalyzer(profiles, processor)
print 'Stability: {:.2f} {:.2f} {:.2f}'.format(*phaser.ComputeStability())
stability = phaser.ComputeStability()
print 'Stability: {:.2} {:.2} {:.2}'.format(*[s[0] for s in stability])
print 'Sizes: {} {} {}'.format(*[s[1] for s in stability])
if args.offset_output_base is not None:
for name, offsets in zip(
['_for_memory', '_for_startup'],
[phaser.GetOffsetsForMemoryFootprint(),
phaser.GetOffsetsForStartup()]):
with file(args.offset_output_base + name, 'w') as output:
output.write('\n'.join(
str(i) for i in (offsets.startup + offsets.common +
offsets.interaction)))
output.write('\n')
if __name__ == '__main__':
......
......@@ -11,20 +11,47 @@ import unittest
import phased_orderfile
import process_profiles
from test_utils import (SimpleTestSymbol,
from test_utils import (ProfileFile,
SimpleTestSymbol,
TestSymbolOffsetProcessor,
TestProfileManager)
class Mod10Processor(object):
class Mod10Processor(process_profiles.SymbolOffsetProcessor):
"""A restricted mock for a SymbolOffsetProcessor.
This only implements GetReachedOffsetsFromDump, and works by mapping a dump
offset to offset - (offset % 10). If the dump offset is negative, it is marked
as not found.
This only implements {Translate,Get}ReachedOffsetsFromDump, and works by
mapping a dump offset to offset - (offset % 10). If the dump offset is
negative, it is marked as not found.
"""
def GetReachedOffsetsFromDump(self, dump):
return [x - (x % 10) for x in dump if x >= 0]
def __init__(self):
super(Mod10Processor, self).__init__(None)
def _TranslateReachedOffsetsFromDump(self, items, get, update):
for i in items:
x = get(i)
if x >= 0:
update(i, x - (x % 10))
else:
update(i, None)
class IdentityProcessor(process_profiles.SymbolOffsetProcessor):
  """A restricted mock for a SymbolOffsetProcessor.

  This only implements {Translate,Get}ReachedOffsetsFromDump; every dump
  offset maps to itself, and negative dump offsets are treated as not found.
  """
  def __init__(self):
    super(IdentityProcessor, self).__init__(None)

  def _TranslateReachedOffsetsFromDump(self, items, get, update):
    for item in items:
      raw = get(item)
      update(item, raw if raw >= 0 else None)
class PhasedOrderfileTestCase(unittest.TestCase):
......@@ -32,11 +59,6 @@ class PhasedOrderfileTestCase(unittest.TestCase):
def setUp(self):
self._file_counter = 0
def File(self, timestamp_sec, phase):
self._file_counter += 1
return 'file-{}-{}.txt_{}'.format(
self._file_counter, timestamp_sec * 1000 * 1000 * 1000, phase)
def testProfileStability(self):
symbols = [SimpleTestSymbol(str(i), i, 10)
for i in xrange(20)]
......@@ -46,7 +68,8 @@ class PhasedOrderfileTestCase(unittest.TestCase):
startup=s, common=c, interaction=i)
phaser._phase_offsets = [opo(range(5), range(6, 10), range(11,15)),
opo(range(4), range(6, 10), range(18, 20))]
self.assertEquals((1.25, 1, None), phaser.ComputeStability())
self.assertEquals((1.25, 1, None),
tuple(s[0] for s in phaser.ComputeStability()))
def testIsStable(self):
symbols = [SimpleTestSymbol(str(i), i, 10)
......@@ -64,12 +87,12 @@ class PhasedOrderfileTestCase(unittest.TestCase):
def testGetOrderfilePhaseOffsets(self):
mgr = TestProfileManager({
self.File(0, 0): [12, 21, -1, 33],
self.File(0, 1): [31, 49, 52],
self.File(100, 0): [113, 128],
self.File(200, 1): [132, 146],
self.File(300, 0): [19, 20, 32],
self.File(300, 1): [24, 39]})
ProfileFile(0, 0): [12, 21, -1, 33],
ProfileFile(0, 1): [31, 49, 52],
ProfileFile(100, 0): [113, 128],
ProfileFile(200, 1): [132, 146],
ProfileFile(300, 0): [19, 20, 32],
ProfileFile(300, 1): [24, 39]})
phaser = phased_orderfile.PhasedAnalyzer(mgr, Mod10Processor())
opo = lambda s, c, i: phased_orderfile.OrderfilePhaseOffsets(
startup=s, common=c, interaction=i)
......@@ -79,6 +102,47 @@ class PhasedOrderfileTestCase(unittest.TestCase):
opo([10], [20, 30], [])],
phaser._GetOrderfilePhaseOffsets())
def testGetCombinedProcessOffsets(self):
# An empty process name denotes the browser; IdentityProcessor maps each
# dump offset to itself, so the raw values below survive translation.
mgr = TestProfileManager({
ProfileFile(40, 0, ''): [1, 2, 3],
ProfileFile(50, 1, ''): [3, 4, 5],
ProfileFile(51, 0, 'renderer'): [2, 3, 6],
ProfileFile(51, 1, 'gpu-process'): [6, 7],
ProfileFile(70, 0, ''): [2, 8, 9],
ProfileFile(70, 1, ''): [9]})
phaser = phased_orderfile.PhasedAnalyzer(mgr, IdentityProcessor())
offsets = phaser._GetCombinedProcessOffsets('browser')
# 3 and 9 appear in both phases for the browser, so they are common.
self.assertListEqual([1, 2, 8], sorted(offsets.startup))
self.assertListEqual([4, 5], sorted(offsets.interaction))
self.assertListEqual([3, 9], sorted(offsets.common))
offsets = phaser._GetCombinedProcessOffsets('gpu-process')
self.assertListEqual([], sorted(offsets.startup))
self.assertListEqual([6, 7], sorted(offsets.interaction))
self.assertListEqual([], sorted(offsets.common))
self.assertListEqual(['browser', 'gpu-process', 'renderer'],
sorted(phaser._GetProcessList()))
def testGetOffsetVariations(self):
# Offset 6 is startup in the browser but interaction in the gpu-process:
# the memory-footprint layout demotes it to the head of common, while the
# startup layout keeps it in startup.
mgr = TestProfileManager({
ProfileFile(40, 0, ''): [1, 2, 3],
ProfileFile(50, 1, ''): [3, 4, 5],
ProfileFile(51, 0, 'renderer'): [2, 3, 6],
ProfileFile(51, 1, 'gpu-process'): [6, 7],
ProfileFile(70, 0, ''): [2, 6, 8, 9],
ProfileFile(70, 1, ''): [9]})
phaser = phased_orderfile.PhasedAnalyzer(mgr, IdentityProcessor())
offsets = phaser.GetOffsetsForMemoryFootprint()
self.assertListEqual([1, 2, 8], offsets.startup)
self.assertListEqual([6, 3, 9], offsets.common)
self.assertListEqual([4, 5, 7], offsets.interaction)
offsets = phaser.GetOffsetsForStartup()
self.assertListEqual([1, 2, 6, 8], offsets.startup)
self.assertListEqual([3, 9], offsets.common)
self.assertListEqual([4, 5, 7], offsets.interaction)
if __name__ == "__main__":
unittest.main()
......@@ -56,6 +56,7 @@ class SymbolOffsetProcessor(object):
self._name_to_symbol = None
self._offset_to_primary = None
self._offset_to_symbols = None
self._offset_to_symbol_info = None
def SymbolInfos(self):
"""The symbols associated with this processor's binary.
......@@ -152,24 +153,14 @@ class SymbolOffsetProcessor(object):
Returns:
[int] Reached symbol offsets.
"""
dump_offset_to_symbol_info = self._GetDumpOffsetToSymbolInfo()
logging.info('Offset to Symbol size = %d', len(dump_offset_to_symbol_info))
assert max(dump) / 4 <= len(dump_offset_to_symbol_info)
already_seen = set()
reached_offsets = []
reached_return_addresses_not_found = 0
for dump_offset in dump:
symbol_info = dump_offset_to_symbol_info[dump_offset / 4]
if symbol_info is None:
reached_return_addresses_not_found += 1
continue
if symbol_info.offset in already_seen:
continue
reached_offsets.append(symbol_info.offset)
already_seen.add(symbol_info.offset)
if reached_return_addresses_not_found:
logging.warning('%d return addresses don\'t map to any symbol',
reached_return_addresses_not_found)
already_seen = set()
def update(_, symbol_offset):
if symbol_offset is None or symbol_offset in already_seen:
return
reached_offsets.append(symbol_offset)
already_seen.add(symbol_offset)
self._TranslateReachedOffsetsFromDump(dump, lambda x: x, update)
return reached_offsets
def MatchSymbolNames(self, symbol_names):
......@@ -185,6 +176,52 @@ class SymbolOffsetProcessor(object):
matched_names = our_symbol_names.intersection(set(symbol_names))
return [self.NameToSymbolMap()[n] for n in matched_names]
def TranslateAnnotatedSymbolOffsets(self, annotated_offsets):
  """Rewrites each AnnotatedOffset's raw offset as a symbol offset.

  Like GetReachedOffsetsFromDump, but works with AnnotatedOffsets.

  Args:
    annotated_offsets (AnnotatedOffset iterable) List of annotated offsets,
      eg from ProfileManager.GetAnnotatedOffsets(). Mutated in place: each
      raw offset is replaced by its symbol offset (None when unmapped).
  """
  read_offset = lambda annotated: annotated.Offset()
  write_offset = lambda annotated, symbol_offset: annotated.SetOffset(
      symbol_offset)
  self._TranslateReachedOffsetsFromDump(
      annotated_offsets, read_offset, write_offset)
def _TranslateReachedOffsetsFromDump(self, items, get, update):
  """Translate raw binary offsets to symbol offsets.

  See GetReachedOffsetsFromDump for details. For each element |i| of
  |items|, |get(i)| supplies the raw dump offset, and
  |update(i, symbol_offset)| receives the translated symbol offset — or
  None when the raw offset maps to no symbol.

  Args:
    items: (iterable) Items containing offsets.
    get: (lambda item) Extracts the raw dump offset from an item.
    update: (lambda item, int) Stores the translated offset on an item.
  """
  symbol_table = self._GetDumpOffsetToSymbolInfo()
  logging.info('Offset to Symbol size = %d', len(symbol_table))
  not_found = 0
  for item in items:
    # The table has one entry per 4-byte word of .text.
    word_index = get(item) / 4
    assert word_index < len(symbol_table), (
        'Dump offset out of binary range')
    symbol_info = symbol_table[word_index]
    if symbol_info is None:
      not_found += 1
      update(item, None)
    else:
      update(item, symbol_info.offset)
  if not_found:
    logging.warning('%d return addresses don\'t map to any symbol',
                    not_found)
def _GetDumpOffsetToSymbolInfo(self):
"""Computes an array mapping each word in .text to a symbol.
......@@ -192,15 +229,16 @@ class SymbolOffsetProcessor(object):
[symbol_extractor.SymbolInfo or None] For every 4 bytes of the .text
section, maps it to a symbol, or None.
"""
min_offset = min(s.offset for s in self.SymbolInfos())
max_offset = max(s.offset + s.size for s in self.SymbolInfos())
text_length_words = (max_offset - min_offset) / 4
offset_to_symbol_info = [None for _ in xrange(text_length_words)]
for s in self.SymbolInfos():
offset = s.offset - min_offset
for i in range(offset / 4, (offset + s.size) / 4):
offset_to_symbol_info[i] = s
return offset_to_symbol_info
if self._offset_to_symbol_info is None:
min_offset = min(s.offset for s in self.SymbolInfos())
max_offset = max(s.offset + s.size for s in self.SymbolInfos())
text_length_words = (max_offset - min_offset) / 4
self._offset_to_symbol_info = [None for _ in xrange(text_length_words)]
for s in self.SymbolInfos():
offset = s.offset - min_offset
for i in range(offset / 4, (offset + s.size) / 4):
self._offset_to_symbol_info[i] = s
return self._offset_to_symbol_info
class ProfileManager(object):
......@@ -222,11 +260,11 @@ class ProfileManager(object):
example the dump for the startup could be phase 0 and then the steady-state
would be labeled phase 1.
We assume the files are named like *-TIMESTAMP.SUFFIX_PHASE, where TIMESTAMP
is in nanoseconds, SUFFIX is string without dashes, PHASE is an integer
numbering the phases as 0, 1, 2..., and the only dot is the one between
TIMESTAMP and SUFFIX. Note that the current dump filename also includes a
process id which is currently unused.
We assume the files are named like
profile-hitmap-PROCESS-PID-TIMESTAMP.SUFFIX_PHASE, where PROCESS is a possibly
empty string, PID is the process id, TIMESTAMP is in nanoseconds, SUFFIX is a
string without dashes, PHASE is an integer numbering the phases as 0, 1, 2...,
and the only dot is the one between TIMESTAMP and SUFFIX.
This manager supports several configurations of dumps.
......@@ -242,6 +280,44 @@ class ProfileManager(object):
time. These files can be grouped into run sets that are within 30 seconds of
each other. Each run set is then grouped into phases as before.
"""
class AnnotatedOffset(object):
  """Describes an offset with how it appeared in a profile set.

  Each offset is annotated with the phase and process that it appeared in,
  and can report how often it occurred in a specific phase and process.
  """
  def __init__(self, offset):
    # The offset may be rewritten later via SetOffset (for example when raw
    # dump offsets are translated to symbol offsets).
    self._offset = offset
    # Maps (phase, process) -> number of occurrences.
    self._count = {}

  def __str__(self):
    return '{}: {}'.format(self._offset, self._count)

  def __eq__(self, other):
    if other is None:
      return False
    return (self._offset == other._offset and
            self._count == other._count)

  def __ne__(self, other):
    # Required under Python 2, where != does not fall back to __eq__ and
    # would otherwise compare identity.
    return not self.__eq__(other)

  def Increment(self, phase, process):
    """Records one occurrence of this offset in |phase| of |process|."""
    key = (phase, process)
    self._count[key] = self._count.setdefault(key, 0) + 1

  def Count(self, phase, process):
    """Returns the occurrence count for (phase, process); 0 if never seen."""
    return self._count.get((phase, process), 0)

  def Processes(self):
    """Returns the set of process names this offset was seen in."""
    # Iterate the dict directly rather than the Python-2-only iterkeys().
    return set(process for _, process in self._count)

  def Phases(self):
    """Returns the set of phases this offset was seen in."""
    return set(phase for phase, _ in self._count)

  def Offset(self):
    return self._offset

  def SetOffset(self, o):
    self._offset = o
class _RunGroup(object):
RUN_GROUP_THRESHOLD_NS = 30e9
......@@ -295,6 +371,22 @@ class ProfileManager(object):
return self._GetOffsetsForGroup(f for f in self._filenames
if self._Phase(f) == phase)
def GetAnnotatedOffsets(self):
  """Merges offsets across run groups and annotates each one.

  Returns:
    [AnnotatedOffset]
  """
  annotations = {}  # raw offset (int) -> AnnotatedOffset
  for group in self._GetRunGroups():
    for filename in group:
      phase = self._Phase(filename)
      process = self._ProcessName(filename)
      for raw_offset in self._ReadOffsets(filename):
        if raw_offset not in annotations:
          annotations[raw_offset] = self.AnnotatedOffset(raw_offset)
        annotations[raw_offset].Increment(phase, process)
  return annotations.values()
def GetRunGroupOffsets(self, phase=None):
"""Merges files from each run group and returns offset list for each.
......@@ -322,11 +414,21 @@ class ProfileManager(object):
self._ComputeRunGroups()
return [g.Filenames(phase) for g in self._run_groups]
@classmethod
def _ProcessName(cls, filename):
"""Returns the process name encoded in a profile dump filename."""
# The filename starts with 'profile-hitmap-' and ends with
# '-PID-TIMESTAMP.text_X'. Anything in between is the process name. The
# browser has an empty process name, which is inserted here.
process_name_parts = os.path.basename(filename).split('-')[2:-2]
if not process_name_parts:
return 'browser'
return '-'.join(process_name_parts)
@classmethod
def _Timestamp(cls, filename):
  """Returns the timestamp (in ns) embedded in a profile dump filename.

  The timestamp is the text between the last '-' and the last '.' in
  |filename|.
  """
  dash_index = filename.rindex('-')
  dot_index = filename.rindex('.')
  return int(filename[dash_index+1:dot_index])
@classmethod
def _Phase(cls, filename):
......@@ -347,6 +449,19 @@ class ProfileManager(object):
g.Add(f)
self._run_groups.append(g)
# Some sanity checks on the run groups.
assert self._run_groups
if len(self._run_groups) < 5:
return # Small runs have too much variance for testing.
sizes = map(lambda g: len(g.Filenames()), self._run_groups)
avg_size = sum(sizes) / len(self._run_groups)
num_outliers = len([s for s in sizes
if s > 1.5 * avg_size or s < 0.75 * avg_size])
expected_outliers = 0.1 * len(self._run_groups)
assert num_outliers < expected_outliers, (
'Saw {} outliers instead of at most {} for average of {}'.format(
num_outliers, expected_outliers, avg_size))
def GetReachedOffsetsFromDumpFiles(dump_filenames, library_filename):
"""Produces a list of symbol offsets reached by the dumps.
......
......@@ -10,7 +10,8 @@ import unittest
import process_profiles
from test_utils import (SimpleTestSymbol,
from test_utils import (ProfileFile,
SimpleTestSymbol,
TestSymbolOffsetProcessor,
TestProfileManager)
......@@ -28,10 +29,10 @@ class ProcessProfilesTestCase(unittest.TestCase):
self.symbol_2, self.symbol_3]
self._file_counter = 0
def File(self, timestamp_sec, phase):
self._file_counter += 1
return 'file-{}-{}.txt_{}'.format(
self._file_counter, timestamp_sec * 1000 * 1000 * 1000, phase)
def MakeAnnotatedOffset(self, offset, counts):
# Test helper: builds an AnnotatedOffset with |counts| (a dict mapping
# (phase, process) -> occurrences) injected directly, bypassing Increment().
ao = process_profiles.ProfileManager.AnnotatedOffset(offset)
ao._count = counts
return ao
def testGetOffsetToSymbolInfo(self):
processor = TestSymbolOffsetProcessor(self.symbol_infos)
......@@ -103,8 +104,9 @@ class ProcessProfilesTestCase(unittest.TestCase):
self.assertEquals(5, process_profiles._Median([1, 4, 5, 6, 100]))
def testRunGroups(self):
files = [self.File(40, 0), self.File(100, 0), self.File(200, 1),
self.File(35, 1), self.File(42, 0), self.File(95, 0)]
files = [ProfileFile(40, 0), ProfileFile(100, 0),
ProfileFile(200, 1), ProfileFile(35, 1),
ProfileFile(42, 0), ProfileFile(95, 0)]
mgr = process_profiles.ProfileManager(files)
mgr._ComputeRunGroups()
self.assertEquals(3, len(mgr._run_groups))
......@@ -118,11 +120,34 @@ class ProcessProfilesTestCase(unittest.TestCase):
self.assertTrue(files[5] in mgr._run_groups[1].Filenames())
self.assertTrue(files[2] in mgr._run_groups[2].Filenames())
def testRunGroupSanity(self):
"""Checks the outlier detection in run-group computation."""
files = []
# Generate 20 sets of files in groups separated by 60s.
for ts_base in xrange(0, 20):
ts = ts_base * 60
files.extend([ProfileFile(ts, 0, 'browser'),
ProfileFile(ts + 1, 0, 'renderer'),
ProfileFile(ts + 2, 1, 'browser'),
ProfileFile(ts + 3, 0, 'gpu'),
ProfileFile(ts + 2, 1, 'renderer'),
ProfileFile(ts + 5, 1, 'gpu')])
# The following call should not assert.
process_profiles.ProfileManager(files)._ComputeRunGroups()
# Adding unevenly-sized groups (3, 1 and 10 files) should trip the
# outlier assertion.
files.extend([ProfileFile(20 * 60, 0, 'browser'),
ProfileFile(20 * 60 + 2, 1, 'renderer'),
ProfileFile(21 * 60, 0, 'browser')] +
[ProfileFile(22 * 60, 0, 'renderer')
for _ in xrange(0, 10)])
self.assertRaises(AssertionError,
process_profiles.ProfileManager(files)._ComputeRunGroups)
def testReadOffsets(self):
mgr = TestProfileManager({
self.File(30, 0): [1, 3, 5, 7],
self.File(40, 1): [8, 10],
self.File(50, 0): [13, 15]})
ProfileFile(30, 0): [1, 3, 5, 7],
ProfileFile(40, 1): [8, 10],
ProfileFile(50, 0): [13, 15]})
self.assertListEqual([1, 3, 5, 7, 8, 10, 13, 15],
mgr.GetMergedOffsets())
self.assertListEqual([8, 10], mgr.GetMergedOffsets(1))
......@@ -130,9 +155,9 @@ class ProcessProfilesTestCase(unittest.TestCase):
def testRunGroupOffsets(self):
mgr = TestProfileManager({
self.File(30, 0): [1, 2, 3, 4],
self.File(150, 0): [9, 11, 13],
self.File(40, 1): [5, 6, 7]})
ProfileFile(30, 0): [1, 2, 3, 4],
ProfileFile(150, 0): [9, 11, 13],
ProfileFile(40, 1): [5, 6, 7]})
offsets_list = mgr.GetRunGroupOffsets()
self.assertEquals(2, len(offsets_list))
self.assertListEqual([1, 2, 3, 4, 5, 6, 7], offsets_list[0])
......@@ -150,22 +175,54 @@ class ProcessProfilesTestCase(unittest.TestCase):
# The fact that the ProfileManager sorts by filename is implicit in the
# other tests. It is tested explicitly here.
mgr = TestProfileManager({
self.File(40, 0): [1, 2, 3, 4],
self.File(150, 0): [9, 11, 13],
self.File(30, 1): [5, 6, 7]})
ProfileFile(40, 0): [1, 2, 3, 4],
ProfileFile(150, 0): [9, 11, 13],
ProfileFile(30, 1): [5, 6, 7]})
offsets_list = mgr.GetRunGroupOffsets()
self.assertEquals(2, len(offsets_list))
self.assertListEqual([5, 6, 7, 1, 2, 3, 4], offsets_list[0])
def testPhases(self):
mgr = TestProfileManager({
self.File(40, 0): [],
self.File(150, 0): [],
self.File(30, 1): [],
self.File(30, 2): [],
self.File(30, 0): []})
ProfileFile(40, 0): [],
ProfileFile(150, 0): [],
ProfileFile(30, 1): [],
ProfileFile(30, 2): [],
ProfileFile(30, 0): []})
self.assertEquals(set([0,1,2]), mgr.GetPhases())
def testGetAnnotatedOffsets(self):
# An empty process name denotes the browser process. Each expected
# AnnotatedOffset below maps (phase, process) to an occurrence count.
mgr = TestProfileManager({
ProfileFile(40, 0, ''): [1, 2, 3],
ProfileFile(50, 1, ''): [3, 4, 5],
ProfileFile(51, 0, 'renderer'): [2, 3, 6],
ProfileFile(51, 1, 'gpu-process'): [6, 7],
ProfileFile(70, 0, ''): [2, 8, 9],
ProfileFile(70, 1, ''): [9]})
offsets = mgr.GetAnnotatedOffsets()
self.assertListEqual([
self.MakeAnnotatedOffset(1, {(0, 'browser'): 1}),
self.MakeAnnotatedOffset(2, {(0, 'browser'): 2,
(0, 'renderer'): 1}),
self.MakeAnnotatedOffset(3, {(0, 'browser'): 1,
(1, 'browser'): 1,
(0, 'renderer'): 1}),
self.MakeAnnotatedOffset(4, {(1, 'browser'): 1}),
self.MakeAnnotatedOffset(5, {(1, 'browser'): 1}),
self.MakeAnnotatedOffset(6, {(0, 'renderer'): 1,
(1, 'gpu-process'): 1}),
self.MakeAnnotatedOffset(7, {(1, 'gpu-process'): 1}),
self.MakeAnnotatedOffset(8, {(0, 'browser'): 1}),
self.MakeAnnotatedOffset(9, {(0, 'browser'): 1,
(1, 'browser'): 1})],
offsets)
self.assertListEqual(['browser', 'renderer'],
sorted(offsets[1].Processes()))
self.assertListEqual(['browser'], list(offsets[0].Processes()))
self.assertListEqual([0], list(offsets[1].Phases()))
self.assertListEqual([0, 1], sorted(offsets[2].Phases()))
self.assertListEqual([0, 1], sorted(mgr.GetPhases()))
if __name__ == '__main__':
unittest.main()
......@@ -8,6 +8,9 @@ import collections
import process_profiles
# Used by ProfileFile to generate unique file names.
_FILE_COUNTER = 0
SimpleTestSymbol = collections.namedtuple(
'SimpleTestSymbol', ['name', 'offset', 'size'])
......@@ -25,3 +28,14 @@ class TestProfileManager(process_profiles.ProfileManager):
def _ReadOffsets(self, filename):
return self._filecontents_mapping[filename]
def ProfileFile(timestamp_sec, phase, process_name=None):
  """Returns a unique fake profile dump filename for tests.

  Args:
    timestamp_sec: (int) Dump timestamp in seconds; encoded as nanoseconds
      in the filename.
    phase: (int) Profiling phase, appended as the filename suffix.
    process_name: (str or None) Process name to embed; None or empty means
      no name component (the browser process).

  Returns:
    (str) A filename following the profile-hitmap naming scheme, made
    unique by a module-global counter.
  """
  global _FILE_COUNTER
  _FILE_COUNTER += 1
  process_part = process_name + '-' if process_name else ''
  timestamp_ns = timestamp_sec * 1000 * 1000 * 1000
  return 'test-directory/profile-hitmap-{}{}-{}.txt_{}'.format(
      process_part, _FILE_COUNTER, timestamp_ns, phase)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment