Commit 99c101fa authored by Monica Basta's avatar Monica Basta Committed by Commit Bot

Updated the orderfile generation pipeline to support call graph.

Updated the orderfile pipeline to support generation of an orderfile
based on the new instrumentation call graph information. To enable using
the call graph instrumentation, set |--use-call-graph|. This CL uses the
counts of how many times a pair of (caller, callee) was recorded to define the
distance between them. It then constructs the neighbors, and uses the
existing clustering algorithm to produce the orderfile.

Bug: 963833
Change-Id: Idd07a7a0aa5e00d87581fc68fe60374fe426d24c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1615022
Reviewed-by: Matthew Cary <mattcary@chromium.org>
Commit-Queue: Monica Basta <msalama@google.com>
Cr-Commit-Position: refs/heads/master@{#661708}
parent 13fc8667
......@@ -81,6 +81,7 @@ std::atomic<uint32_t> g_caller_count[kMaxReachedSymbols * kTotalBuckets];
static_assert(sizeof(g_caller_count) == 16 * (1 << 20), "");
// Index for |g_caller_offset| and |g_caller_count|.
std::atomic<uint32_t> g_callers_index;
std::atomic<uint32_t> g_calls_count;
std::atomic<bool> g_disabled;
#if BUILDFLAG(DEVTOOLS_INSTRUMENTATION_DUMPING)
......@@ -214,6 +215,7 @@ __attribute__((always_inline, no_instrument_function)) void RecordAddress(
callers_index = expected == 0 ? insertion_index : expected;
}
AtomicIncrement(&g_calls_count);
callers_index *= kTotalBuckets;
if (caller_address <= start || caller_address > end ||
!RecordCaller(callers_index, caller_address - start)) {
......@@ -239,7 +241,11 @@ NO_INSTRUMENT_FUNCTION bool DumpToFile(const base::FilePath& path) {
// This can get very large as it constructs the whole data structure in
// memory before dumping it to the file.
ListValue root;
DictionaryValue root;
uint32_t total_calls_count = g_calls_count.load(std::memory_order_relaxed);
root.SetStringKey("total_calls_count",
base::StringPrintf("%" PRIu32, total_calls_count));
ListValue call_graph;
for (size_t i = 0; i < kMaxElements; i++) {
auto caller_index =
callee_map[i].load(std::memory_order_relaxed) * kTotalBuckets;
......@@ -280,12 +286,12 @@ NO_INSTRUMENT_FUNCTION bool DumpToFile(const base::FilePath& path) {
callers_list.GetList().push_back(std::move(caller_count));
}
callee_element.SetKey("caller_and_count", std::move(callers_list));
root.GetList().push_back(std::move(callee_element));
call_graph.GetList().push_back(std::move(callee_element));
}
root.SetKey("call_graph", std::move(call_graph));
std::string output_js;
if (!JSONWriter::WriteWithOptions(root, JSONWriter::OPTIONS_PRETTY_PRINT,
&output_js)) {
if (!JSONWriter::Write(root, &output_js)) {
LOG(FATAL) << "Error getting JSON string";
}
if (file.WriteAtCurrentPos(output_js.c_str(),
......
......@@ -11,8 +11,11 @@ import collections
import itertools
import logging
# An edge between two symbols; |dist| is the clustering distance (when built
# from call counts, more negative means the pair was observed together more).
Neighbor = collections.namedtuple('Neighbor', ('src', 'dst', 'dist'))
# One callee entry from a call-graph instrumentation dump. |index| is the
# instrumentation index of the callee, |misses| is the count recorded against
# a zero caller offset (callers that could not be attributed — see
# _GetSymbolsCallGraph), and |caller_and_count| is a list of CallerInfo.
CalleeInfo = collections.namedtuple('CalleeInfo',
                                    ('index', 'callee_symbol',
                                     'misses', 'caller_and_count'))
# A (caller symbol name, number of recorded calls) pair for one callee.
CallerInfo = collections.namedtuple('CallerInfo', ('caller_symbol', 'count'))
class Clustering(object):
......@@ -86,6 +89,12 @@ class Clustering(object):
c.AddSymbolLists(sym_lists)
return c.ClusterToList(size_map)
  @classmethod
  def ClusterSymbolCallGraph(cls, call_graph, whitelist):
    """Clusters symbols using call-graph instrumentation data.

    Args:
      call_graph: A list of processes, each a list of CalleeInfo describing
          the recorded (caller, callee, count) edges for that process.
      whitelist: Collection of symbol names to be skipped when they appear
          as callers.

    Returns:
      A list of clustered symbol names.
    """
    c = cls()
    c.AddSymbolCallGraph(call_graph, whitelist)
    return c.ClusterToList()
def __init__(self):
self._num_lists = None
self._neighbors = None
......@@ -127,6 +136,39 @@ class Clustering(object):
self._neighbors = self._CoalesceNeighbors(
self._ConstructNeighbors(sym_lists))
  def AddSymbolCallGraph(self, call_graph, whitelist):
    """Ingests call-graph data and builds the neighbor/distance list.

    Args:
      call_graph: A list of processes, each a list of CalleeInfo.
      whitelist: Collection of symbol names to be skipped as callers.
    """
    # Each process's call graph counts as one "list" for clustering purposes.
    self._num_lists = len(call_graph)
    self._neighbors = self._ConstructNeighborsFromGraph(call_graph, whitelist)
def _ConstructNeighborsFromGraph(self, call_graph, whitelist):
neighbors = []
pairs = collections.defaultdict()
# Each list item is a list of dict.
for process_items in call_graph:
for callee_info in process_items:
callee = callee_info.callee_symbol
for caller_info in callee_info.caller_and_count:
caller = caller_info.caller_symbol
if caller in whitelist or callee == caller:
continue
# Multiply by -1, the biggest the count the smaller the distance
# should be.
dist = caller_info.count * -1
if (caller, callee) in pairs:
pairs[(caller, callee)] += dist
elif (callee, caller) in pairs:
pairs[(callee, caller)] += dist
else:
pairs[(caller, callee)] = dist
for (s, t) in pairs:
assert s != t and (t, s) not in pairs, ('Unexpected shuffled pair:'
' ({}, {})'.format(s, t))
neighbors.append(Neighbor(s, t, pairs[(s, t)]))
return neighbors
def _ConstructNeighbors(self, sym_lists):
neighbors = []
for sym_list in sym_lists:
......@@ -219,17 +261,99 @@ class Clustering(object):
assert len(ordered_syms) == len(set(ordered_syms)), 'Duplicated symbols!'
return ordered_syms
def ClusterOffsets(profiles, processor, limit_cluster_size=False):
"""Cluster profile offsets.
def _GetOffsetSymbolName(processor, dump_offset):
dump_offset_to_symbol_info = \
processor.GetDumpOffsetToSymboInfolIncludingWhitelist()
offset_to_primary = processor.OffsetToPrimaryMap()
idx = dump_offset / 2
assert dump_offset >= 0 and idx < len(dump_offset_to_symbol_info), (
'Dump offset out of binary range')
symbol_info = dump_offset_to_symbol_info[idx]
assert symbol_info, ('A return address (offset = 0x{:08x}) does not map '
'to any symbol'.format(dump_offset))
assert symbol_info.offset in offset_to_primary, (
'Offset not found in primary map!')
return offset_to_primary[symbol_info.offset].name
def _GetSymbolsCallGraph(profiles, processor):
  """Maps each offset in the call graph to the corresponding symbol name.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.

  Returns:
    A dict that maps each process type (ex: browser, renderer, etc.) to a list
    of processes of that type. Each process is a list that contains the
    call graph information, represented as a list of CalleeInfo (callee
    symbol, caller/count pairs, misses).
  """
  offsets_graph = profiles.GetProcessOffsetGraph()
  process_symbols_graph = collections.defaultdict(list)
  # |process_type| can be: browser, renderer, ...etc.
  for process_type in offsets_graph:
    for process in offsets_graph[process_type]:
      # Visit callees in instrumentation-index order.
      process = sorted(process, key=lambda k: long(k['index']))
      graph_list = []
      for el in process:
        index = long(el['index'])
        callee_symbol = _GetOffsetSymbolName(processor,
                                             long(el['callee_offset']))
        misses = 0
        caller_and_count = []
        for bucket in el['caller_and_count']:
          caller_offset = long(bucket['caller_offset'])
          count = long(bucket['count'])
          if caller_offset == 0:
            # A zero caller offset records calls whose caller was not
            # captured. NOTE(review): assumes at most one zero-offset bucket
            # per callee; a second one would overwrite, not accumulate —
            # TODO confirm against the instrumentation dump format.
            misses = count
            continue
          caller_symbol_name = _GetOffsetSymbolName(processor, caller_offset)
          caller_info = CallerInfo(caller_symbol=caller_symbol_name,
                                   count=count)
          caller_and_count.append(caller_info)
        callee_info = CalleeInfo(index=index,
                                 callee_symbol=callee_symbol,
                                 misses=misses,
                                 caller_and_count=caller_and_count)
        graph_list.append(callee_info)
      process_symbols_graph[process_type].append(graph_list)
  return process_symbols_graph
def _ClusterOffsetsFromCallGraph(profiles, processor):
  """Clusters symbols per process type using call-graph data.

  The renderer clustering is placed first to favor rendering performance,
  followed by the browser clustering, then all remaining process types.
  Duplicates appearing in a later clustering are dropped.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.

  Returns:
    A list of ordered symbol names.
  """
  symbols_call_graph = _GetSymbolsCallGraph(profiles, processor)
  # Process names from the profile dumps that are treated specially.
  _RENDERER = 'renderer'
  _BROWSER = 'browser'
  assert _RENDERER in symbols_call_graph
  assert _BROWSER in symbols_call_graph
  whitelist = processor.GetWhitelistSymbols()
  renderer_clustering = Clustering.ClusterSymbolCallGraph(
      symbols_call_graph[_RENDERER], whitelist)
  browser_clustering = Clustering.ClusterSymbolCallGraph(
      symbols_call_graph[_BROWSER], whitelist)
  other_lists = []
  for process in symbols_call_graph:
    if process not in (_RENDERER, _BROWSER):
      other_lists.extend(symbols_call_graph[process])
  if other_lists:
    other_clustering = Clustering.ClusterSymbolCallGraph(other_lists, whitelist)
  else:
    other_clustering = []
  # Start with the renderer cluster to favor rendering performance.
  final_ordering = list(renderer_clustering)
  seen = set(final_ordering)
  final_ordering.extend(s for s in browser_clustering if s not in seen)
  seen |= set(browser_clustering)
  final_ordering.extend(s for s in other_clustering if s not in seen)
  return final_ordering
def _ClusterOffsetsLists(profiles, processor, limit_cluster_size=False):
raw_offsets = profiles.GetProcessOffsetLists()
process_symbols = collections.defaultdict(list)
seen_symbols = set()
......@@ -273,3 +397,20 @@ def ClusterOffsets(profiles, processor, limit_cluster_size=False):
final_ordering.extend(s for s in other_clustering if s not in seen)
return final_ordering
def ClusterOffsets(profiles, processor, limit_cluster_size=False,
                   call_graph=False):
  """Cluster profile offsets.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
    limit_cluster_size (bool) Passed through to the offset-list flow;
        ignored when |call_graph| is set.
    call_graph (bool) whether the call graph instrumentation was used.

  Returns:
    A list of clustered symbol offsets.
  """
  if call_graph:
    return _ClusterOffsetsFromCallGraph(profiles, processor)
  return _ClusterOffsetsLists(profiles, processor, limit_cluster_size)
......@@ -6,6 +6,7 @@
"""Tests for cluster.py."""
import unittest
import json
import cluster
import process_profiles
......@@ -103,7 +104,6 @@ class ClusteringTestCase(unittest.TestCase):
list('badf'), list('baef')])
self.assertEqual(list('fedcba'), c.ClusterToList(size_map))
def testClusterOffsets(self):
processor = TestSymbolOffsetProcessor([
SimpleTestSymbol('linker_script_start_of_text', 0, 0),
......@@ -124,12 +124,239 @@ class ClusteringTestCase(unittest.TestCase):
ProfileFile(51, 1, 'gpu-process'): [6000, 7000],
ProfileFile(70, 0, ''): [1000, 2000, 6000, 8000, 9000],
ProfileFile(70, 1, ''): [9000, 5000, 3000]})
syms = cluster.ClusterOffsets(mgr, processor, False)
syms = cluster.ClusterOffsets(mgr, processor, limit_cluster_size=False)
self.assertListEqual(list('236148957'), syms)
syms = cluster.ClusterOffsets(mgr, processor, True)
syms = cluster.ClusterOffsets(mgr, processor, limit_cluster_size=True)
self.assertListEqual(list('236489517'), syms)
def testClusteringDistancesForCallGraph(self):
c = cluster.Clustering()
callerA = cluster.CallerInfo(caller_symbol='a', count=1)
callerB = cluster.CallerInfo(caller_symbol='b', count=2)
callerC = cluster.CallerInfo(caller_symbol='c', count=3)
callerD = cluster.CallerInfo(caller_symbol='d', count=100)
callerE = cluster.CallerInfo(caller_symbol='e', count=200)
calleeA = cluster.CalleeInfo(index=4, callee_symbol='a', misses=0,
caller_and_count=[])
calleeB = cluster.CalleeInfo(index=8, callee_symbol='b', misses=1,
caller_and_count=[callerA])
calleeC = cluster.CalleeInfo(index=12, callee_symbol='c', misses=1,
caller_and_count=[callerA, callerE])
calleeD = cluster.CalleeInfo(index=20, callee_symbol='d', misses=1,
caller_and_count=[callerB, callerC, callerE])
calleeF = cluster.CalleeInfo(index=28, callee_symbol='f', misses=10,
caller_and_count=[callerD])
process1 = [calleeA, calleeB, calleeC, calleeD]
process2 = [calleeA, calleeB, calleeC, calleeD, calleeF]
call_graph = [process1, process2]
whitelist = ['e', 'g', 'h', 'k', 'l']
c.AddSymbolCallGraph(call_graph, whitelist)
distances = {}
for n in c._neighbors:
self.assertFalse((n.src, n.dst) in distances)
distances[(n.src, n.dst)] = n.dist
self.assertEqual(5, len(distances))
self.assertEquals(-2, distances[('a', 'b')])
self.assertEquals(-2, distances[('a', 'c')])
self.assertEquals(-4, distances[('b', 'd')])
self.assertEquals(-6, distances[('c', 'd')])
self.assertEquals(-100, distances[('d', 'f')])
self.assertEquals(list('abcdf'), c.ClusterToList())
  def testClusterOffsetsFromCallGraph(self):
    # End-to-end check of call-graph clustering: four canned JSON process
    # dumps (two renderer, one browser, one gpu-process) are mapped to
    # symbols and merged into one ordering. A caller_offset of "0" records
    # misses; offset 7500 falls inside whitelisted (size 0) symbol '8'.
    process1 = ('{"call_graph": [ {'
                '"callee_offset": "1000",'
                '"caller_and_count": [ {'
                '"caller_offset": "0",'
                '"count": "2"'
                '} ],'
                '"index": "61496"'
                '}, {'
                '"callee_offset": "7000",'
                '"caller_and_count": [ {'
                '"caller_offset": "1000",'
                '"count": "2"'
                '}, {'
                '"caller_offset": "7500",'
                '"count": "100"'
                '} ],'
                '"index": "61500"'
                '}, {'
                '"callee_offset": "6000",'
                '"caller_and_count": [ {'
                '"caller_offset": "1000",'
                '"count": "4"'
                '}, {'
                '"caller_offset": "7000",'
                '"count": "3"'
                '}, {'
                '"caller_offset": "7500",'
                '"count": "2"'
                '}, {'
                '"caller_offset": "0",'
                '"count": "3"'
                '} ],'
                '"index": "47860"'
                '}, {'
                '"callee_offset": "3000",'
                '"caller_and_count": [ {'
                '"caller_offset": "6000",'
                '"count": "11"'
                '} ],'
                '"index": "47900"'
                '} ],'
                '"total_calls_count": "127"'
                '}')
    process2 = ('{"call_graph": [ {'
                '"callee_offset": "1000",'
                '"caller_and_count": [ {'
                '"caller_offset": "0",'
                '"count": "2"'
                '} ],'
                '"index": "61496"'
                '}, {'
                '"callee_offset": "5000",'
                '"caller_and_count": [ {'
                '"caller_offset": "1000",'
                '"count": "20"'
                '}, {'
                '"caller_offset": "5000",'
                '"count": "100"'
                '}, {'
                '"caller_offset": "3000",'
                '"count": "40"'
                '} ],'
                '"index": "61500"'
                '}, {'
                '"callee_offset": "3000",'
                '"caller_and_count": [ {'
                '"caller_offset": "5000",'
                '"count": "10"'
                '}, {'
                '"caller_offset": "0",'
                '"count": "10"'
                '} ],'
                '"index": "47860"'
                '} ],'
                '"total_calls_count": "182"'
                '}')
    # Browser process dump.
    process3 = ('{"call_graph": [ {'
                '"callee_offset": "8000",'
                '"caller_and_count": [ {'
                '"caller_offset": "0",'
                '"count": "5"'
                '} ],'
                '"index": "61496"'
                '}, {'
                '"callee_offset": "2000",'
                '"caller_and_count": [ {'
                '"caller_offset": "8000",'
                '"count": "100"'
                '} ],'
                '"index": "61500"'
                '}, {'
                '"callee_offset": "4000",'
                '"caller_and_count": [ {'
                '"caller_offset": "8000",'
                '"count": "20"'
                '} ],'
                '"index": "61504"'
                '}, {'
                '"callee_offset": "9000",'
                '"caller_and_count": [ {'
                '"caller_offset": "8000",'
                '"count": "50"'
                '} ],'
                '"index": "61512"'
                '}, {'
                '"callee_offset": "7000",'
                '"caller_and_count": [ {'
                '"caller_offset": "2000",'
                '"count": "15"'
                '}, {'
                '"caller_offset": "4000",'
                '"count": "20"'
                '}, {'
                '"caller_offset": "9000",'
                '"count": "80"'
                '}, {'
                '"caller_offset": "0",'
                '"count": "400"'
                '} ],'
                '"index": "61516"'
                '} ],'
                '"total_calls_count": "690"'
                '}')
    # Gpu-process dump ("other" process type in the final merge).
    process4 = ('{"call_graph": [ {'
                '"callee_offset": "8000",'
                '"caller_and_count": [ {'
                '"caller_offset": "0",'
                '"count": "10"'
                '} ],'
                '"index": "61496"'
                '}, {'
                '"callee_offset": "2000",'
                '"caller_and_count": [ {'
                '"caller_offset": "8000",'
                '"count": "100"'
                '} ],'
                '"index": "61500"'
                '}, {'
                '"callee_offset": "6000",'
                '"caller_and_count": [ {'
                '"caller_offset": "7000",'
                '"count": "10"'
                '} , {'
                '"caller_offset": "7500",'
                '"count": "2"'
                '} ],'
                '"index": "61504"'
                '}, {'
                '"callee_offset": "7000",'
                '"caller_and_count": [ {'
                '"caller_offset": "8000",'
                '"count": "300"'
                '}, {'
                '"caller_offset": "7500",'
                '"count": "100"'
                '}, {'
                '"caller_offset": "2000",'
                '"count": "15"'
                '}, {'
                '"caller_offset": "0",'
                '"count": "50"'
                '} ],'
                '"index": "61516"'
                '} ],'
                '"total_calls_count": "587"'
                '}')
    # Symbols are (name, offset, size); size-0 symbol '8' is whitelisted.
    processor = TestSymbolOffsetProcessor([
        SimpleTestSymbol('linker_script_start_of_text', 0, 0),
        SimpleTestSymbol('1', 1000, 999),
        SimpleTestSymbol('2', 2000, 999),
        SimpleTestSymbol('3', 3000, 999),
        SimpleTestSymbol('4', 4000, 16),
        SimpleTestSymbol('5', 5000, 16),
        SimpleTestSymbol('6', 6000, 999),
        SimpleTestSymbol('7', 7000, 16),
        SimpleTestSymbol('8', 7100, 0),  # whitelist (size 0)
        SimpleTestSymbol('9', 8000, 999),
        SimpleTestSymbol('10', 9000, 16)])
    mgr = TestProfileManager({
        ProfileFile(40, 0, 'renderer'): json.loads(process1),
        ProfileFile(50, 1, 'renderer'): json.loads(process2),
        ProfileFile(51, 0, 'browser'): json.loads(process3),
        ProfileFile(51, 1, 'gpu-process'): json.loads(process4)})
    syms = cluster.ClusterOffsets(mgr, processor, limit_cluster_size=False,
                                  call_graph=True)
    # Renderer symbols come first, then browser, then gpu-process leftovers.
    self.assertListEqual(['7', '6', '1', '5', '3', '9', '2', '10', '4'], syms)
if __name__ == "__main__":
......
......@@ -273,14 +273,17 @@ class ClankCompiler(object):
'{}.so'.format(self._libname))
self.chrome_apk = os.path.join(self._out_dir, 'Release', 'apks', self._apk)
def Build(self, instrumented, target):
def Build(self, instrumented, use_call_graph, target):
"""Builds the provided ninja target with or without order_profiling on.
Args:
instrumented: (bool) Whether we want to build an instrumented binary.
use_call_graph: (bool) Whether to use the call graph instrumentation.
target: (str) The name of the ninja target to build.
"""
self._step_recorder.BeginStep('Compile %s' % target)
assert not use_call_graph or instrumented, ('You can not enable call graph '
'without instrumentation!')
# Set the "Release Official" flavor, the parts affecting performance.
args = [
......@@ -291,6 +294,7 @@ class ClankCompiler(object):
'target_os="android"',
'use_goma=' + str(self._use_goma).lower(),
'use_order_profiling=' + str(instrumented).lower(),
'use_call_graph=' + str(use_call_graph).lower(),
]
args += _ARCH_GN_ARGS[self._arch]
if self._goma_dir:
......@@ -313,27 +317,29 @@ class ClankCompiler(object):
['ninja', '-C', os.path.join(self._out_dir, 'Release'),
'-j' + str(self._jobs), '-l' + str(self._max_load), target])
def CompileChromeApk(self, instrumented, force_relink=False):
  def CompileChromeApk(self, instrumented, use_call_graph, force_relink=False):
    """Builds a Chrome.apk either with or without order_profiling on.

    Args:
      instrumented: (bool) Whether to build an instrumented apk.
      use_call_graph: (bool) Whether to use the call graph instrumentation.
      force_relink: Whether libchromeview.so should be re-created.
    """
    if force_relink:
      # Deleting the library forces the link step to run again.
      self._step_recorder.RunCommand(['rm', '-rf', self.lib_chrome_so])
    self.Build(instrumented, use_call_graph, self._apk_target)
def CompileLibchrome(self, instrumented, force_relink=False):
  def CompileLibchrome(self, instrumented, use_call_graph, force_relink=False):
    """Builds a libchrome.so either with or without order_profiling on.

    Args:
      instrumented: (bool) Whether to build an instrumented libchrome.so.
      use_call_graph: (bool) Whether to use the call graph instrumentation.
      force_relink: (bool) Whether libchrome.so should be re-created.
    """
    if force_relink:
      # Deleting the library forces the link step to run again.
      self._step_recorder.RunCommand(['rm', '-rf', self.lib_chrome_so])
    self.Build(instrumented, use_call_graph, self._libchrome_target)
class OrderfileUpdater(object):
......@@ -567,9 +573,11 @@ class OrderfileGenerator(object):
def __init__(self, options, orderfile_updater_class):
self._options = options
self._instrumented_out_dir = os.path.join(
self._BUILD_ROOT, self._options.arch + '_instrumented_out')
if self._options.use_call_graph:
self._instrumented_out_dir += '_call_graph'
self._uninstrumented_out_dir = os.path.join(
self._BUILD_ROOT, self._options.arch + '_uninstrumented_out')
self._no_orderfile_out_dir = os.path.join(
......@@ -677,7 +685,8 @@ class OrderfileGenerator(object):
profiles = process_profiles.ProfileManager(files)
processor = process_profiles.SymbolOffsetProcessor(
self._compiler.lib_chrome_so)
ordered_symbols = cluster.ClusterOffsets(profiles, processor)
ordered_symbols = cluster.ClusterOffsets(profiles, processor,
call_graph=self._options.use_call_graph)
if not ordered_symbols:
raise Exception('Failed to get ordered symbols')
for sym in ordered_symbols:
......@@ -945,7 +954,9 @@ class OrderfileGenerator(object):
open(orderfile_path, 'w').close()
# Build APK to be installed on the device.
self._compiler.CompileChromeApk(False, force_relink=True)
self._compiler.CompileChromeApk(instrumented=False,
use_call_graph=False,
force_relink=True)
benchmark_results = dict()
benchmark_results['Speedometer2.0'] = self._PerformanceBenchmark(
self._compiler.chrome_apk)
......@@ -991,7 +1002,9 @@ class OrderfileGenerator(object):
# If there are pregenerated profiles, the instrumented build should
# not be changed to avoid invalidating the pregenerated profile
# offsets.
self._compiler.CompileChromeApk(True)
self._compiler.CompileChromeApk(instrumented=True,
use_call_graph=
self._options.use_call_graph)
self._GenerateAndProcessProfile()
self._MaybeArchiveOrderfile(self._GetUnpatchedOrderfileFilename())
profile_uploaded = True
......@@ -1026,14 +1039,17 @@ class OrderfileGenerator(object):
self._options.system_health_orderfile, self._monochrome,
self._options.public, self._GetPathToOrderfile())
self._compiler.CompileLibchrome(False)
self._compiler.CompileLibchrome(instrumented=False,
use_call_graph=False)
self._PatchOrderfile()
# Because identical code folding is a bit different with and without
# the orderfile build, we need to re-patch the orderfile with code
# folding as close to the final version as possible.
self._compiler.CompileLibchrome(False, force_relink=True)
self._compiler.CompileLibchrome(instrumented=False,
use_call_graph=False, force_relink=True)
self._PatchOrderfile()
self._compiler.CompileLibchrome(False, force_relink=True)
self._compiler.CompileLibchrome(instrumented=False,
use_call_graph=False, force_relink=True)
self._VerifySymbolOrder()
self._MaybeArchiveOrderfile(self._GetPathToOrderfile())
finally:
......@@ -1175,7 +1191,8 @@ def CreateArgumentParser():
'checkout; performs no other action'))
parser.add_argument('--new-commit-flow', action='store_true',
help='Use the new two-step commit flow.')
parser.add_argument('--use-call-graph', action='store_true', default=False,
help='Use call graph instrumentation.')
profile_android_startup.AddProfileCollectionArguments(parser)
return parser
......
......@@ -11,6 +11,7 @@ import logging
import operator
import os
import sys
import json
_SRC_PATH = os.path.abspath(os.path.join(
os.path.dirname(__file__), os.pardir, os.pardir))
......@@ -59,6 +60,8 @@ class SymbolOffsetProcessor(object):
self._offset_to_primary = None
self._offset_to_symbols = None
self._offset_to_symbol_info = None
# |_whitelist| will contain symbols whose size is 0.
self._whitelist = None
def SymbolInfos(self):
"""The symbols associated with this processor's binary.
......@@ -243,6 +246,45 @@ class SymbolOffsetProcessor(object):
'to any symbol'.format(dump_offset))
update(i, symbol_info.offset)
  def GetWhitelistSymbols(self):
    """Returns the set of names of the symbols whose size is zero.

    Lazily computed: calls GetDumpOffsetToSymboInfolIncludingWhitelist(),
    which fills in |_whitelist| as a side effect.
    """
    if self._whitelist is None:
      self.GetDumpOffsetToSymboInfolIncludingWhitelist()
    return self._whitelist
def GetDumpOffsetToSymboInfolIncludingWhitelist(self):
"""Computes an array mapping each word in .text to a symbol.
This list includes symbols with size 0. It considers all offsets till the
next symbol to map to the symbol of size 0.
Returns:
[symbol_extractor.SymbolInfo or None] For every 4 bytes of the .text
section, maps it to a symbol, or None.
"""
if self._whitelist is None:
self._whitelist = set()
symbols = self.SymbolInfos()
start_syms = [s for s in symbols
if s.name == cygprofile_utils.START_OF_TEXT_SYMBOL]
assert len(start_syms) == 1, 'Can\'t find unique start of text symbol'
start_of_text = start_syms[0].offset
self.GetDumpOffsetToSymbolInfo()
max_idx = len(self._offset_to_symbol_info)
for sym in symbols:
if sym.size != 0 or sym.offset == start_of_text:
continue
self._whitelist.add(sym.name)
idx = (sym.offset - start_of_text)/ 2
assert self._offset_to_symbol_info[idx] == sym, (
'Unexpected unset offset')
idx += 1
while idx < max_idx and self._offset_to_symbol_info[idx] is None:
self._offset_to_symbol_info[idx] = sym
idx += 1
return self._offset_to_symbol_info
def GetDumpOffsetToSymbolInfo(self):
"""Computes an array mapping each word in .text to a symbol.
......@@ -273,6 +315,13 @@ class SymbolOffsetProcessor(object):
# code). In this case, keep the one that started first.
if other_symbol is None or other_symbol.offset > sym.offset:
self._offset_to_symbol_info[i] = sym
if sym.name != cygprofile_utils.START_OF_TEXT_SYMBOL and sym.size == 0:
idx = offset / 2
assert (self._offset_to_symbol_info[idx] is None or
self._offset_to_symbol_info[idx].size == 0), (
'Unexpected symbols overlapping')
self._offset_to_symbol_info[idx] = sym
return self._offset_to_symbol_info
......@@ -429,6 +478,32 @@ class ProfileManager(object):
offsets_by_process[self._ProcessName(f)].append(self._ReadOffsets(f))
return offsets_by_process
def _SanityCheckAllCallsCapturedByTheInstrumentation(self, process_info):
total_calls_count = long(process_info['total_calls_count'])
call_graph = process_info['call_graph']
count = 0
for el in call_graph:
for bucket in el['caller_and_count']:
count += long(bucket['count'])
assert total_calls_count == count, ('Instrumentation missed calls!. '
'{} != {}').format(total_calls_count,
count)
def GetProcessOffsetGraph(self):
"""Returns a dict that maps each process type to a list of processes's
call graph data.
"""
graph_by_process = collections.defaultdict(list)
for f in self._filenames:
process_info = self._ReadJSON(f)
assert ('total_calls_count' in process_info
and 'call_graph' in process_info), ('Unexpected JSON format for '
'%s.' % f)
self._SanityCheckAllCallsCapturedByTheInstrumentation(process_info)
graph_by_process[self._ProcessName(f)].append(process_info['call_graph'])
return graph_by_process
def GetRunGroupOffsets(self, phase=None):
"""Merges files from each run group and returns offset list for each.
......@@ -479,6 +554,11 @@ class ProfileManager(object):
def _ReadOffsets(self, filename):
return [int(x.strip()) for x in open(filename)]
def _ReadJSON(self, filename):
with open(filename) as f:
file_content = json.load(f)
return file_content
def _ComputeRunGroups(self):
self._run_groups = []
for f in self._filenames:
......
......@@ -29,6 +29,9 @@ class TestProfileManager(process_profiles.ProfileManager):
def _ReadOffsets(self, filename):
return self._filecontents_mapping[filename]
  def _ReadJSON(self, filename):
    # Test override: returns the canned (already parsed) content registered
    # for |filename| instead of reading and parsing a file from disk.
    return self._filecontents_mapping[filename]
def ProfileFile(timestamp_sec, phase, process_name=None):
global _FILE_COUNTER
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment