Commit 928ffadc authored by gabadie, committed by Commit bot

sandwich: Make metrics extraction more customizable.

BUG=582080

Review-Url: https://codereview.chromium.org/2009883002
Cr-Commit-Position: refs/heads/master@{#397134}
parent 5f265d3b
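In short, the monolithic sandwich_metrics.ExtractMetricsFromRunnerOutputDirectory() is split into two reusable pieces: sandwich_runner.WalkRepeatedRuns(), which enumerates the per-repeat directories of a runner output directory, and sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(), which computes the benchmark-independent columns. Each benchmark builder then assembles its own CSV by prepending benchmark-specific columns. A minimal sketch of the resulting calling pattern, using the names introduced in the diff below; WriteMetricsCsv and its arguments are hypothetical wrappers added only for illustration:

```python
# Sketch only: illustrates the calling pattern this CL introduces.
# WriteMetricsCsv, runner_output_dir and csv_path are hypothetical.
import csv
import os

import loading_trace
import sandwich_metrics
import sandwich_runner


def WriteMetricsCsv(runner_output_dir, csv_path):
  additional_column_names = ['url', 'repeat_id']  # benchmark-specific columns
  rows = []
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      runner_output_dir):
    # Load the trace once and hand it to the common extractor (its contract).
    trace = loading_trace.LoadingTrace.FromJsonFile(
        os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME))
    row = {'url': trace.url, 'repeat_id': repeat_id}
    row.update(sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
        repeat_dir, trace))
    rows.append(row)
  rows.sort(key=lambda r: r['repeat_id'])
  with open(csv_path, 'w') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=(
        additional_column_names + sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
    writer.writeheader()
    for row in rows:
      writer.writerow(row)
```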
@@ -32,19 +32,9 @@ import sandwich_misc
 import tracing
 
 
-CSV_FIELD_NAMES = [
-    'repeat_id',
-    'url',
+COMMON_CSV_COLUMN_NAMES = [
     'chromium_commit',
     'platform',
-    'subresource_discoverer',
-    'subresource_count',
-    # The amount of subresources detected at SetupBenchmark step.
-    'subresource_count_theoretic',
-    # Amount of subresources for caching as suggested by the subresource
-    # discoverer.
-    'cached_subresource_count_theoretic',
-    'cached_subresource_count',
     'first_layout',
     'first_contentful_paint',
     'total_load',
@@ -222,30 +212,6 @@ def _ExtractMemoryMetrics(loading_trace):
   }
 
 
-def _ExtractBenchmarkStatistics(benchmark_setup, loading_trace):
-  """Extracts some useful statistics from a benchmark run.
-
-  Args:
-    benchmark_setup: benchmark_setup: dict representing the benchmark setup
-      JSON. The JSON format is according to:
-      PrefetchBenchmarkBuilder.PopulateLoadBenchmark.SetupBenchmark.
-    loading_trace: loading_trace_module.LoadingTrace.
-
-  Returns:
-    Dictionary with all extracted fields set.
-  """
-  return {
-    'subresource_discoverer': benchmark_setup['subresource_discoverer'],
-    'subresource_count': len(sandwich_misc.ListUrlRequests(
-        loading_trace, sandwich_misc.RequestOutcome.All)),
-    'subresource_count_theoretic': len(benchmark_setup['url_resources']),
-    'cached_subresource_count': len(sandwich_misc.ListUrlRequests(
-        loading_trace, sandwich_misc.RequestOutcome.ServedFromCache)),
-    'cached_subresource_count_theoretic':
-        len(benchmark_setup['cache_whitelist']),
-  }
-
-
 def _ExtractCompletenessRecordFromVideo(video_path):
   """Extracts the completeness record from a video.
@@ -283,7 +249,7 @@ def _ExtractCompletenessRecordFromVideo(video_path):
   return [(time, FrameProgress(hist)) for time, hist in histograms]
 
 
-def ComputeSpeedIndex(completeness_record):
+def _ComputeSpeedIndex(completeness_record):
   """Computes the speed-index from a completeness record.
 
   Args:
@@ -305,82 +271,41 @@ def ComputeSpeedIndex(completeness_record):
   return speed_index
 
 
-def _ExtractMetricsFromRunDirectory(benchmark_setup, run_directory_path):
-  """Extracts all the metrics from traces and video of a sandwich run.
+def ExtractCommonMetricsFromRepeatDirectory(repeat_dir, trace):
+  """Extracts all the metrics from traces and video of a sandwich run repeat
+  directory.
 
   Args:
-    benchmark_setup: benchmark_setup: dict representing the benchmark setup
-      JSON. The JSON format is according to:
-      PrefetchBenchmarkBuilder.PopulateLoadBenchmark.SetupBenchmark.
-    run_directory_path: Path of the run directory.
+    repeat_dir: Path of the repeat directory within a run directory.
+    trace: preloaded LoadingTrace in |repeat_dir|
+
+  Contract:
+    trace == LoadingTrace.FromJsonFile(
+        os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME))
 
   Returns:
     Dictionary of extracted metrics.
   """
-  trace_path = os.path.join(run_directory_path, 'trace.json')
-  logging.info('processing trace \'%s\'' % trace_path)
-  loading_trace = loading_trace_module.LoadingTrace.FromJsonFile(trace_path)
   run_metrics = {
-      'url': loading_trace.url,
-      'chromium_commit': loading_trace.metadata['chromium_commit'],
-      'platform': (loading_trace.metadata['platform']['os'] + '-' +
-          loading_trace.metadata['platform']['product_model'])
+      'chromium_commit': trace.metadata['chromium_commit'],
+      'platform': (trace.metadata['platform']['os'] + '-' +
+          trace.metadata['platform']['product_model'])
   }
-  run_metrics.update(_ExtractDefaultMetrics(loading_trace))
-  run_metrics.update(_ExtractMemoryMetrics(loading_trace))
-  if benchmark_setup:
-    run_metrics.update(
-        _ExtractBenchmarkStatistics(benchmark_setup, loading_trace))
-  video_path = os.path.join(run_directory_path, 'video.mp4')
+  run_metrics.update(_ExtractDefaultMetrics(trace))
+  run_metrics.update(_ExtractMemoryMetrics(trace))
+  video_path = os.path.join(repeat_dir, sandwich_runner.VIDEO_FILENAME)
   if os.path.isfile(video_path):
     logging.info('processing speed-index video \'%s\'' % video_path)
     try:
       completeness_record = _ExtractCompletenessRecordFromVideo(video_path)
-      run_metrics['speed_index'] = ComputeSpeedIndex(completeness_record)
+      run_metrics['speed_index'] = _ComputeSpeedIndex(completeness_record)
     except video.BoundingBoxNotFoundException:
       # Sometimes the bounding box for the web content area is not present. Skip
       # calculating Speed Index.
       run_metrics['speed_index'] = _FAILED_CSV_VALUE
   else:
     run_metrics['speed_index'] = _UNAVAILABLE_CSV_VALUE
-  for key, value in loading_trace.metadata['network_emulation'].iteritems():
+  for key, value in trace.metadata['network_emulation'].iteritems():
     run_metrics['net_emul.' + key] = value
+  assert set(run_metrics.keys()) == set(COMMON_CSV_COLUMN_NAMES)
   return run_metrics
-
-
-def ExtractMetricsFromRunnerOutputDirectory(benchmark_setup_path,
-                                            output_directory_path):
-  """Extracts all the metrics from all the traces of a sandwich runner output
-  directory.
-
-  Args:
-    benchmark_setup_path: Path of the JSON of the benchmark setup.
-    output_directory_path: The sandwich runner's output directory to extract the
-      metrics from.
-
-  Returns:
-    List of dictionaries.
-  """
-  benchmark_setup = None
-  if benchmark_setup_path:
-    benchmark_setup = json.load(open(benchmark_setup_path))
-  assert os.path.isdir(output_directory_path)
-  metrics = []
-  for node_name in os.listdir(output_directory_path):
-    if not os.path.isdir(os.path.join(output_directory_path, node_name)):
-      continue
-    try:
-      repeat_id = int(node_name)
-    except ValueError:
-      continue
-    run_directory_path = os.path.join(output_directory_path, node_name)
-    run_metrics = _ExtractMetricsFromRunDirectory(
-        benchmark_setup, run_directory_path)
-    run_metrics['repeat_id'] = repeat_id
-    # TODO(gabadie): Make common metrics extraction with benchmark type
-    #   specific CSV column.
-    # assert set(run_metrics.keys()) == set(CSV_FIELD_NAMES)
-    metrics.append(run_metrics)
-  assert len(metrics) > 0, ('Looks like \'{}\' was not a sandwich runner ' +
-                            'output directory.').format(output_directory_path)
-  return metrics
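The new "Contract" clause above means the caller loads the trace once and hands it in, rather than the helper re-reading trace.json from disk. A hedged sketch of a conforming call; the repeat directory path is hypothetical:

```python
# Sketch of a call that satisfies the documented contract; '/tmp/run/0' stands
# for a hypothetical repeat directory produced by SandwichRunner.
import os

import loading_trace
import sandwich_metrics
import sandwich_runner

repeat_dir = '/tmp/run/0'
trace = loading_trace.LoadingTrace.FromJsonFile(
    os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME))
common_metrics = sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
    repeat_dir, trace)
# Per the assert in the function above, the result keys are exactly
# sandwich_metrics.COMMON_CSV_COLUMN_NAMES.
assert set(common_metrics) == set(sandwich_metrics.COMMON_CSV_COLUMN_NAMES)
```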
@@ -232,7 +232,7 @@ class PageTrackTest(unittest.TestCase):
         point(400, 1.0),
     ]
     self.assertEqual(120 + 70 * 0.6 + 90 * 0.25,
-                     puller.ComputeSpeedIndex(completness_record))
+                     puller._ComputeSpeedIndex(completness_record))
 
     completness_record = [
         point(70, 0.0),
@@ -242,7 +242,7 @@ class PageTrackTest(unittest.TestCase):
         point(240, 1.0),
     ]
     self.assertEqual(80 + 60 * 0.7 + 10 * 0.4 + 20 * 0.1,
-                     puller.ComputeSpeedIndex(completness_record))
+                     puller._ComputeSpeedIndex(completness_record))
 
     completness_record = [
         point(90, 0.0),
@@ -251,7 +251,7 @@ class PageTrackTest(unittest.TestCase):
         point(230, 1.0),
     ]
     with self.assertRaises(ValueError):
-      puller.ComputeSpeedIndex(completness_record)
+      puller._ComputeSpeedIndex(completness_record)
 
 
 if __name__ == '__main__':
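For context on the expected values in these tests: a completeness record is a list of (timestamp, completeness) pairs, and the speed index is the time integral of the remaining incompleteness, so each interval contributes its duration weighted by one minus the completeness at its start. A hedged sketch of that computation; this follows the standard Speed Index definition the assertions above are consistent with, it is not a copy of the real _ComputeSpeedIndex, and the example record is made up:

```python
# Hedged sketch; the real sandwich_metrics._ComputeSpeedIndex also validates
# its input (the third test above expects a ValueError for a malformed
# record), which is omitted here.
def SpeedIndexSketch(completeness_record):
  """completeness_record: list of (msec, completeness in [0, 1]) pairs."""
  speed_index = 0.0
  last_time, last_completeness = completeness_record[0]
  for time, completeness in completeness_record[1:]:
    # Each interval contributes its duration weighted by how incomplete the
    # page still was at the start of the interval.
    speed_index += (time - last_time) * (1.0 - last_completeness)
    last_time, last_completeness = time, completeness
  return speed_index

# Made-up record with the same shape as the first assertion:
#   SpeedIndexSketch([(0, 0.0), (120, 0.4), (190, 0.75), (280, 1.0)])
#   == 120 * 1.0 + 70 * 0.6 + 90 * 0.25
```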
@@ -254,3 +254,24 @@ class SandwichRunner(object):
     self._PullCacheFromDevice()
     self._chrome_ctl = None
+
+
+def WalkRepeatedRuns(runner_output_dir):
+  """Yields unordered (repeat id, path of the repeat directory).
+
+  Args:
+    runner_output_dir: Same as for SandwichRunner.output_dir.
+  """
+  repeated_run_count = 0
+  for node_name in os.listdir(runner_output_dir):
+    repeat_dir = os.path.join(runner_output_dir, node_name)
+    if not os.path.isdir(repeat_dir):
+      continue
+    try:
+      repeat_id = int(node_name)
+    except ValueError:
+      continue
+    yield repeat_id, repeat_dir
+    repeated_run_count += 1
+  assert repeated_run_count > 0, ('Error: not a sandwich runner output '
+                                  'directory: {}').format(runner_output_dir)
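WalkRepeatedRuns() yields the repeats in directory-listing order, so callers that want deterministic CSV rows sort by repeat_id afterwards, as both builders below do. A small usage sketch; the output directory path is hypothetical:

```python
# Hypothetical usage; '/tmp/sandwich-output' stands for a SandwichRunner
# output_dir whose repeats live in sub-directories named 0/, 1/, 2/, ...
import sandwich_runner

runner_output_dir = '/tmp/sandwich-output'
# Non-integer directory names are skipped; the generator asserts if no repeat
# directory is found at all.
for repeat_id, repeat_dir in sorted(sandwich_runner.WalkRepeatedRuns(
    runner_output_dir)):
  print 'repeat %d -> %s' % (repeat_id, repeat_dir)
```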
@@ -101,6 +101,8 @@ class StaleWhileRevalidateBenchmarkBuilder(task_manager.Builder):
       depends on: <transformer_list_name>/{swr,worstcase}-run/
       depends on: some tasks saved by PopulateCommonPipelines()
     """
+    additional_column_names = ['url', 'repeat_id']
+
     task_prefix = os.path.join(transformer_list_name, '')
     if enable_swr:
       task_prefix += 'swr'
@@ -124,15 +126,27 @@ class StaleWhileRevalidateBenchmarkBuilder(task_manager.Builder):
     @self.RegisterTask(task_prefix + '-metrics.csv', [RunBenchmark])
     def ExtractMetrics():
-      trace_metrics_list = \
-          sandwich_metrics.ExtractMetricsFromRunnerOutputDirectory(
-              None, RunBenchmark.path)
-      trace_metrics_list.sort(key=lambda e: e['repeat_id'])
+      run_metrics_list = []
+      for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
+          RunBenchmark.path):
+        trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
+        logging.info('processing trace: %s', trace_path)
+        trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
+        run_metrics = {
+            'url': trace.url,
+            'repeat_id': repeat_id,
+        }
+        run_metrics.update(
+            sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
+                repeat_dir, trace))
+        run_metrics_list.append(run_metrics)
+      run_metrics_list.sort(key=lambda e: e['repeat_id'])
       with open(ExtractMetrics.path, 'w') as csv_file:
-        writer = csv.DictWriter(csv_file,
-                                fieldnames=sandwich_metrics.CSV_FIELD_NAMES)
+        writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
+            sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
-        for trace_metrics in trace_metrics_list:
+        for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
     self._common_builder.default_final_tasks.append(ExtractMetrics)
@@ -5,12 +5,14 @@
 import csv
-import logging
 import json
+import logging
 import os
 import shutil
 
 import chrome_cache
 import common_util
 import emulation
+import loading_trace
 import sandwich_metrics
 import sandwich_misc
 import sandwich_runner
@@ -186,6 +188,18 @@ class PrefetchBenchmarkBuilder(task_manager.Builder):
       depends on: common/<subresource_discoverer>-setup.json
       depends on: some tasks saved by PopulateCommonPipelines()
     """
+    additional_column_names = [
+        'url',
+        'repeat_id',
+        'subresource_discoverer',
+        'subresource_count',
+        # The amount of subresources detected at SetupBenchmark step.
+        'subresource_count_theoretic',
+        # Amount of subresources for caching as suggested by the subresource
+        # discoverer.
+        'cached_subresource_count_theoretic',
+        'cached_subresource_count']
+
     assert subresource_discoverer in sandwich_misc.SUBRESOURCE_DISCOVERERS
     assert 'common' not in sandwich_misc.SUBRESOURCE_DISCOVERERS
     shared_task_prefix = os.path.join('common', subresource_discoverer)
@@ -233,17 +247,42 @@ class PrefetchBenchmarkBuilder(task_manager.Builder):
     @self.RegisterTask(task_prefix + '-metrics.csv',
                        dependencies=[RunBenchmark])
     def ExtractMetrics():
+      # TODO(gabadie): Performance improvement: load each trace only once and
+      #   use it for validation and extraction of metrics later.
       sandwich_misc.VerifyBenchmarkOutputDirectory(
           SetupBenchmark.path, RunBenchmark.path)
-      trace_metrics_list = \
-          sandwich_metrics.ExtractMetricsFromRunnerOutputDirectory(
-              SetupBenchmark.path, RunBenchmark.path)
-      trace_metrics_list.sort(key=lambda e: e['repeat_id'])
+
+      benchmark_setup = json.load(open(SetupBenchmark.path))
+      run_metrics_list = []
+      for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
+          RunBenchmark.path):
+        trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
+        logging.info('processing trace: %s', trace_path)
+        trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
+        run_metrics = {
+            'url': trace.url,
+            'repeat_id': repeat_id,
+            'subresource_discoverer': benchmark_setup['subresource_discoverer'],
+            'subresource_count': len(sandwich_misc.ListUrlRequests(
+                trace, sandwich_misc.RequestOutcome.All)),
+            'subresource_count_theoretic':
+                len(benchmark_setup['url_resources']),
+            'cached_subresource_count': len(sandwich_misc.ListUrlRequests(
+                trace, sandwich_misc.RequestOutcome.ServedFromCache)),
+            'cached_subresource_count_theoretic':
+                len(benchmark_setup['cache_whitelist']),
+        }
+        run_metrics.update(
+            sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
+                repeat_dir, trace))
+        run_metrics_list.append(run_metrics)
+      run_metrics_list.sort(key=lambda e: e['repeat_id'])
       with open(ExtractMetrics.path, 'w') as csv_file:
-        writer = csv.DictWriter(csv_file,
-                                fieldnames=sandwich_metrics.CSV_FIELD_NAMES)
+        writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
+            sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
-        for trace_metrics in trace_metrics_list:
+        for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
     self._common_builder.default_final_tasks.append(ExtractMetrics)
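The "customizable" part of the CL is this column composition: each builder owns an additional_column_names list and prepends it to the shared sandwich_metrics.COMMON_CSV_COLUMN_NAMES when constructing the csv.DictWriter. A hedged sketch of the resulting header for the prefetch benchmark, written to stdout purely for illustration:

```python
import csv
import sys

import sandwich_metrics

# Same benchmark-specific columns as PrefetchBenchmarkBuilder above.
additional_column_names = [
    'url', 'repeat_id', 'subresource_discoverer', 'subresource_count',
    'subresource_count_theoretic', 'cached_subresource_count_theoretic',
    'cached_subresource_count']
writer = csv.DictWriter(sys.stdout, fieldnames=(
    additional_column_names + sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
writer.writeheader()
# csv.DictWriter raises ValueError for rows with keys outside fieldnames,
# which is presumably why ExtractCommonMetricsFromRepeatDirectory asserts its
# result keys match COMMON_CSV_COLUMN_NAMES exactly.
```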