Commit 86308fbe authored by Mikhail Khokhlov, committed by Commit Bot

[tools/perf] Refactoring: parallel test result processing

Before this CL, Results Processor did its processing in several stages:
aggregating traces for all tests in parallel, then computing metrics for
all tests in parallel, and so on. This CL moves the parallelization to
the top level, so that all processing for a particular test is done
inside a single thread. This allows us to:
1) Make the processing of tests independent, so that errors in one of
them do not affect the others.
2) Mark tests that have failures as 'FAIL' in the final results.
3) Add test-specific diagnostics to histograms.

We also add support for the new intermediate results format, in which there
are no benchmarkRun messages and all metadata is contained in testResults.
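
In short, the new top-level flow looks roughly like the condensed sketch
below (assembled from the ProcessTestResult function and the ApplyInParallel
call that this CL adds to processor.py; util.ApplyInParallel is the existing
thread-pool helper in core.results_processor.util):

  def ProcessTestResult(test_result, upload_bucket, results_label,
                        run_identifier, test_suite_start,
                        should_compute_metrics):
    # Everything for one test runs inside the same worker thread.
    AggregateTraces(test_result)
    if upload_bucket is not None:
      UploadArtifacts(test_result, upload_bucket, run_identifier)
    if should_compute_metrics:
      test_result['_histograms'] = histogram_set.HistogramSet()
      compute_metrics.ComputeTBMv2Metrics(test_result)
      ExtractMeasurements(test_result)
      AddDiagnosticsToHistograms(test_result, test_suite_start, results_label)

  util.ApplyInParallel(
      lambda result: ProcessTestResult(
          result, upload_bucket, results_label, run_identifier,
          test_suite_start, should_compute_metrics),
      test_results,
      # An error while processing one test only marks that test as FAIL.
      on_failure=lambda result: result.update(status='FAIL'),
  )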

Bug: 981349, 1015192
Change-Id: I75d036a3ded439e092ee7b892a26bc26f3600520
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1869212
Commit-Queue: Mikhail Khokhlov <khokhlov@google.com>
Reviewed-by: Juan Antonio Navarro Pérez <perezju@chromium.org>
Cr-Commit-Position: refs/heads/master@{#708235}
parent 6f2e929e
...@@ -7,8 +7,6 @@ import logging ...@@ -7,8 +7,6 @@ import logging
import os import os
import time import time
from core.results_processor import util
from tracing.metrics import metric_runner from tracing.metrics import metric_runner
...@@ -24,7 +22,7 @@ HISTOGRAM_DICTS_KEY = 'histogram_dicts' ...@@ -24,7 +22,7 @@ HISTOGRAM_DICTS_KEY = 'histogram_dicts'
HISTOGRAM_DICTS_FILE = 'histogram_dicts.json' HISTOGRAM_DICTS_FILE = 'histogram_dicts.json'
def _PoolWorker(test_result): def _RunMetric(test_result):
metrics = [tag['value'] for tag in test_result['tags'] metrics = [tag['value'] for tag in test_result['tags']
if tag['key'] == 'tbmv2'] if tag['key'] == 'tbmv2']
html_trace = test_result['outputArtifacts'][HTML_TRACE_NAME] html_trace = test_result['outputArtifacts'][HTML_TRACE_NAME]
...@@ -56,7 +54,7 @@ def _PoolWorker(test_result): ...@@ -56,7 +54,7 @@ def _PoolWorker(test_result):
return mre_result.pairs.get('histograms', []) return mre_result.pairs.get('histograms', [])
def ComputeTBMv2Metrics(intermediate_results): def ComputeTBMv2Metrics(test_result):
"""Compute metrics on aggregated traces in parallel. """Compute metrics on aggregated traces in parallel.
For each test run that has an aggregate trace and some TBMv2 metrics listed For each test run that has an aggregate trace and some TBMv2 metrics listed
...@@ -64,40 +62,32 @@ def ComputeTBMv2Metrics(intermediate_results): ...@@ -64,40 +62,32 @@ def ComputeTBMv2Metrics(intermediate_results):
histograms. Note: the order of histograms in the results may be different histograms. Note: the order of histograms in the results may be different
from the order of tests in intermediate_results. from the order of tests in intermediate_results.
""" """
histogram_dicts = [] artifacts = test_result.get('outputArtifacts', {})
work_list = [] # TODO(crbug.com/981349): If metrics have already been computed in
for test_result in intermediate_results['testResults']: # Telemetry, we read it from the file. Remove this branch after Telemetry
artifacts = test_result.get('outputArtifacts', {}) # does not compute metrics anymore.
# TODO(crbug.com/981349): If metrics have already been computed in if HISTOGRAM_DICTS_FILE in artifacts:
# Telemetry, we read it from the file. Remove this branch after Telemetry with open(artifacts[HISTOGRAM_DICTS_FILE]['filePath']) as f:
# does not compute metrics anymore. test_result['_histograms'].ImportDicts(json.load(f))
if HISTOGRAM_DICTS_FILE in artifacts: del artifacts[HISTOGRAM_DICTS_FILE]
with open(artifacts[HISTOGRAM_DICTS_FILE]['filePath']) as f: return
histogram_dicts += json.load(f)
del artifacts[HISTOGRAM_DICTS_FILE] if test_result['status'] == 'SKIP':
continue return
if test_result['status'] == 'SKIP': if (HTML_TRACE_NAME not in artifacts or
continue not any(tag['key'] == 'tbmv2' for tag in test_result.get('tags', []))):
return
if (HTML_TRACE_NAME not in artifacts or
not any(tag['key'] == 'tbmv2' for tag in test_result.get('tags', []))): trace_size_in_mib = (os.path.getsize(artifacts[HTML_TRACE_NAME]['filePath'])
continue / (2 ** 20))
# Bails out on traces that are too big. See crbug.com/812631 for more
trace_size_in_mib = (os.path.getsize(artifacts[HTML_TRACE_NAME]['filePath']) # details.
/ (2 ** 20)) # TODO(crbug.com/1010041): Return a non-zero exit code in this case.
# Bails out on traces that are too big. See crbug.com/812631 for more if trace_size_in_mib > 400:
# details. test_result['status'] = 'FAIL'
# TODO(crbug.com/1010041): Return a non-zero exit code in this case. logging.error('%s: Trace size is too big: %s MiB',
if trace_size_in_mib > 400: test_result['testPath'], trace_size_in_mib)
test_result['status'] = 'FAIL' return
logging.error('%s: Trace size is too big: %s MiB',
test_result['testPath'], trace_size_in_mib) test_result['_histograms'].ImportDicts(_RunMetric(test_result))
continue
work_list.append(test_result)
for dicts in util.ApplyInParallel(_PoolWorker, work_list):
histogram_dicts += dicts
return histogram_dicts
...@@ -11,6 +11,7 @@ from tracing.mre import failure ...@@ -11,6 +11,7 @@ from tracing.mre import failure
from tracing.mre import job from tracing.mre import job
from tracing.mre import mre_result from tracing.mre import mre_result
from tracing.value import histogram from tracing.value import histogram
from tracing.value import histogram_set
import mock import mock
...@@ -21,22 +22,14 @@ GETSIZE_METHOD = 'os.path.getsize' ...@@ -21,22 +22,14 @@ GETSIZE_METHOD = 'os.path.getsize'
class ComputeMetricsTest(unittest.TestCase): class ComputeMetricsTest(unittest.TestCase):
def testComputeTBMv2Metrics(self): def testComputeTBMv2Metrics(self):
in_results = testing.IntermediateResults([ test_result = testing.TestResult(
testing.TestResult( 'benchmark/story1',
'benchmark/story1', output_artifacts={
output_artifacts={ compute_metrics.HTML_TRACE_NAME:
compute_metrics.HTML_TRACE_NAME: testing.Artifact('/trace1.html', 'gs://trace1.html')},
testing.Artifact('/trace1.html', 'gs://trace1.html')}, tags=['tbmv2:metric1'],
tags=['tbmv2:metric1'], )
), test_result['_histograms'] = histogram_set.HistogramSet()
testing.TestResult(
'benchmark/story2',
output_artifacts={
compute_metrics.HTML_TRACE_NAME:
testing.Artifact('/trace2.html', 'gs://trace2.html')},
tags=['tbmv2:metric2'],
),
])
test_dict = histogram.Histogram('a', 'unitless').AsDict() test_dict = histogram.Histogram('a', 'unitless').AsDict()
metrics_result = mre_result.MreResult() metrics_result = mre_result.MreResult()
...@@ -46,42 +39,41 @@ class ComputeMetricsTest(unittest.TestCase): ...@@ -46,42 +39,41 @@ class ComputeMetricsTest(unittest.TestCase):
with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock: with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock:
getsize_mock.return_value = 1000 getsize_mock.return_value = 1000
run_metrics_mock.return_value = metrics_result run_metrics_mock.return_value = metrics_result
histogram_dicts = compute_metrics.ComputeTBMv2Metrics(in_results) compute_metrics.ComputeTBMv2Metrics(test_result)
self.assertEqual(histogram_dicts, [test_dict, test_dict]) histogram_dicts = test_result['_histograms'].AsDicts()
self.assertEqual(in_results['testResults'][0]['status'], 'PASS') self.assertEqual(histogram_dicts, [test_dict])
self.assertEqual(in_results['testResults'][1]['status'], 'PASS') self.assertEqual(test_result['status'], 'PASS')
def testComputeTBMv2MetricsTraceTooBig(self): def testComputeTBMv2MetricsTraceTooBig(self):
in_results = testing.IntermediateResults([ test_result = testing.TestResult(
testing.TestResult( 'benchmark/story1',
'benchmark/story1', output_artifacts={
output_artifacts={ compute_metrics.HTML_TRACE_NAME:
compute_metrics.HTML_TRACE_NAME: testing.Artifact('/trace1.html', 'gs://trace1.html')},
testing.Artifact('/trace1.html', 'gs://trace1.html')}, tags=['tbmv2:metric1'],
tags=['tbmv2:metric1'], )
), test_result['_histograms'] = histogram_set.HistogramSet()
])
with mock.patch(GETSIZE_METHOD) as getsize_mock: with mock.patch(GETSIZE_METHOD) as getsize_mock:
with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock: with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock:
getsize_mock.return_value = 1e9 getsize_mock.return_value = 1e9
histogram_dicts = compute_metrics.ComputeTBMv2Metrics(in_results) compute_metrics.ComputeTBMv2Metrics(test_result)
self.assertEqual(run_metrics_mock.call_count, 0) self.assertEqual(run_metrics_mock.call_count, 0)
histogram_dicts = test_result['_histograms'].AsDicts()
self.assertEqual(histogram_dicts, []) self.assertEqual(histogram_dicts, [])
self.assertEqual(in_results['testResults'][0]['status'], 'FAIL') self.assertEqual(test_result['status'], 'FAIL')
def testComputeTBMv2MetricsFailure(self): def testComputeTBMv2MetricsFailure(self):
in_results = testing.IntermediateResults([ test_result = testing.TestResult(
testing.TestResult( 'benchmark/story1',
'benchmark/story1', output_artifacts={
output_artifacts={ compute_metrics.HTML_TRACE_NAME:
compute_metrics.HTML_TRACE_NAME: testing.Artifact('/trace1.html', 'gs://trace1.html')},
testing.Artifact('/trace1.html', 'gs://trace1.html')}, tags=['tbmv2:metric1'],
tags=['tbmv2:metric1'], )
), test_result['_histograms'] = histogram_set.HistogramSet()
])
metrics_result = mre_result.MreResult() metrics_result = mre_result.MreResult()
metrics_result.AddFailure(failure.Failure(job.Job(0), 0, 0, 0, 0, 0)) metrics_result.AddFailure(failure.Failure(job.Job(0), 0, 0, 0, 0, 0))
...@@ -90,26 +82,27 @@ class ComputeMetricsTest(unittest.TestCase): ...@@ -90,26 +82,27 @@ class ComputeMetricsTest(unittest.TestCase):
with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock: with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock:
getsize_mock.return_value = 100 getsize_mock.return_value = 100
run_metrics_mock.return_value = metrics_result run_metrics_mock.return_value = metrics_result
histogram_dicts = compute_metrics.ComputeTBMv2Metrics(in_results) compute_metrics.ComputeTBMv2Metrics(test_result)
histogram_dicts = test_result['_histograms'].AsDicts()
self.assertEqual(histogram_dicts, []) self.assertEqual(histogram_dicts, [])
self.assertEqual(in_results['testResults'][0]['status'], 'FAIL') self.assertEqual(test_result['status'], 'FAIL')
def testComputeTBMv2MetricsSkipped(self): def testComputeTBMv2MetricsSkipped(self):
in_results = testing.IntermediateResults([ test_result = testing.TestResult(
testing.TestResult( 'benchmark/story1',
'benchmark/story1', output_artifacts={
output_artifacts={ compute_metrics.HTML_TRACE_NAME:
compute_metrics.HTML_TRACE_NAME: testing.Artifact('/trace1.html', 'gs://trace1.html')},
testing.Artifact('/trace1.html', 'gs://trace1.html')}, tags=['tbmv2:metric1'],
tags=['tbmv2:metric1'], status='SKIP',
status='SKIP', )
), test_result['_histograms'] = histogram_set.HistogramSet()
])
with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock: with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock:
histogram_dicts = compute_metrics.ComputeTBMv2Metrics(in_results) compute_metrics.ComputeTBMv2Metrics(test_result)
self.assertEqual(run_metrics_mock.call_count, 0) self.assertEqual(run_metrics_mock.call_count, 0)
histogram_dicts = test_result['_histograms'].AsDicts()
self.assertEqual(histogram_dicts, []) self.assertEqual(histogram_dicts, [])
self.assertEqual(in_results['testResults'][0]['status'], 'SKIP') self.assertEqual(test_result['status'], 'SKIP')
...@@ -9,6 +9,7 @@ https://chromium.googlesource.com/chromium/src/+/master/docs/testing/json_test_r ...@@ -9,6 +9,7 @@ https://chromium.googlesource.com/chromium/src/+/master/docs/testing/json_test_r
""" """
import collections import collections
import datetime
import json import json
import os import os
import urllib import urllib
...@@ -19,18 +20,18 @@ from core.results_processor import util ...@@ -19,18 +20,18 @@ from core.results_processor import util
OUTPUT_FILENAME = 'test-results.json' OUTPUT_FILENAME = 'test-results.json'
def ProcessIntermediateResults(intermediate_results, options): def ProcessIntermediateResults(test_results, options):
"""Process intermediate results and write output in output_dir.""" """Process intermediate results and write output in output_dir."""
results = Convert(intermediate_results, options.output_dir) results = Convert(test_results, options.output_dir)
with open(os.path.join(options.output_dir, OUTPUT_FILENAME), 'w') as f: with open(os.path.join(options.output_dir, OUTPUT_FILENAME), 'w') as f:
json.dump(results, f, sort_keys=True, indent=4, separators=(',', ': ')) json.dump(results, f, sort_keys=True, indent=4, separators=(',', ': '))
def Convert(in_results, base_dir): def Convert(test_results, base_dir):
"""Convert intermediate results to the JSON Test Results Format. """Convert intermediate results to the JSON Test Results Format.
Args: Args:
in_results: The parsed intermediate results. test_results: The parsed intermediate results.
base_dir: A string with the path to a base directory; artifact file paths base_dir: A string with the path to a base directory; artifact file paths
will be written relative to this. will be written relative to this.
...@@ -40,7 +41,7 @@ def Convert(in_results, base_dir): ...@@ -40,7 +41,7 @@ def Convert(in_results, base_dir):
results = {'tests': {}} results = {'tests': {}}
status_counter = collections.Counter() status_counter = collections.Counter()
for result in in_results['testResults']: for result in test_results:
benchmark_name, story_name = result['testPath'].split('/') benchmark_name, story_name = result['testPath'].split('/')
story_name = urllib.unquote(story_name) story_name = urllib.unquote(story_name)
actual_status = result['status'] actual_status = result['status']
...@@ -77,10 +78,17 @@ def Convert(in_results, base_dir): ...@@ -77,10 +78,17 @@ def Convert(in_results, base_dir):
if test['shard'] is None: if test['shard'] is None:
del test['shard'] del test['shard']
benchmark_run = in_results['benchmarkRun'] # Test results are written in order of execution, so the first test start
# time is approximately the start time of the whole suite.
test_suite_start_time = (test_results[0]['startTime'] if test_results
else datetime.datetime.utcnow().isoformat() + 'Z')
# If Telemetry stops with an unhandleable error, then the remaining stories
# are marked as unexpectedly skipped.
interrupted = any(t['status'] == 'SKIP' and not t['isExpected']
for t in test_results)
results.update( results.update(
seconds_since_epoch=util.IsoTimestampToEpoch(benchmark_run['startTime']), seconds_since_epoch=util.IsoTimestampToEpoch(test_suite_start_time),
interrupted=benchmark_run['interrupted'], interrupted=interrupted,
num_failures_by_type=dict(status_counter), num_failures_by_type=dict(status_counter),
path_delimiter='/', path_delimiter='/',
version=3, version=3,
......
...@@ -13,13 +13,11 @@ class Json3OutputTest(unittest.TestCase): ...@@ -13,13 +13,11 @@ class Json3OutputTest(unittest.TestCase):
def setUp(self): def setUp(self):
self.base_dir = 'base_dir' self.base_dir = 'base_dir'
def Convert(self, test_results, **kwargs): def Convert(self, test_results):
base_dir = kwargs.pop('base_dir', self.base_dir) test_results_copy = copy.deepcopy(test_results)
original_results = testing.IntermediateResults(test_results, **kwargs) results = json3_output.Convert(test_results_copy, self.base_dir)
intermediate_results = copy.deepcopy(original_results)
results = json3_output.Convert(intermediate_results, base_dir)
# Convert should not modify the original intermediate results. # Convert should not modify the original intermediate results.
self.assertEqual(intermediate_results, original_results) self.assertEqual(test_results_copy, test_results)
return results return results
def FindTestResult(self, results, benchmark, story): def FindTestResult(self, results, benchmark, story):
...@@ -29,15 +27,15 @@ class Json3OutputTest(unittest.TestCase): ...@@ -29,15 +27,15 @@ class Json3OutputTest(unittest.TestCase):
node = node[key] node = node[key]
return node return node
def testEmptyResults(self): def testStartTime(self):
results = self.Convert( results = self.Convert([
[], start_time='2009-02-13T23:31:30.987000Z', interrupted=False) testing.TestResult('benchmark/story',
start_time='2009-02-13T23:31:30.987000Z')
])
self.assertFalse(results['interrupted']) self.assertFalse(results['interrupted'])
self.assertEqual(results['num_failures_by_type'], {})
self.assertEqual(results['path_delimiter'], '/') self.assertEqual(results['path_delimiter'], '/')
self.assertEqual(results['seconds_since_epoch'], 1234567890.987) self.assertEqual(results['seconds_since_epoch'], 1234567890.987)
self.assertEqual(results['tests'], {})
self.assertEqual(results['version'], 3) self.assertEqual(results['version'], 3)
def testSingleTestCase(self): def testSingleTestCase(self):
......
...@@ -8,6 +8,7 @@ Provides functions to process intermediate results, and the entry point to ...@@ -8,6 +8,7 @@ Provides functions to process intermediate results, and the entry point to
the standalone version of Results Processor. the standalone version of Results Processor.
""" """
import datetime
import json import json
import logging import logging
import os import os
...@@ -28,7 +29,12 @@ from tracing.value import histogram ...@@ -28,7 +29,12 @@ from tracing.value import histogram
from tracing.value import histogram_set from tracing.value import histogram_set
from tracing.value import legacy_unit_info from tracing.value import legacy_unit_info
# Telemetry results file is deprecated.
# TODO(crbug.com/981349): Remove this constant after Telemetry switches to
# the new file.
TELEMETRY_RESULTS = '_telemetry_results.jsonl' TELEMETRY_RESULTS = '_telemetry_results.jsonl'
TEST_RESULTS = '_test_results.jsonl'
DIAGNOSTICS_NAME = 'diagnostics.json'
MEASUREMENTS_NAME = 'measurements.json' MEASUREMENTS_NAME = 'measurements.json'
FORMATS_WITH_METRICS = ['csv', 'histograms', 'html'] FORMATS_WITH_METRICS = ['csv', 'histograms', 'html']
...@@ -48,17 +54,31 @@ def ProcessResults(options): ...@@ -48,17 +54,31 @@ def ProcessResults(options):
if not getattr(options, 'output_formats', None): if not getattr(options, 'output_formats', None):
return 0 return 0
intermediate_results = _LoadIntermediateResults( test_results = _LoadTestResults(options.intermediate_dir)
os.path.join(options.intermediate_dir, TELEMETRY_RESULTS)) if not test_results:
# TODO(crbug.com/981349): Make sure that no one is expecting Results
AggregateTraces(intermediate_results) # Processor to output results in the case of empty input
# and make this an error.
UploadArtifacts( logging.warning('No test results to process.')
intermediate_results, options.upload_bucket, options.results_label)
upload_bucket = options.upload_bucket
if any(fmt in FORMATS_WITH_METRICS for fmt in options.output_formats): results_label = options.results_label
histogram_dicts = _ComputeMetrics(intermediate_results, test_suite_start = (test_results[0]['startTime'] if test_results
options.results_label) else datetime.datetime.utcnow().isoformat() + 'Z')
run_identifier = RunIdentifier(results_label, test_suite_start)
should_compute_metrics = any(
fmt in FORMATS_WITH_METRICS for fmt in options.output_formats)
util.ApplyInParallel(
lambda result: ProcessTestResult(
result, upload_bucket, results_label, run_identifier,
test_suite_start, should_compute_metrics),
test_results,
on_failure=lambda result: result.update(status='FAIL'),
)
if should_compute_metrics:
histogram_dicts = ExtractHistograms(test_results)
for output_format in options.output_formats: for output_format in options.output_formats:
logging.info('Processing format: %s', output_format) logging.info('Processing format: %s', output_format)
...@@ -66,12 +86,33 @@ def ProcessResults(options): ...@@ -66,12 +86,33 @@ def ProcessResults(options):
if output_format in FORMATS_WITH_METRICS: if output_format in FORMATS_WITH_METRICS:
formatter.ProcessHistogramDicts(histogram_dicts, options) formatter.ProcessHistogramDicts(histogram_dicts, options)
else: else:
formatter.ProcessIntermediateResults(intermediate_results, options) formatter.ProcessIntermediateResults(test_results, options)
return GenerateExitCode(test_results)
return GenerateExitCode(intermediate_results) def ProcessTestResult(test_result, upload_bucket, results_label,
run_identifier, test_suite_start, should_compute_metrics):
AggregateTraces(test_result)
if upload_bucket is not None:
UploadArtifacts(test_result, upload_bucket, run_identifier)
if should_compute_metrics:
test_result['_histograms'] = histogram_set.HistogramSet()
compute_metrics.ComputeTBMv2Metrics(test_result)
ExtractMeasurements(test_result)
AddDiagnosticsToHistograms(test_result, test_suite_start, results_label)
def ExtractHistograms(test_results):
histograms = histogram_set.HistogramSet()
for result in test_results:
histograms.Merge(result['_histograms'])
histograms.DeduplicateDiagnostics()
return histograms.AsDicts()
def GenerateExitCode(intermediate_results):
def GenerateExitCode(test_results):
"""Generate an exit code as expected by callers. """Generate an exit code as expected by callers.
Returns: Returns:
...@@ -79,69 +120,63 @@ def GenerateExitCode(intermediate_results): ...@@ -79,69 +120,63 @@ def GenerateExitCode(intermediate_results):
-1 if all tests were skipped. -1 if all tests were skipped.
0 otherwise. 0 otherwise.
""" """
if any(r['status'] == 'FAIL' for r in intermediate_results['testResults']): if any(r['status'] == 'FAIL' for r in test_results):
return 1 return 1
if all(r['status'] == 'SKIP' for r in intermediate_results['testResults']): if all(r['status'] == 'SKIP' for r in test_results):
return -1 return -1
return 0 return 0
def _LoadIntermediateResults(intermediate_file): def _LoadTestResults(intermediate_dir):
"""Load intermediate results from a file into a single dict.""" """Load intermediate results from a file into a list of test results."""
results = {'benchmarkRun': {}, 'testResults': []} # Try to load the results from the new file first, then from the old one.
# TODO(crbug.com/981349): Remove fallback when Telemetry switches to the
# new format.
intermediate_file = os.path.join(intermediate_dir, TEST_RESULTS)
if not os.path.exists(intermediate_file):
intermediate_file = os.path.join(intermediate_dir, TELEMETRY_RESULTS)
benchmark_run = {}
test_results = []
with open(intermediate_file) as f: with open(intermediate_file) as f:
for line in f: for line in f:
record = json.loads(line) record = json.loads(line)
# TODO(crbug.com/981349): Stop reading benchmarkRun messages when
# Telemetry switches to the new format.
if 'benchmarkRun' in record: if 'benchmarkRun' in record:
results['benchmarkRun'].update(record['benchmarkRun']) benchmark_run.update(record['benchmarkRun'])
if 'testResult' in record: if 'testResult' in record:
test_result = record['testResult'] test_results.append(record['testResult'])
results['testResults'].append(test_result) for test_result in test_results:
return results test_result['_benchmarkRun'] = benchmark_run
return test_results
def _AggregateTraceWorker(artifacts):
traces = [name for name in artifacts if name.startswith('trace/')]
trace_files = [artifacts.pop(name)['filePath'] for name in traces]
html_path = os.path.join(
os.path.dirname(os.path.commonprefix(trace_files)),
compute_metrics.HTML_TRACE_NAME)
trace_data.SerializeAsHtml(trace_files, html_path)
artifacts[compute_metrics.HTML_TRACE_NAME] = {
'filePath': html_path,
'contentType': 'text/html',
}
def AggregateTraces(test_result):
def AggregateTraces(intermediate_results):
"""Replace individual traces with an aggregate one for each test result. """Replace individual traces with an aggregate one for each test result.
For each test run with traces, generates an aggregate HTML trace. Removes For a test run with traces, generates an aggregate HTML trace. Removes
all entries for individual traces and adds one entry for the aggregate one. all entries for individual traces and adds one entry for the aggregate one.
""" """
work_list = [] artifacts = test_result.get('outputArtifacts', {})
for result in intermediate_results['testResults']: traces = [name for name in artifacts if name.startswith('trace/')]
artifacts = result.get('outputArtifacts', {}) # TODO(crbug.com/981349): Stop checking for HTML_TRACE_NAME after
# TODO(crbug.com/981349): Stop checking for HTML_TRACE_NAME after # Telemetry does not aggregate traces anymore.
# Telemetry does not aggregate traces anymore. if traces and compute_metrics.HTML_TRACE_NAME not in artifacts:
if (any(name.startswith('trace/') for name in artifacts) and trace_files = [artifacts[name]['filePath'] for name in traces]
compute_metrics.HTML_TRACE_NAME not in artifacts): html_path = os.path.join(
work_list.append(artifacts) os.path.dirname(os.path.commonprefix(trace_files)),
compute_metrics.HTML_TRACE_NAME)
if work_list: trace_data.SerializeAsHtml(trace_files, html_path)
for _ in util.ApplyInParallel(_AggregateTraceWorker, work_list): artifacts[compute_metrics.HTML_TRACE_NAME] = {
pass 'filePath': html_path,
'contentType': 'text/html',
# TODO(crbug.com/981349): This is to clean up traces that have been }
# aggregated by Telemetry. Remove this after Telemetry no longer does this. for name in traces:
for result in intermediate_results['testResults']: del artifacts[name]
artifacts = result.get('outputArtifacts', {})
for name in artifacts.keys():
if name.startswith('trace/'): def RunIdentifier(results_label, test_suite_start):
del artifacts[name]
def _RunIdentifier(results_label, start_time):
"""Construct an identifier for the current script run""" """Construct an identifier for the current script run"""
if results_label: if results_label:
identifier_parts = [re.sub(r'\W+', '_', results_label)] identifier_parts = [re.sub(r'\W+', '_', results_label)]
...@@ -149,80 +184,67 @@ def _RunIdentifier(results_label, start_time): ...@@ -149,80 +184,67 @@ def _RunIdentifier(results_label, start_time):
identifier_parts = [] identifier_parts = []
# Time is rounded to seconds and delimiters are removed. # Time is rounded to seconds and delimiters are removed.
# The first 19 chars of the string match 'YYYY-MM-DDTHH:MM:SS'. # The first 19 chars of the string match 'YYYY-MM-DDTHH:MM:SS'.
identifier_parts.append(re.sub(r'\W+', '', start_time[:19])) identifier_parts.append(re.sub(r'\W+', '', test_suite_start[:19]))
identifier_parts.append(str(random.randint(1, 1e5))) identifier_parts.append(str(random.randint(1, 1e5)))
return '_'.join(identifier_parts) return '_'.join(identifier_parts)
def UploadArtifacts(intermediate_results, upload_bucket, results_label): def UploadArtifacts(test_result, upload_bucket, run_identifier):
"""Upload all artifacts to cloud. """Upload all artifacts to cloud.
For each test run, uploads all its artifacts to cloud and sets remoteUrl For a test run, uploads all its artifacts to cloud and sets remoteUrl
fields in intermediate_results. fields in intermediate_results.
""" """
if upload_bucket is None: artifacts = test_result.get('outputArtifacts', {})
return for name, artifact in artifacts.iteritems():
if 'remoteUrl' in artifact:
run_identifier = _RunIdentifier( continue
results_label, intermediate_results['benchmarkRun']['startTime']) # TODO(crbug.com/981349): Remove check for HISTOGRAM_DICTS_FILE
work_list = [] # after Telemetry does not save histograms as an artifact anymore.
# Another TODO(crbug.com/981349): Think of a more general way to
for result in intermediate_results['testResults']: # specify which artifacts deserve uploading.
artifacts = result.get('outputArtifacts', {}) if name in [compute_metrics.HISTOGRAM_DICTS_FILE, MEASUREMENTS_NAME]:
for name, artifact in artifacts.iteritems(): continue
if 'remoteUrl' in artifact: remote_name = '/'.join([run_identifier, test_result['testPath'], name])
continue
# TODO(crbug.com/981349): Remove this check after Telemetry does not
# save histograms as an artifact anymore.
if name == compute_metrics.HISTOGRAM_DICTS_FILE:
continue
remote_name = '/'.join([run_identifier, result['testPath'], name])
work_list.append((artifact, remote_name))
def PoolUploader(work_item):
artifact, remote_name = work_item
artifact['remoteUrl'] = cloud_storage.Insert( artifact['remoteUrl'] = cloud_storage.Insert(
upload_bucket, remote_name, artifact['filePath']) upload_bucket, remote_name, artifact['filePath'])
logging.info('Uploaded %s of %s to %s', name, test_result['testPath'],
for _ in util.ApplyInParallel(PoolUploader, work_list): artifact['remoteUrl'])
pass
for result in intermediate_results['testResults']:
artifacts = result.get('outputArtifacts', {})
for name, artifact in artifacts.iteritems():
logging.info('Uploaded %s of %s to %s', name, result['testPath'],
artifact['remoteUrl'])
def _ComputeMetrics(intermediate_results, results_label): def AddDiagnosticsToHistograms(test_result, test_suite_start, results_label):
histogram_dicts = compute_metrics.ComputeTBMv2Metrics(intermediate_results) """Add diagnostics to all histograms of a test run.
histogram_dicts += ExtractMeasurements(intermediate_results)
histogram_dicts = AddDiagnosticsToHistograms(
histogram_dicts, intermediate_results, results_label)
return histogram_dicts
Reads diagnostics from the test artifact and adds them to all histograms.
This overwrites the corresponding diagnostics previously set by e.g.
run_metrics.
"""
artifacts = test_result.get('outputArtifacts', {})
if DIAGNOSTICS_NAME in artifacts:
with open(artifacts[DIAGNOSTICS_NAME]['filePath']) as f:
diagnostics = json.load(f)['diagnostics']
# TODO(crbug.com/981349): Remove this branch when Telemetry switches to the
# new format.
else:
diagnostics = test_result.get('_benchmarkRun', {}).get('diagnostics', {})
def AddDiagnosticsToHistograms(histogram_dicts, intermediate_results,
results_label):
"""Add diagnostics to histogram dicts"""
histograms = histogram_set.HistogramSet()
histograms.ImportDicts(histogram_dicts)
diagnostics = intermediate_results['benchmarkRun'].get('diagnostics', {})
for name, diag in diagnostics.items(): for name, diag in diagnostics.items():
# For now, we only support GenericSet diagnostics that are serialized # For now, we only support GenericSet diagnostics that are serialized
# as lists of values. # as lists of values.
assert isinstance(diag, list) assert isinstance(diag, list)
histograms.AddSharedDiagnosticToAllHistograms( test_result['_histograms'].AddSharedDiagnosticToAllHistograms(
name, generic_set.GenericSet(diag)) name, generic_set.GenericSet(diag))
timestamp_ms = util.IsoTimestampToEpoch(test_suite_start) * 1e3
test_result['_histograms'].AddSharedDiagnosticToAllHistograms(
reserved_infos.BENCHMARK_START.name, date_range.DateRange(timestamp_ms))
if results_label is not None: if results_label is not None:
histograms.AddSharedDiagnosticToAllHistograms( test_result['_histograms'].AddSharedDiagnosticToAllHistograms(
reserved_infos.LABELS.name, reserved_infos.LABELS.name,
generic_set.GenericSet([results_label])) generic_set.GenericSet([results_label]))
histograms.DeduplicateDiagnostics()
return histograms.AsDicts()
def MeasurementToHistogram(name, measurement): def MeasurementToHistogram(name, measurement):
unit = measurement['unit'] unit = measurement['unit']
...@@ -238,22 +260,10 @@ def MeasurementToHistogram(name, measurement): ...@@ -238,22 +260,10 @@ def MeasurementToHistogram(name, measurement):
description=description) description=description)
def _GlobalDiagnostics(benchmark_run):
"""Extract diagnostics information about the whole benchmark run.
These diagnostics will be added to ad-hoc measurements recorded by
benchmarks.
"""
timestamp_ms = util.IsoTimestampToEpoch(benchmark_run['startTime']) * 1e3
return {
reserved_infos.BENCHMARK_START.name: date_range.DateRange(timestamp_ms),
}
def _StoryDiagnostics(test_result): def _StoryDiagnostics(test_result):
"""Extract diagnostics information about the specific story. """Extract diagnostics information about the specific story.
These diagnostics will be added to ad-hoc measurements recorded by These diagnostics will be added only to ad-hoc measurements recorded by
benchmarks. benchmarks.
""" """
benchmark_name, story_name = test_result['testPath'].split('/', 1) benchmark_name, story_name = test_result['testPath'].split('/', 1)
...@@ -266,23 +276,16 @@ def _StoryDiagnostics(test_result): ...@@ -266,23 +276,16 @@ def _StoryDiagnostics(test_result):
} }
def ExtractMeasurements(intermediate_results): def ExtractMeasurements(test_result):
"""Add ad-hoc measurements to histogram dicts""" """Add ad-hoc measurements to histogram dicts"""
histograms = histogram_set.HistogramSet() artifacts = test_result.get('outputArtifacts', {})
global_diagnostics = _GlobalDiagnostics(intermediate_results['benchmarkRun']) if MEASUREMENTS_NAME in artifacts:
with open(artifacts[MEASUREMENTS_NAME]['filePath']) as f:
for result in intermediate_results['testResults']: measurements = json.load(f)['measurements']
artifacts = result.get('outputArtifacts', {}) diagnostics = _StoryDiagnostics(test_result)
if MEASUREMENTS_NAME in artifacts: for name, measurement in measurements.iteritems():
with open(artifacts[MEASUREMENTS_NAME]['filePath']) as f: test_result['_histograms'].AddHistogram(
measurements = json.load(f)['measurements'] MeasurementToHistogram(name, measurement), diagnostics=diagnostics)
diagnostics = global_diagnostics.copy()
diagnostics.update(_StoryDiagnostics(result))
for name, measurement in measurements.iteritems():
histograms.AddHistogram(MeasurementToHistogram(name, measurement),
diagnostics=diagnostics)
return histograms.AsDicts()
def main(args=None): def main(args=None):
......
...@@ -42,18 +42,35 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -42,18 +42,35 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
def tearDown(self): def tearDown(self):
shutil.rmtree(self.output_dir) shutil.rmtree(self.output_dir)
def SerializeIntermediateResults(self, *args, **kwargs): def SerializeIntermediateResults(self, *test_results):
in_results = testing.IntermediateResults(*args, **kwargs) testing.SerializeIntermediateResults(test_results, os.path.join(
testing.SerializeIntermediateResults(in_results, os.path.join(
self.intermediate_dir, processor.TELEMETRY_RESULTS)) self.intermediate_dir, processor.TELEMETRY_RESULTS))
def CreateHistogramsArtifact(self, hist):
"""Create an artifact with histograms."""
histogram_dicts = [hist.AsDict()]
hist_file = os.path.join(self.output_dir,
compute_metrics.HISTOGRAM_DICTS_FILE)
with open(hist_file, 'w') as f:
json.dump(histogram_dicts, f)
return testing.Artifact(hist_file)
def CreateDiagnosticsArtifact(self, **diagnostics):
"""Create an artifact with diagnostics."""
diag_file = os.path.join(self.output_dir,
processor.DIAGNOSTICS_NAME)
with open(diag_file, 'w') as f:
json.dump({'diagnostics': diagnostics}, f)
return testing.Artifact(diag_file)
def testJson3Output(self): def testJson3Output(self):
self.SerializeIntermediateResults([ self.SerializeIntermediateResults(
testing.TestResult( testing.TestResult(
'benchmark/story', run_duration='1.1s', tags=['shard:7']), 'benchmark/story', run_duration='1.1s', tags=['shard:7'],
start_time='2009-02-13T23:31:30.987000Z'),
testing.TestResult( testing.TestResult(
'benchmark/story', run_duration='1.2s', tags=['shard:7']), 'benchmark/story', run_duration='1.2s', tags=['shard:7']),
], start_time='2009-02-13T23:31:30.987000Z') )
processor.main([ processor.main([
'--output-format', 'json-test-results', '--output-format', 'json-test-results',
...@@ -80,7 +97,7 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -80,7 +97,7 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertEqual(test_result['shard'], 7) self.assertEqual(test_result['shard'], 7)
def testJson3OutputWithArtifacts(self): def testJson3OutputWithArtifacts(self):
self.SerializeIntermediateResults([ self.SerializeIntermediateResults(
testing.TestResult( testing.TestResult(
'benchmark/story', 'benchmark/story',
output_artifacts={ output_artifacts={
...@@ -88,8 +105,9 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -88,8 +105,9 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
'trace/telemetry': testing.Artifact('/telemetry.json'), 'trace/telemetry': testing.Artifact('/telemetry.json'),
'trace.html': 'trace.html':
testing.Artifact('/trace.html', 'gs://trace.html'), testing.Artifact('/trace.html', 'gs://trace.html'),
}, }
)]) ),
)
processor.main([ processor.main([
'--output-format', 'json-test-results', '--output-format', 'json-test-results',
...@@ -110,26 +128,21 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -110,26 +128,21 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertEqual(artifacts['trace.html'], ['gs://trace.html']) self.assertEqual(artifacts['trace.html'], ['gs://trace.html'])
def testHistogramsOutput(self): def testHistogramsOutput(self):
hist_file = os.path.join(self.output_dir,
compute_metrics.HISTOGRAM_DICTS_FILE)
with open(hist_file, 'w') as f:
json.dump([histogram.Histogram('a', 'unitless').AsDict()], f)
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ compute_metrics.HISTOGRAM_DICTS_FILE:
'histogram_dicts.json': testing.Artifact(hist_file) self.CreateHistogramsArtifact(
}, histogram.Histogram('a', 'unitless')),
), processor.DIAGNOSTICS_NAME:
], self.CreateDiagnosticsArtifact(
diagnostics={ benchmarks=['benchmark'],
'benchmarks': ['benchmark'], osNames=['linux'],
'osNames': ['linux'], documentationUrls=[['documentation', 'url']]),
'documentationUrls': [['documentation', 'url']], },
}, start_time='2009-02-13T23:31:30.987000Z',
start_time='2009-02-13T23:31:30.987000Z', ),
) )
processor.main([ processor.main([
...@@ -146,31 +159,32 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -146,31 +159,32 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
out_histograms = histogram_set.HistogramSet() out_histograms = histogram_set.HistogramSet()
out_histograms.ImportDicts(results) out_histograms.ImportDicts(results)
self.assertEqual(len(out_histograms), 1) self.assertEqual(len(out_histograms), 1)
self.assertEqual(out_histograms.GetFirstHistogram().name, 'a')
self.assertEqual(out_histograms.GetFirstHistogram().unit, 'unitless')
diag_values = [list(v) for v in out_histograms.shared_diagnostics] hist = out_histograms.GetFirstHistogram()
self.assertEqual(len(diag_values), 4) self.assertEqual(hist.name, 'a')
self.assertIn(['benchmark'], diag_values) self.assertEqual(hist.unit, 'unitless')
self.assertIn(['linux'], diag_values)
self.assertIn([['documentation', 'url']], diag_values)
self.assertIn(['label'], diag_values)
def testHistogramsOutputResetResults(self): self.assertEqual(hist.diagnostics['benchmarks'],
hist_file = os.path.join(self.output_dir, generic_set.GenericSet(['benchmark']))
compute_metrics.HISTOGRAM_DICTS_FILE) self.assertEqual(hist.diagnostics['osNames'],
with open(hist_file, 'w') as f: generic_set.GenericSet(['linux']))
json.dump([histogram.Histogram('a', 'unitless').AsDict()], f) self.assertEqual(hist.diagnostics['documentationUrls'],
generic_set.GenericSet([['documentation', 'url']]))
self.assertEqual(hist.diagnostics['labels'],
generic_set.GenericSet(['label']))
self.assertEqual(hist.diagnostics['benchmarkStart'],
date_range.DateRange(1234567890987))
def testHistogramsOutputResetResults(self):
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ compute_metrics.HISTOGRAM_DICTS_FILE:
'histogram_dicts.json': testing.Artifact(hist_file) self.CreateHistogramsArtifact(
}, histogram.Histogram('a', 'unitless')),
), },
], ),
) )
processor.main([ processor.main([
...@@ -195,25 +209,21 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -195,25 +209,21 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
out_histograms = histogram_set.HistogramSet() out_histograms = histogram_set.HistogramSet()
out_histograms.ImportDicts(results) out_histograms.ImportDicts(results)
self.assertEqual(len(out_histograms), 1) self.assertEqual(len(out_histograms), 1)
diag_values = [list(v) for v in out_histograms.shared_diagnostics]
self.assertNotIn(['label1'], diag_values)
self.assertIn(['label2'], diag_values)
def testHistogramsOutputAppendResults(self): hist = out_histograms.GetFirstHistogram()
hist_file = os.path.join(self.output_dir, self.assertEqual(hist.diagnostics['labels'],
compute_metrics.HISTOGRAM_DICTS_FILE) generic_set.GenericSet(['label2']))
with open(hist_file, 'w') as f:
json.dump([histogram.Histogram('a', 'unitless').AsDict()], f)
def testHistogramsOutputAppendResults(self):
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ compute_metrics.HISTOGRAM_DICTS_FILE:
'histogram_dicts.json': testing.Artifact(hist_file) self.CreateHistogramsArtifact(
}, histogram.Histogram('a', 'unitless')),
), },
], ),
) )
processor.main([ processor.main([
...@@ -237,9 +247,11 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -237,9 +247,11 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
out_histograms = histogram_set.HistogramSet() out_histograms = histogram_set.HistogramSet()
out_histograms.ImportDicts(results) out_histograms.ImportDicts(results)
self.assertEqual(len(out_histograms), 2) self.assertEqual(len(out_histograms), 2)
diag_values = [list(v) for v in out_histograms.shared_diagnostics]
self.assertIn(['label1'], diag_values) expected_labels = set(['label1', 'label2'])
self.assertIn(['label2'], diag_values) observed_labels = set(label for hist in out_histograms
for label in hist.diagnostics['labels'])
self.assertEqual(observed_labels, expected_labels)
def testHistogramsOutputNoMetricsFromTelemetry(self): def testHistogramsOutputNoMetricsFromTelemetry(self):
trace_file = os.path.join(self.output_dir, compute_metrics.HTML_TRACE_NAME) trace_file = os.path.join(self.output_dir, compute_metrics.HTML_TRACE_NAME)
...@@ -247,16 +259,14 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -247,16 +259,14 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
pass pass
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ compute_metrics.HTML_TRACE_NAME:
compute_metrics.HTML_TRACE_NAME: testing.Artifact(trace_file, 'gs://trace.html')
testing.Artifact(trace_file, 'gs://trace.html') },
}, tags=['tbmv2:sampleMetric'],
tags=['tbmv2:sampleMetric'], ),
),
],
) )
processor.main([ processor.main([
...@@ -284,13 +294,11 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -284,13 +294,11 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
json.dump({'traceEvents': []}, f) json.dump({'traceEvents': []}, f)
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={'trace/json': testing.Artifact(json_trace)},
output_artifacts={'trace/json': testing.Artifact(json_trace)}, tags=['tbmv2:sampleMetric'],
tags=['tbmv2:sampleMetric'], ),
),
],
) )
processor.main([ processor.main([
...@@ -324,16 +332,14 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -324,16 +332,14 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
start_iso = datetime.datetime.utcfromtimestamp(start_ts).isoformat() + 'Z' start_iso = datetime.datetime.utcfromtimestamp(start_ts).isoformat() + 'Z'
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ processor.MEASUREMENTS_NAME: testing.Artifact(measure_file)
processor.MEASUREMENTS_NAME: testing.Artifact(measure_file) },
}, tags=['story_tag:test'],
tags=['story_tag:test'] start_time=start_iso,
), ),
],
start_time=start_iso,
) )
processor.main([ processor.main([
...@@ -379,26 +385,21 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -379,26 +385,21 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
date_range.DateRange(start_ts * 1e3)) date_range.DateRange(start_ts * 1e3))
def testHtmlOutput(self): def testHtmlOutput(self):
hist_file = os.path.join(self.output_dir,
compute_metrics.HISTOGRAM_DICTS_FILE)
with open(hist_file, 'w') as f:
json.dump([histogram.Histogram('a', 'unitless').AsDict()], f)
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ compute_metrics.HISTOGRAM_DICTS_FILE:
'histogram_dicts.json': testing.Artifact(hist_file) self.CreateHistogramsArtifact(
}, histogram.Histogram('a', 'unitless')),
), processor.DIAGNOSTICS_NAME:
], self.CreateDiagnosticsArtifact(
diagnostics={ benchmarks=['benchmark'],
'benchmarks': ['benchmark'], osNames=['linux'],
'osNames': ['linux'], documentationUrls=[['documentation', 'url']]),
'documentationUrls': [['documentation', 'url']], },
}, start_time='2009-02-13T23:31:30.987000Z',
start_time='2009-02-13T23:31:30.987000Z', ),
) )
processor.main([ processor.main([
...@@ -415,18 +416,33 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -415,18 +416,33 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
out_histograms = histogram_set.HistogramSet() out_histograms = histogram_set.HistogramSet()
out_histograms.ImportDicts(results) out_histograms.ImportDicts(results)
self.assertEqual(len(out_histograms), 1) self.assertEqual(len(out_histograms), 1)
self.assertEqual(out_histograms.GetFirstHistogram().name, 'a')
self.assertEqual(out_histograms.GetFirstHistogram().unit, 'unitless')
diag_values = [list(v) for v in out_histograms.shared_diagnostics] hist = out_histograms.GetFirstHistogram()
self.assertEqual(len(diag_values), 4) self.assertEqual(hist.name, 'a')
self.assertIn(['benchmark'], diag_values) self.assertEqual(hist.unit, 'unitless')
self.assertIn(['linux'], diag_values)
self.assertIn([['documentation', 'url']], diag_values) self.assertEqual(hist.diagnostics['benchmarks'],
self.assertIn(['label'], diag_values) generic_set.GenericSet(['benchmark']))
self.assertEqual(hist.diagnostics['osNames'],
generic_set.GenericSet(['linux']))
self.assertEqual(hist.diagnostics['documentationUrls'],
generic_set.GenericSet([['documentation', 'url']]))
self.assertEqual(hist.diagnostics['labels'],
generic_set.GenericSet(['label']))
self.assertEqual(hist.diagnostics['benchmarkStart'],
date_range.DateRange(1234567890987))
def testHtmlOutputResetResults(self): def testHtmlOutputResetResults(self):
self.SerializeIntermediateResults([]) self.SerializeIntermediateResults(
testing.TestResult(
'benchmark/story',
output_artifacts={
compute_metrics.HISTOGRAM_DICTS_FILE:
self.CreateHistogramsArtifact(
histogram.Histogram('a', 'unitless')),
},
),
)
processor.main([ processor.main([
'--output-format', 'html', '--output-format', 'html',
...@@ -449,12 +465,23 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -449,12 +465,23 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
out_histograms = histogram_set.HistogramSet() out_histograms = histogram_set.HistogramSet()
out_histograms.ImportDicts(results) out_histograms.ImportDicts(results)
diag_values = [list(v) for v in out_histograms.shared_diagnostics] self.assertEqual(len(out_histograms), 1)
self.assertNotIn(['label1'], diag_values)
self.assertIn(['label2'], diag_values) hist = out_histograms.GetFirstHistogram()
self.assertEqual(hist.diagnostics['labels'],
generic_set.GenericSet(['label2']))
def testHtmlOutputAppendResults(self): def testHtmlOutputAppendResults(self):
self.SerializeIntermediateResults([]) self.SerializeIntermediateResults(
testing.TestResult(
'benchmark/story',
output_artifacts={
compute_metrics.HISTOGRAM_DICTS_FILE:
self.CreateHistogramsArtifact(
histogram.Histogram('a', 'unitless')),
},
),
)
processor.main([ processor.main([
'--output-format', 'html', '--output-format', 'html',
...@@ -476,32 +503,30 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -476,32 +503,30 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
out_histograms = histogram_set.HistogramSet() out_histograms = histogram_set.HistogramSet()
out_histograms.ImportDicts(results) out_histograms.ImportDicts(results)
diag_values = [list(v) for v in out_histograms.shared_diagnostics] self.assertEqual(len(out_histograms), 2)
self.assertIn(['label1'], diag_values)
self.assertIn(['label2'], diag_values) expected_labels = set(['label1', 'label2'])
observed_labels = set(label for hist in out_histograms
for label in hist.diagnostics['labels'])
self.assertEqual(observed_labels, expected_labels)
def testCsvOutput(self): def testCsvOutput(self):
hist_file = os.path.join(self.output_dir,
compute_metrics.HISTOGRAM_DICTS_FILE)
test_hist = histogram.Histogram('a', 'ms') test_hist = histogram.Histogram('a', 'ms')
test_hist.AddSample(3000) test_hist.AddSample(3000)
with open(hist_file, 'w') as f:
json.dump([test_hist.AsDict()], f)
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ compute_metrics.HISTOGRAM_DICTS_FILE:
'histogram_dicts.json': testing.Artifact(hist_file) self.CreateHistogramsArtifact(test_hist),
}, processor.DIAGNOSTICS_NAME:
), self.CreateDiagnosticsArtifact(
], benchmarks=['benchmark'],
diagnostics={ osNames=['linux'],
'benchmarks': ['benchmark'], documentationUrls=[['documentation', 'url']]),
'osNames': ['linux'], },
'documentationUrls': [['documentation', 'url']], start_time='2009-02-13T23:31:30.987000Z',
}, ),
) )
processor.main([ processor.main([
...@@ -519,7 +544,7 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -519,7 +544,7 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
('name', 'a'), ('unit', 'ms'), ('avg', '3000'), ('count', '1'), ('name', 'a'), ('unit', 'ms'), ('avg', '3000'), ('count', '1'),
('max', '3000'), ('min', '3000'), ('std', '0'), ('sum', '3000'), ('max', '3000'), ('min', '3000'), ('std', '0'), ('sum', '3000'),
('architectures', ''), ('benchmarks', 'benchmark'), ('architectures', ''), ('benchmarks', 'benchmark'),
('benchmarkStart', ''), ('bots', ''), ('benchmarkStart', '2009-02-13 23:31:30'), ('bots', ''),
('builds', ''), ('deviceIds', ''), ('displayLabel', 'label'), ('builds', ''), ('deviceIds', ''), ('displayLabel', 'label'),
('masters', ''), ('memoryAmounts', ''), ('osNames', 'linux'), ('masters', ''), ('memoryAmounts', ''), ('osNames', 'linux'),
('osVersions', ''), ('productVersions', ''), ('osVersions', ''), ('productVersions', ''),
...@@ -529,20 +554,15 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -529,20 +554,15 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertEqual(actual, expected) self.assertEqual(actual, expected)
def testCsvOutputResetResults(self): def testCsvOutputResetResults(self):
hist_file = os.path.join(self.output_dir,
compute_metrics.HISTOGRAM_DICTS_FILE)
with open(hist_file, 'w') as f:
json.dump([histogram.Histogram('a', 'unitless').AsDict()], f)
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ compute_metrics.HISTOGRAM_DICTS_FILE:
'histogram_dicts.json': testing.Artifact(hist_file) self.CreateHistogramsArtifact(
}, histogram.Histogram('a', 'unitless')),
), },
], ),
) )
processor.main([ processor.main([
...@@ -567,20 +587,15 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -567,20 +587,15 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertIn('label2', lines[1]) self.assertIn('label2', lines[1])
def testCsvOutputAppendResults(self): def testCsvOutputAppendResults(self):
hist_file = os.path.join(self.output_dir,
compute_metrics.HISTOGRAM_DICTS_FILE)
with open(hist_file, 'w') as f:
json.dump([histogram.Histogram('a', 'unitless').AsDict()], f)
self.SerializeIntermediateResults( self.SerializeIntermediateResults(
test_results=[ testing.TestResult(
testing.TestResult( 'benchmark/story',
'benchmark/story', output_artifacts={
output_artifacts={ compute_metrics.HISTOGRAM_DICTS_FILE:
'histogram_dicts.json': testing.Artifact(hist_file) self.CreateHistogramsArtifact(
}, histogram.Histogram('a', 'unitless')),
), },
], ),
) )
processor.main([ processor.main([
...@@ -605,10 +620,10 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -605,10 +620,10 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertIn('label1', lines[2]) self.assertIn('label1', lines[2])
def testExitCodeHasFailures(self): def testExitCodeHasFailures(self):
self.SerializeIntermediateResults([ self.SerializeIntermediateResults(
testing.TestResult('benchmark/story', status='PASS'), testing.TestResult('benchmark/story', status='PASS'),
testing.TestResult('benchmark/story', status='FAIL'), testing.TestResult('benchmark/story', status='FAIL'),
]) )
exit_code = processor.main([ exit_code = processor.main([
'--output-format', 'json-test-results', '--output-format', 'json-test-results',
...@@ -618,10 +633,10 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -618,10 +633,10 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertEqual(exit_code, 1) self.assertEqual(exit_code, 1)
def testExitCodeAllSkipped(self): def testExitCodeAllSkipped(self):
self.SerializeIntermediateResults([ self.SerializeIntermediateResults(
testing.TestResult('benchmark/story', status='SKIP'), testing.TestResult('benchmark/story', status='SKIP'),
testing.TestResult('benchmark/story', status='SKIP'), testing.TestResult('benchmark/story', status='SKIP'),
]) )
exit_code = processor.main([ exit_code = processor.main([
'--output-format', 'json-test-results', '--output-format', 'json-test-results',
...@@ -631,10 +646,10 @@ class ResultsProcessorIntegrationTests(unittest.TestCase): ...@@ -631,10 +646,10 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertEqual(exit_code, -1) self.assertEqual(exit_code, -1)
def testExitCodeSomeSkipped(self): def testExitCodeSomeSkipped(self):
self.SerializeIntermediateResults([ self.SerializeIntermediateResults(
testing.TestResult('benchmark/story', status='SKIP'), testing.TestResult('benchmark/story', status='SKIP'),
testing.TestResult('benchmark/story', status='PASS'), testing.TestResult('benchmark/story', status='PASS'),
]) )
exit_code = processor.main([ exit_code = processor.main([
'--output-format', 'json-test-results', '--output-format', 'json-test-results',
......
@@ -4,6 +4,7 @@
 """Unit tests for results_processor methods."""

+import datetime
 import os
 import unittest
@@ -12,141 +13,95 @@ import mock
 from core.results_processor import processor
 from core.results_processor import testing
-from tracing.value import histogram
+from tracing.value.diagnostics import generic_set
+from tracing.value.diagnostics import date_range
 from tracing.value import histogram_set


 class ResultsProcessorUnitTests(unittest.TestCase):

   def testAddDiagnosticsToHistograms(self):
-    histogram_dicts = [histogram.Histogram('a', 'unitless').AsDict()]
-
-    in_results = testing.IntermediateResults(
-        test_results=[],
-        diagnostics={
-            'benchmarks': ['benchmark'],
-            'osNames': ['linux'],
-            'documentationUrls': [['documentation', 'url']],
-        },
-    )
-
-    histograms_with_diagnostics = processor.AddDiagnosticsToHistograms(
-        histogram_dicts, in_results, results_label='label')
-
-    out_histograms = histogram_set.HistogramSet()
-    out_histograms.ImportDicts(histograms_with_diagnostics)
-    diag_values = [list(v) for v in out_histograms.shared_diagnostics]
-    self.assertEqual(len(diag_values), 4)
-    self.assertIn(['benchmark'], diag_values)
-    self.assertIn(['linux'], diag_values)
-    self.assertIn([['documentation', 'url']], diag_values)
-    self.assertIn(['label'], diag_values)
+    test_result = testing.TestResult('benchmark/story')
+    test_result['_histograms'] = histogram_set.HistogramSet()
+    test_result['_histograms'].CreateHistogram('a', 'unitless', [0])
+
+    start_ts = 1500000000
+    start_iso = datetime.datetime.utcfromtimestamp(start_ts).isoformat() + 'Z'
+
+    processor.AddDiagnosticsToHistograms(
+        test_result, test_suite_start=start_iso, results_label='label')
+
+    hist = test_result['_histograms'].GetFirstHistogram()
+    self.assertEqual(hist.diagnostics['labels'],
+                     generic_set.GenericSet(['label']))
+    self.assertEqual(hist.diagnostics['benchmarkStart'],
+                     date_range.DateRange(start_ts * 1e3))
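
For illustration, here is a rough sketch of the per-test-result behaviour this updated test exercises; it is not the code in processor.py. The reserved diagnostic names ('labels', 'benchmarkStart') are taken from the assertions above, and the use of HistogramSet.AddSharedDiagnosticToAllHistograms is an assumption about the catapult API used for this purpose.

import calendar
import datetime

from tracing.value.diagnostics import date_range
from tracing.value.diagnostics import generic_set


def AddDiagnosticsToHistogramsSketch(test_result, test_suite_start,
                                     results_label=None):
  """Illustrative only: attach per-test diagnostics to the test's histograms."""
  histograms = test_result['_histograms']
  if results_label is not None:
    histograms.AddSharedDiagnosticToAllHistograms(
        'labels', generic_set.GenericSet([results_label]))
  # Parse the ISO 8601 start time and convert to milliseconds since the epoch.
  try:
    start = datetime.datetime.strptime(
        test_suite_start, '%Y-%m-%dT%H:%M:%S.%fZ')
  except ValueError:
    start = datetime.datetime.strptime(test_suite_start, '%Y-%m-%dT%H:%M:%SZ')
  start_ms = calendar.timegm(start.timetuple()) * 1e3
  histograms.AddSharedDiagnosticToAllHistograms(
      'benchmarkStart', date_range.DateRange(start_ms))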
   def testUploadArtifacts(self):
-    in_results = testing.IntermediateResults(
-        test_results=[
-            testing.TestResult(
-                'benchmark/story',
-                output_artifacts={'log': testing.Artifact('/log.log')},
-            ),
-            testing.TestResult(
-                'benchmark/story',
-                output_artifacts={
-                    'trace.html': testing.Artifact('/trace.html'),
-                    'screenshot': testing.Artifact('/screenshot.png'),
-                },
-            ),
-        ],
+    test_result = testing.TestResult(
+        'benchmark/story',
+        output_artifacts={
+            'logs': testing.Artifact('/log.log'),
+            'trace.html': testing.Artifact('/trace.html'),
+            'screenshot': testing.Artifact('/screenshot.png'),
+        },
     )

     with mock.patch('py_utils.cloud_storage.Insert') as cloud_patch:
       cloud_patch.return_value = 'gs://url'
-      processor.UploadArtifacts(in_results, 'bucket', None)
+      processor.UploadArtifacts(test_result, 'bucket', 'run1')

       cloud_patch.assert_has_calls([
-          mock.call('bucket', mock.ANY, '/log.log'),
-          mock.call('bucket', mock.ANY, '/trace.html'),
-          mock.call('bucket', mock.ANY, '/screenshot.png'),
+          mock.call('bucket', 'run1/benchmark/story/logs', '/log.log'),
+          mock.call('bucket', 'run1/benchmark/story/trace.html', '/trace.html'),
+          mock.call('bucket', 'run1/benchmark/story/screenshot',
+                    '/screenshot.png'),
           ],
          any_order=True,
       )

-    for result in in_results['testResults']:
-      for artifact in result['outputArtifacts'].itervalues():
-        self.assertEqual(artifact['remoteUrl'], 'gs://url')
+    for artifact in test_result['outputArtifacts'].itervalues():
+      self.assertEqual(artifact['remoteUrl'], 'gs://url')
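
The mock expectations above pin down the remote naming scheme: <run identifier>/<test path>/<artifact name>, uploaded with py_utils.cloud_storage.Insert and recorded back on the artifact as remoteUrl. A minimal sketch of that step follows; the 'testPath' and 'filePath' keys are assumptions based on how test results and artifacts are accessed elsewhere in this format, and the real UploadArtifacts may add filtering and error handling.

import posixpath

from py_utils import cloud_storage


def UploadArtifactsSketch(test_result, upload_bucket, run_identifier):
  """Illustrative only: upload artifacts and record their remote URLs."""
  for name, artifact in test_result['outputArtifacts'].items():
    # Remote name: <run identifier>/<test path>/<artifact name>.
    remote_name = posixpath.join(
        run_identifier, test_result['testPath'], name)
    artifact['remoteUrl'] = cloud_storage.Insert(
        upload_bucket, remote_name, artifact['filePath'])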
-  def testUploadArtifacts_CheckRemoteUrl(self):
-    in_results = testing.IntermediateResults(
-        test_results=[
-            testing.TestResult(
-                'benchmark/story',
-                output_artifacts={
-                    'trace.html': testing.Artifact('/trace.html')
-                },
-            ),
-        ],
-        start_time='2019-10-01T12:00:00.123456Z',
-    )
-
-    with mock.patch('py_utils.cloud_storage.Insert') as cloud_patch:
+  def testRunIdentifier(self):
     with mock.patch('random.randint') as randint_patch:
       randint_patch.return_value = 54321
-      processor.UploadArtifacts(in_results, 'bucket', 'src@abc + 123')
-      cloud_patch.assert_called_once_with(
-          'bucket',
-          'src_abc_123_20191001T120000_54321/benchmark/story/trace.html',
-          '/trace.html'
-      )
+      run_identifier = processor.RunIdentifier(
+          results_label='src@abc + 123',
+          test_suite_start='2019-10-01T12:00:00.123456Z')
+    self.assertEqual(run_identifier, 'src_abc_123_20191001T120000_54321')
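
testRunIdentifier pins down the identifier format: a sanitized results label, the suite start time as YYYYMMDDTHHMMSS, and a random suffix, joined with underscores. The following sketch reproduces the expected value from the test under those assumptions; it is not the actual processor.RunIdentifier, and the random range is a placeholder.

import datetime
import random
import re


def RunIdentifierSketch(results_label, test_suite_start):
  """Illustrative only: build 'src_abc_123_20191001T120000_54321'-style ids."""
  label_part = '_'.join(re.findall(r'\w+', results_label or ''))
  start = datetime.datetime.strptime(
      test_suite_start, '%Y-%m-%dT%H:%M:%S.%fZ')
  time_part = start.strftime('%Y%m%dT%H%M%S')
  return '_'.join([label_part, time_part, str(random.randint(1, 100000))])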
   def testAggregateTraces(self):
-    in_results = testing.IntermediateResults(
-        test_results=[
-            testing.TestResult(
-                'benchmark/story1',
-                output_artifacts={
-                    'trace/1.json': testing.Artifact(
-                        os.path.join('test_run', 'story1', 'trace', '1.json')),
-                },
-            ),
-            testing.TestResult(
-                'benchmark/story2',
-                output_artifacts={
-                    'trace/1.json': testing.Artifact(
-                        os.path.join('test_run', 'story2', 'trace', '1.json')),
-                    'trace/2.json': testing.Artifact(
-                        os.path.join('test_run', 'story2', 'trace', '2.json')),
-                },
-            ),
-        ],
+    test_result = testing.TestResult(
+        'benchmark/story2',
+        output_artifacts={
+            'trace/1.json': testing.Artifact(
+                os.path.join('test_run', 'story2', 'trace', '1.json')),
+            'trace/2.json': testing.Artifact(
+                os.path.join('test_run', 'story2', 'trace', '2.json')),
+        },
     )

-    with mock.patch('tracing.trace_data.trace_data.SerializeAsHtml') as patch:
-      processor.AggregateTraces(in_results)
-
-    call_list = [list(call[0]) for call in patch.call_args_list]
-    self.assertEqual(len(call_list), 2)
-    for call in call_list:
-      call[0] = set(call[0])
-    self.assertIn(
-        [
-            set([os.path.join('test_run', 'story1', 'trace', '1.json')]),
-            os.path.join('test_run', 'story1', 'trace', 'trace.html'),
-        ],
-        call_list
-    )
-    self.assertIn(
-        [
-            set([
-                os.path.join('test_run', 'story2', 'trace', '1.json'),
-                os.path.join('test_run', 'story2', 'trace', '2.json'),
-            ]),
-            os.path.join('test_run', 'story2', 'trace', 'trace.html'),
-        ],
-        call_list
-    )
-
-    for result in in_results['testResults']:
-      artifacts = result['outputArtifacts']
-      self.assertEqual(len(artifacts), 1)
-      self.assertEqual(artifacts.keys()[0], 'trace.html')
+    serialize_method = 'tracing.trace_data.trace_data.SerializeAsHtml'
+    with mock.patch(serialize_method) as mock_serialize:
+      processor.AggregateTraces(test_result)
+
+    self.assertEqual(mock_serialize.call_count, 1)
+    trace_files, file_path = mock_serialize.call_args[0][:2]
+    self.assertEqual(
+        set(trace_files),
+        set([
+            os.path.join('test_run', 'story2', 'trace', '1.json'),
+            os.path.join('test_run', 'story2', 'trace', '2.json'),
+        ]),
+    )
+    self.assertEqual(
+        file_path,
+        os.path.join('test_run', 'story2', 'trace', 'trace.html'),
+    )
+
+    artifacts = test_result['outputArtifacts']
+    self.assertEqual(len(artifacts), 1)
+    self.assertEqual(artifacts.keys()[0], 'trace.html')
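
The rewritten testAggregateTraces now checks aggregation for a single test result: SerializeAsHtml is called once with that result's trace artifacts and a trace.html path next to them, and afterwards only trace.html remains in outputArtifacts. A simplified sketch of that behaviour follows; the 'trace/' prefix filter and the artifact bookkeeping are assumptions for illustration, not the code in processor.py.

import os

from tracing.trace_data import trace_data


def AggregateTracesSketch(test_result):
  """Illustrative only: merge a test's trace artifacts into one trace.html."""
  artifacts = test_result['outputArtifacts']
  trace_names = [name for name in artifacts if name.startswith('trace/')]
  trace_files = [artifacts[name]['filePath'] for name in trace_names]
  html_path = os.path.join(os.path.dirname(trace_files[0]), 'trace.html')
  trace_data.SerializeAsHtml(trace_files, html_path)
  # Drop the individual traces and register the aggregated artifact.
  for name in trace_names:
    del artifacts[name]
  artifacts['trace.html'] = {'filePath': html_path}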
   def testMeasurementToHistogram(self):
     hist = processor.MeasurementToHistogram('a', {
@@ -7,33 +7,6 @@
 import json


-_BENCHMARK_START_KEYS = set(['startTime'])
-
-
-def IntermediateResults(test_results, start_time='2015-10-21T07:28:00.000Z',
-                        finalized=True, interrupted=False, diagnostics=None):
-  """Build a dict of 'parsed' intermediate results.
-
-  Args:
-    test_results: A sequence of testResult dicts.
-    start_time: An optional UTC timestamp recording when a benchmark started
-      running.
-    finalized: An optional bool indicating whether the benchmark run finalized.
-      Defaults to True.
-    interrupted: An optional bool indicating whether the benchmark run was
-      interrupted. Defaults to False.
-  """
-  return {
-      'benchmarkRun': {
-          'startTime': start_time,
-          'finalized': finalized,
-          'interrupted': interrupted,
-          'diagnostics': diagnostics or {},
-      },
-      'testResults': list(test_results)
-  }
-
-
 def TestResult(test_path, status='PASS', is_expected=None,
                start_time='2015-10-21T07:28:00.000Z', run_duration='1.00s',
                output_artifacts=None, tags=None):
@@ -97,29 +70,16 @@ def SerializeIntermediateResults(in_results, filepath):
   """Serialize intermediate results to a filepath.

   Args:
-    in_results: A dict with intermediate results, e.g. as produced by
-      IntermediateResults or parsed from an intermediate results file.
-    filpath: A file path where to serialize the intermediate results.
+    in_results: A list of test results.
+    filepath: A file path where to serialize the intermediate results.
   """
-  # Split benchmarkRun into fields recorded at startup and when finishing.
-  benchmark_start = {}
-  benchmark_finish = {}
-  for key, value in in_results['benchmarkRun'].items():
-    d = benchmark_start if key in _BENCHMARK_START_KEYS else benchmark_finish
-    d[key] = value
-
-  # Serialize individual records as a sequence of json lines.
   with open(filepath, 'w') as fp:
-    _SerializeRecord({'benchmarkRun': benchmark_start}, fp)
-    for test_result in in_results['testResults']:
-      _SerializeRecord({'testResult': test_result}, fp)
-    _SerializeRecord({'benchmarkRun': benchmark_finish}, fp)
+    for test_result in in_results:
+      json.dump({'testResult': test_result}, fp,
+                sort_keys=True, separators=(',', ':'))
+      fp.write('\n')


 def _SplitTag(tag):
   key, value = tag.split(':', 1)
   return {'key': key, 'value': value}
-
-
-def _SerializeRecord(record, fp):
-  fp.write(json.dumps(record, sort_keys=True, separators=(',', ':')) + '\n')
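
With benchmarkRun gone, the serialized intermediate results are plain JSON Lines: one {"testResult": {...}} object per line. For illustration, a hypothetical round trip using the helpers above; the output file name is chosen only for this example.

from core.results_processor import testing

results = [
    testing.TestResult('benchmark/story', status='PASS'),
    testing.TestResult('benchmark/story', status='FAIL'),
]
# Writes two lines, each a compact JSON object with sorted keys of the form
# {"testResult": {...}} -- no benchmarkRun records any more.
testing.SerializeIntermediateResults(results, '/tmp/intermediate_results.jsonl')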
@@ -9,16 +9,13 @@ import multiprocessing
 from multiprocessing.dummy import Pool as ThreadPool


-def ApplyInParallel(function, work_list):
+def ApplyInParallel(function, work_list, on_failure=None):
   """Apply a function to all values in work_list in parallel.

   Args:
     function: A function with one argument.
     work_list: Any iterable with arguments for the function.
+    on_failure: A function to run in case of a failure.
-
-  Returns:
-    A generator over results. The order of results might not match the
-    order of the arguments in the work_list.
   """
   if not work_list:
     return
@@ -35,17 +32,17 @@ def ApplyInParallel(function, work_list):
   def function_with_try(arg):
     try:
-      return function(arg)
+      function(arg)
     except Exception:  # pylint: disable=broad-except
       # logging exception here is the only way to get a stack trace since
       # multiprocessing's pool implementation does not save that data. See
       # crbug.com/953365.
       logging.exception('Exception while running %s' % function.__name__)
-      raise
+      if on_failure:
+        on_failure(arg)

   try:
-    for result in pool.imap_unordered(function_with_try, work_list):
-      yield result
+    pool.imap_unordered(function_with_try, work_list)
     pool.close()
     pool.join()
   finally:
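
ApplyInParallel no longer yields results; it simply runs the function over the work list and, when an item raises, invokes on_failure with that item. A usage sketch in the spirit of this CL follows; ProcessTestResultSketch and the 'status' update are illustrative stand-ins, not the actual processor code.

from core.results_processor import util


def ProcessTestResultSketch(test_result):
  # Stand-in for the per-test pipeline: aggregate traces, compute metrics,
  # upload artifacts, and so on.
  pass


def ProcessAllTestResultsSketch(test_results):
  # A failure in one test result only marks that result as FAIL; the other
  # results are processed independently.
  util.ApplyInParallel(
      ProcessTestResultSketch,
      test_results,
      on_failure=lambda test_result: test_result.update(status='FAIL'))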
@@ -9,15 +9,16 @@ from core.results_processor import util
 class UtilTests(unittest.TestCase):

   def testApplyInParallel(self):
-    work_list = [1, 2, 3]
-    fun = lambda x: x * x
-    result = set(util.ApplyInParallel(fun, work_list))
-    self.assertEqual(result, set([1, 4, 9]))
+    work_list = [[1], [2], [3]]
+    def fun(x):
+      x.extend(x)
+    util.ApplyInParallel(fun, work_list)
+    self.assertEqual(work_list, [[1, 1], [2, 2], [3, 3]])

-  def testApplyInParallelExceptionRaised(self):
-    work_list = [1, 2, 3]
+  def testApplyInParallelOnFailure(self):
+    work_list = [[1], [2], [3]]
     def fun(x):
-      if x == 3:
+      if x == [3]:
         raise RuntimeError()
-    with self.assertRaises(RuntimeError):
-      list(util.ApplyInParallel(fun, work_list))
+    util.ApplyInParallel(fun, work_list, on_failure=lambda x: x.pop())
+    self.assertEqual(work_list, [[1], [2], []])