Commit 15caa536 authored by Mikhail Khokhlov's avatar Mikhail Khokhlov Committed by Commit Bot

[tools/perf] Upload artifacts in parallel

This CL speeds up uploading test artifacts to the cloud by doing it in
a thread pool. It also fixes a bug where run_identifier would change
during the script run.

Bug: 981349
Change-Id: I65fea156d73fb568f8ecb3e05d330971ab55be29
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1837630
Commit-Queue: Mikhail Khokhlov <khokhlov@google.com>
Reviewed-by: default avatarJuan Antonio Navarro Pérez <perezju@chromium.org>
Cr-Commit-Position: refs/heads/master@{#702787}
parent b58ea26a
......@@ -4,11 +4,11 @@
import json
import logging
import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool
import os
import time
from core.results_processor import util
from tracing.metrics import metric_runner
......@@ -101,22 +101,7 @@ def ComputeTBMv2Metrics(intermediate_results):
if not work_list:
return histogram_dicts
try:
# Note that this is speculatively halved as an attempt to fix
# crbug.com/953365.
cpu_count = multiprocessing.cpu_count() / 2
except NotImplementedError:
# Some platforms can raise a NotImplementedError from cpu_count()
logging.warning('cpu_count() not implemented.')
cpu_count = 4
pool = ThreadPool(min(cpu_count, len(work_list)))
try:
for dicts in pool.imap_unordered(_PoolWorker, work_list):
for dicts in util.ApplyInParallel(_PoolWorker, work_list):
histogram_dicts += dicts
pool.close()
pool.join()
finally:
pool.terminate()
return histogram_dicts
......@@ -18,6 +18,7 @@ from py_utils import cloud_storage
from core.results_processor import command_line
from core.results_processor import compute_metrics
from core.results_processor import formatters
from core.results_processor import util
from tracing.value.diagnostics import generic_set
from tracing.value.diagnostics import reserved_infos
......@@ -93,10 +94,8 @@ def _AggregateTraces(intermediate_results):
del artifacts[trace]
def _RemoteName(results_label, start_time, test_path, artifact_name):
"""Construct a name for a given artifact, under which it will be
stored in the cloud.
"""
def _RunIdentifier(results_label, start_time):
"""Construct an identifier for the current script run"""
if results_label:
identifier_parts = [re.sub(r'\W+', '_', results_label)]
else:
......@@ -105,8 +104,7 @@ def _RemoteName(results_label, start_time, test_path, artifact_name):
# The first 19 chars of the string match 'YYYY-MM-DDTHH:MM:SS'.
identifier_parts.append(re.sub(r'\W+', '', start_time[:19]))
identifier_parts.append(str(random.randint(1, 1e5)))
run_identifier = '_'.join(identifier_parts)
return '/'.join([run_identifier, test_path, artifact_name])
return '_'.join(identifier_parts)
def UploadArtifacts(intermediate_results, upload_bucket, results_label):
......@@ -118,7 +116,10 @@ def UploadArtifacts(intermediate_results, upload_bucket, results_label):
if upload_bucket is None:
return
start_time = intermediate_results['benchmarkRun']['startTime']
run_identifier = _RunIdentifier(
results_label, intermediate_results['benchmarkRun']['startTime'])
work_list = []
for result in intermediate_results['testResults']:
artifacts = result.get('artifacts', {})
for name, artifact in artifacts.iteritems():
......@@ -128,13 +129,25 @@ def UploadArtifacts(intermediate_results, upload_bucket, results_label):
# save histograms as an artifact anymore.
if name == compute_metrics.HISTOGRAM_DICTS_FILE:
continue
remote_name = '/'.join([run_identifier, result['testPath'], name])
work_list.append((artifact, remote_name))
if not work_list:
return
def PoolUploader(work_item):
artifact, remote_name = work_item
artifact['remoteUrl'] = cloud_storage.Insert(
upload_bucket,
_RemoteName(results_label, start_time, result['testPath'], name),
artifact['filePath'],
)
logging.info('Uploaded %s of %s to %s\n' % (
name, result['testPath'], artifact['remoteUrl']))
upload_bucket, remote_name, artifact['filePath'])
for _ in util.ApplyInParallel(PoolUploader, work_list):
pass
for result in intermediate_results['testResults']:
artifacts = result.get('artifacts', {})
for name, artifact in artifacts.iteritems():
logging.info('Uploaded %s of %s to %s', name, result['testPath'],
artifact['remoteUrl'])
def _ComputeMetrics(intermediate_results, results_label):
......
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging
import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool
def ApplyInParallel(function, work_list):
  """Apply a function to all values in work_list in parallel.

  Args:
    function: A function with one argument.
    work_list: A sized iterable with arguments for the function.

  Returns:
    A generator over results. The order of results might not match the
    order of the arguments in the work_list.
  """
  # Nothing to do; also avoids creating a ThreadPool of size 0 below,
  # which raises a ValueError.
  if not work_list:
    return
  try:
    # Note that this is speculatively halved as an attempt to fix
    # crbug.com/953365.
    # Use floor division: cpu_count() / 2 is a float on Python 3, and
    # ThreadPool does not accept a float size.
    cpu_count = multiprocessing.cpu_count() // 2
  except NotImplementedError:
    # Some platforms can raise a NotImplementedError from cpu_count()
    logging.warning('cpu_count() not implemented.')
    cpu_count = 4
  # Clamp to at least one worker: on a single-CPU machine the halved
  # cpu_count is 0, and ThreadPool(0) raises.
  pool = ThreadPool(max(1, min(cpu_count, len(work_list))))
  try:
    for result in pool.imap_unordered(function, work_list):
      yield result
    pool.close()
    pool.join()
  finally:
    # Ensure worker threads are torn down even if the consumer abandons
    # the generator or an exception propagates mid-iteration.
    pool.terminate()
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import unittest
from core.results_processor import util
class UtilTests(unittest.TestCase):
  def testApplyInParallel(self):
    """ApplyInParallel applies the function to every item, in any order."""
    inputs = [1, 2, 3]
    square = lambda value: value * value
    # Collect into a set since result ordering is not guaranteed.
    results = set(util.ApplyInParallel(square, inputs))
    self.assertEqual(results, set([1, 4, 9]))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment