Commit 4a023983 authored by Ned Nguyen, committed by Commit Bot

Add wait time between retry attempts to upload to perf dashboard

This implements exponential backoff to increase the wait time after each
retry attempt.

This CL also increases the default number of retry attempts to 4.

Bug: 864565
Cq-Include-Trybots: master.tryserver.chromium.perf:obbs_fyi
NOTRY=true  # linux-chromeos-rel flake
Change-Id: I99c167e1329bbb83d86d310e2acf164cd3f3e7cf
Reviewed-on: https://chromium-review.googlesource.com/1176089
Commit-Queue: Ned Nguyen <nednguyen@google.com>
Reviewed-by: Annie Sullivan <sullivan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#583422}
parent 04d6e4f7
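
For reference, a minimal standalone sketch of the retry loop this CL introduces. The helper name send_with_backoff and the send_fn callback are illustrative placeholders, not code from the CL; the 30-second initial wait, the doubling schedule, and the num_retries=4 default are taken from the diff below.

import time


def send_with_backoff(send_fn, num_retries=4, initial_wait_seconds=30):
  """Calls send_fn() up to num_retries times with exponential backoff.

  Mirrors the schedule in this CL: sleep 30s after the first failed
  attempt, then double the wait (60s, 120s, ...) after each later one.
  """
  wait_seconds = initial_wait_seconds
  for _ in xrange(num_retries):
    try:
      send_fn()
      return True  # Success; stop retrying.
    except Exception:  # The CL retries only on SendResultsRetryException.
      time.sleep(wait_seconds)
      wait_seconds *= 2
  return False  # All attempts failed.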
@@ -63,15 +63,18 @@ def LuciAuthTokenGeneratorCallback(
         (p.stdout.read(), p.stderr.read()))
 
 
-def SendResults(data, url, send_as_histograms=False, service_account_file=None,
+def SendResults(data, data_label, url, send_as_histograms=False,
+                service_account_file=None,
                 token_generator_callback=LuciAuthTokenGeneratorCallback,
-                num_retries=3):
+                num_retries=4):
   """Sends results to the Chrome Performance Dashboard.
 
   This function tries to send the given data to the dashboard.
 
   Args:
     data: The data to try to send. Must be JSON-serializable.
+    data_label: string name of the data to be uploaded. This is only used for
+      logging purposes.
     url: Performance Dashboard URL (including schema).
     send_as_histograms: True if result is to be sent to /add_histograms.
     service_account_file: string; path to service account file which is used
@@ -99,10 +102,15 @@ def SendResults(data, url, send_as_histograms=False, service_account_file=None,
   dashboard_data_str = json.dumps(data)
 
+  # When the perf dashboard is overloaded, it takes some time to spin up
+  # new instances, so sleep before retrying.
+  # For more details, see crbug.com/867379.
+  wait_before_next_retry_in_seconds = 30
   for i in xrange(1, num_retries + 1):
     try:
-      print 'Sending %s result to dashboard (attempt %i out of %i).' % (
-          data_type, i, num_retries)
+      print 'Sending %s result of %s to dashboard (attempt %i out of %i).' % (
+          data_type, data_label, i, num_retries)
       if send_as_histograms:
         _SendHistogramJson(url, dashboard_data_str,
                            service_account_file, token_generator_callback)
@@ -114,6 +122,8 @@ def SendResults(data, url, send_as_histograms=False, service_account_file=None,
     except SendResultsRetryException as e:
       error = 'Error while uploading %s data: %s' % (data_type, str(e))
       errors.append(error)
+      time.sleep(wait_before_next_retry_in_seconds)
+      wait_before_next_retry_in_seconds *= 2
     except SendResultsFatalException as e:
       error = 'Fatal error while uploading %s data: %s' % (data_type, str(e))
       errors.append(error)
...
@@ -4,6 +4,7 @@
 import unittest
 
 import mock
+from mock import call
 
 from core import results_dashboard
@@ -23,14 +24,19 @@ class ResultsDashboardTest(unittest.TestCase):
       del token_generator_callback  # unused
       raise results_dashboard.SendResultsRetryException('Should retry')
 
-    with mock.patch('core.results_dashboard._SendHistogramJson',
-                    side_effect=raise_retry_exception) as m:
-      upload_result = results_dashboard.SendResults(
-          self.perf_data, self.dashboard_url, send_as_histograms=True,
-          service_account_file=self.fake_service,
-          token_generator_callback=self.dummy_token_generator, num_retries=5)
-      self.assertFalse(upload_result)
-      self.assertEqual(m.call_count, 5)
+    with mock.patch('core.results_dashboard.time.sleep') as sleep_mock:
+      with mock.patch('core.results_dashboard._SendHistogramJson',
+                      side_effect=raise_retry_exception) as m:
+        upload_result = results_dashboard.SendResults(
+            self.perf_data, 'dummy_benchmark',
+            self.dashboard_url, send_as_histograms=True,
+            service_account_file=self.fake_service,
+            token_generator_callback=self.dummy_token_generator, num_retries=5)
+        self.assertFalse(upload_result)
+        self.assertEqual(m.call_count, 5)
+        self.assertEqual(
+            sleep_mock.mock_calls,
+            [call(30), call(60), call(120), call(240), call(480)])
 
   def testNoRetryForSendResultFatalException(self):
@@ -40,25 +46,31 @@ class ResultsDashboardTest(unittest.TestCase):
       del token_generator_callback  # unused
       raise results_dashboard.SendResultsFatalException('Do not retry')
 
-    with mock.patch('core.results_dashboard._SendHistogramJson',
-                    side_effect=raise_retry_exception) as m:
-      upload_result = results_dashboard.SendResults(
-          self.perf_data, self.dashboard_url, send_as_histograms=True,
-          service_account_file=self.fake_service,
-          token_generator_callback=self.dummy_token_generator,
-          num_retries=5)
-      self.assertFalse(upload_result)
-      self.assertEqual(m.call_count, 1)
+    with mock.patch('core.results_dashboard.time.sleep') as sleep_mock:
+      with mock.patch('core.results_dashboard._SendHistogramJson',
+                      side_effect=raise_retry_exception) as m:
+        upload_result = results_dashboard.SendResults(
+            self.perf_data, 'dummy_benchmark',
+            self.dashboard_url, send_as_histograms=True,
+            service_account_file=self.fake_service,
+            token_generator_callback=self.dummy_token_generator,
+            num_retries=5)
+        self.assertFalse(upload_result)
+        self.assertEqual(m.call_count, 1)
+        self.assertFalse(sleep_mock.mock_calls)
 
   def testNoRetryForSuccessfulSendResult(self):
-    with mock.patch('core.results_dashboard._SendHistogramJson') as m:
-      upload_result = results_dashboard.SendResults(
-          self.perf_data, self.dashboard_url, send_as_histograms=True,
-          service_account_file=self.fake_service,
-          token_generator_callback=self.dummy_token_generator,
-          num_retries=5)
-      self.assertTrue(upload_result)
-      self.assertEqual(m.call_count, 1)
+    with mock.patch('core.results_dashboard.time.sleep') as sleep_mock:
+      with mock.patch('core.results_dashboard._SendHistogramJson') as m:
+        upload_result = results_dashboard.SendResults(
+            self.perf_data, 'dummy_benchmark',
+            self.dashboard_url, send_as_histograms=True,
+            service_account_file=self.fake_service,
+            token_generator_callback=self.dummy_token_generator,
+            num_retries=5)
+        self.assertTrue(upload_result)
+        self.assertEqual(m.call_count, 1)
+        self.assertFalse(sleep_mock.mock_calls)
 
   def testNoRetryAfterSucessfulSendResult(self):
     counter = [0]
@@ -70,12 +82,16 @@ class ResultsDashboardTest(unittest.TestCase):
       if counter[0] <= 2:
         raise results_dashboard.SendResultsRetryException('Please retry')
 
-    with mock.patch('core.results_dashboard._SendHistogramJson',
-                    side_effect=raise_retry_exception_first_two_times) as m:
-      upload_result = results_dashboard.SendResults(
-          self.perf_data, self.dashboard_url, send_as_histograms=True,
-          service_account_file=self.fake_service,
-          token_generator_callback=self.dummy_token_generator,
-          num_retries=5)
-      self.assertTrue(upload_result)
-      self.assertEqual(m.call_count, 3)
+    with mock.patch('core.results_dashboard.time.sleep') as sleep_mock:
+      with mock.patch('core.results_dashboard._SendHistogramJson',
+                      side_effect=raise_retry_exception_first_two_times) as m:
+        upload_result = results_dashboard.SendResults(
+            self.perf_data, 'dummy_benchmark',
+            self.dashboard_url, send_as_histograms=True,
+            service_account_file=self.fake_service,
+            token_generator_callback=self.dummy_token_generator,
+            num_retries=5)
+        self.assertTrue(upload_result)
+        self.assertEqual(m.call_count, 3)
+        self.assertEqual(
+            sleep_mock.mock_calls, [call(30), call(60)])
@@ -142,6 +142,7 @@ def main(args):
   if not results_dashboard.SendResults(
       dashboard_json,
+      options.name,
      options.results_url,
      send_as_histograms=options.send_as_histograms,
      service_account_file=service_account_file):
...
@@ -404,8 +404,7 @@ def _handle_perf_results(
       build_properties, output_json_file, service_account_file))
 
   # Kick off the uploads in multiple processes
-  cpus = mp.cpu_count()
-  pool = mp.Pool(cpus)
+  pool = mp.Pool()
   try:
     async_result = pool.map_async(
         _upload_individual_benchmark, invocations)
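
A note on the last hunk: multiprocessing.Pool() already defaults its worker count to the machine's CPU count when called with no argument, so dropping the explicit cpus variable is behavior-preserving. A minimal standalone illustration (not code from this CL):

import multiprocessing as mp

# Pool() with no processes argument defaults to mp.cpu_count() workers,
# so these two calls create pools of the same size.
pool_a = mp.Pool()
pool_b = mp.Pool(mp.cpu_count())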