Commit 881f1317 authored by Ned Nguyen, committed by Commit Bot

Add retry logic to results_dashboard uploading

In this first attempt, we retry uploading the whole perf dashboard payload
(JSON-serializable) on any recoverable error.

A subsequent CL may improve this further by splitting the perf dashboard data
into smaller parts and re-uploading only the parts whose upload failed.

This CL also increases the expiration time of the OAuth token generated for each perf benchmark's data to 30 minutes (previously 1 minute by default).

Bug: 864565
Cq-Include-Trybots: master.tryserver.chromium.perf:obbs_fyi
Change-Id: If4cb959cc62c5da3baa4a36a5ae91c10ca6d4d02
Reviewed-on: https://chromium-review.googlesource.com/1148228
Commit-Queue: Ned Nguyen <nednguyen@google.com>
Reviewed-by: Simon Hatch <simonhatch@chromium.org>
Cr-Commit-Position: refs/heads/master@{#577682}
parent e28b1035
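
Before the diff, a minimal standalone sketch of the bounded-retry-on-recoverable-error approach described in the commit message above. The exception and function names below are hypothetical stand-ins (the real ones live in results_dashboard.py in the diff that follows); the snippet is illustrative only, not part of the change.

# Illustrative sketch only, not part of this CL: retry a JSON-serializable
# payload upload a bounded number of times, giving up early on fatal errors.

class RetryableUploadError(Exception):
  """Hypothetical stand-in for SendResultsRetryException."""

class FatalUploadError(Exception):
  """Hypothetical stand-in for SendResultsFatalException."""

def upload_with_retries(upload_fn, payload, num_retries=3):
  """Returns (errors, all_data_uploaded), mirroring the new contract."""
  errors = []
  for attempt in range(1, num_retries + 1):
    try:
      upload_fn(payload)             # may raise either exception above
      return errors, True            # success: stop retrying
    except RetryableUploadError as e:
      errors.append('attempt %d failed (will retry): %s' % (attempt, e))
    except FatalUploadError as e:
      errors.append('fatal error (no retry): %s' % e)
      break
  return errors, False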
@@ -13,17 +13,21 @@ import tempfile
 @contextlib.contextmanager
-def with_access_token(service_account_json, append):
+def with_access_token(
+    service_account_json, prefix, token_expiration_in_minutes):
   """Yields an access token for the service account.

   Args:
     service_account_json: The path to the service account JSON file.
+    prefix: the prefix of the token file
+    token_expiration_in_minutes: the integer expiration time of the token
   """
-  fd, path = tempfile.mkstemp(suffix='.json', prefix=append)
+  fd, path = tempfile.mkstemp(suffix='.json', prefix=prefix)
   try:
     args = ['luci-auth', 'token']
     if service_account_json:
       args += ['-service-account-json', service_account_json]
+    args += ['-lifetime', '%im' % token_expiration_in_minutes]
     subprocess.check_call(args, stdout=fd)
     os.close(fd)
     fd = None
...
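
For readers skimming the diff, a hedged usage sketch of the updated context manager follows. The with_access_token helper, the oauth_api module name, and the -lifetime flag passed to luci-auth all come from this CL; the import path, argument values, and the yielded-path assumption are made-up examples.

# Hypothetical usage of the updated helper; the import path, file paths, and
# token prefix are example values, and the assumption that the context
# manager yields the token file path is not shown in the hunk above.
from core import oauth_api

with oauth_api.with_access_token(
    service_account_json='/path/to/service_account.json',
    prefix='my_benchmark_tok',
    token_expiration_in_minutes=30) as token_file:
  # token_file is the temp .json file that luci-auth wrote the token to;
  # the token stays valid for roughly the requested 30-minute lifetime.
  print 'access token written to %s' % token_file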
@@ -52,7 +52,8 @@ class SendResultsFatalException(SendResultException):
   pass


-def SendResults(data, url, send_as_histograms=False, oauth_token=None):
+def SendResults(data, url, send_as_histograms=False, oauth_token=None,
+                num_retries=3):
   """Sends results to the Chrome Performance Dashboard.

   This function tries to send the given data to the dashboard.
@@ -61,72 +62,80 @@ def SendResults(data, url, send_as_histograms=False, oauth_token=None):
     data: The data to try to send. Must be JSON-serializable.
     url: Performance Dashboard URL (including schema).
     send_as_histograms: True if result is to be sent to /add_histograms.
-    oauth_token: string; used for flushing oauth uploads from cache.
+    oauth_token: string; used for flushing oauth uploads from cache. Note that
+      the client is responsible for making sure that the oauth_token doesn't
+      expire when using this API.
+    num_retries: Number of times to retry uploading to the perf dashboard upon
+      recoverable error.
   """
   start = time.time()

   # Send all the results from this run and the previous cache to the
   # dashboard.
-  fatal_error, errors = _SendResultsToDashboard(
-      data, url, oauth_token, is_histogramset=send_as_histograms)
+  errors, all_data_uploaded = _SendResultsToDashboard(
+      data, url, oauth_token, is_histogramset=send_as_histograms,
+      num_retries=num_retries)

   print 'Time spent sending results to %s: %s' % (url, time.time() - start)

-  # Print any errors; if there was a fatal error, it should be an exception.
-  for error in errors:
-    print error
-  if fatal_error:
-    return False
-  return True
+  for err in errors:
+    print err
+
+  return all_data_uploaded


-def _SendResultsToDashboard(dashboard_data, url, oauth_token, is_histogramset):
+def _SendResultsToDashboard(
+    dashboard_data, url, oauth_token, is_histogramset, num_retries):
   """Tries to send perf dashboard data to |url|.

   Args:
     perf_results_file_path: A file name.
     url: The instance URL to which to post results.
     oauth_token: An oauth token to use for histogram uploads. Might be None.
+    num_retries: Number of times to retry uploading to the perf dashboard upon
+      recoverable error.

   Returns:
-    A pair (fatal_error, errors), where fatal_error is a boolean indicating
-    whether there was a major error and the step should fail, and errors
-    is a list of error strings.
+    A tuple (errors, all_data_uploaded), where:
+      errors is a list of error strings.
+      all_data_uploaded is a boolean indicating whether all perf data was
+        successfully uploaded.
   """
-  fatal_error = False
   errors = []
+  all_data_uploaded = False
   data_type = ('histogram' if is_histogramset else 'chartjson')
-  print 'Sending %s result to dashboard.' % data_type

-  try:
-    if is_histogramset:
-      # TODO(eakuefner): Remove this discard logic once all bots use
-      # histograms.
-      if oauth_token is None:
-        raise SendResultsFatalException(ERROR_NO_OAUTH_TOKEN)
-
-      _SendHistogramJson(url, json.dumps(dashboard_data), oauth_token)
-    else:
-      _SendResultsJson(url, json.dumps(dashboard_data))
-  except SendResultsRetryException as e:
-    # TODO(crbug.com/864565): retry sending to the perf dashboard with this
-    # error.
-    error = 'Error while uploading %s data: %s' % (data_type, str(e))
-    fatal_error = True
-    errors.append(error)
-  except SendResultsFatalException as e:
-    error = 'Error uploading %s data: %s' % (data_type, str(e))
-    errors.append(error)
-    fatal_error = True
-  except Exception:
-    error = 'Unexpected error while uploading %s data: %s' % (
-        data_type, traceback.format_exc())
-    errors.append(error)
-    fatal_error = True
+  for i in xrange(1, num_retries + 1):
+    try:
+      print 'Sending %s result to dashboard (attempt %i out of %i).' % (
+          data_type, i, num_retries)
+      if is_histogramset:
+        # TODO(eakuefner): Remove this discard logic once all bots use
+        # histograms.
+        if oauth_token is None:
+          raise SendResultsFatalException(ERROR_NO_OAUTH_TOKEN)
+
+        _SendHistogramJson(url, json.dumps(dashboard_data), oauth_token)
+      else:
+        _SendResultsJson(url, json.dumps(dashboard_data))
+      all_data_uploaded = True
+      break
+    except SendResultsRetryException as e:
+      error = 'Error while uploading %s data: %s' % (data_type, str(e))
+      errors.append(error)
+    except SendResultsFatalException as e:
+      error = 'Error uploading %s data: %s' % (data_type, str(e))
+      errors.append(error)
+      break
+    except Exception:
+      error = 'Unexpected error while uploading %s data: %s' % (
+          data_type, traceback.format_exc())
+      errors.append(error)
+      break

-  return fatal_error, errors
+  return errors, all_data_uploaded


 def MakeHistogramSetWithDiagnostics(histograms_file,
...
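
To make the new contract concrete, a hedged caller-side sketch follows. SendResults, its num_retries parameter, and the boolean return value come from the diff above; the wrapper function, its arguments, and the RuntimeError are hypothetical.

# Hypothetical caller; only SendResults and num_retries come from this CL.
from core import results_dashboard

def upload_or_fail(data, dashboard_url, token):
  ok = results_dashboard.SendResults(
      data, dashboard_url, send_as_histograms=True,
      oauth_token=token, num_retries=3)
  if not ok:
    # Retries were exhausted or a fatal error occurred; SendResults already
    # printed the individual errors, so just signal overall failure here.
    raise RuntimeError('perf dashboard upload failed')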
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import unittest

import mock

from core import results_dashboard


class ResultsDashboardTest(unittest.TestCase):

  def setUp(self):
    self.fake_oath = 'fake-oath'
    self.perf_data = {'foo': 1, 'bar': 2}
    self.dashboard_url = 'https://chromeperf.appspot.com'

  def testRetryForSendResultRetryException(self):
    def raise_retry_exception(url, histogramset_json, oauth_token):
      del url, histogramset_json, oauth_token  # unused
      raise results_dashboard.SendResultsRetryException('Should retry')

    with mock.patch('core.results_dashboard._SendHistogramJson',
                    side_effect=raise_retry_exception) as m:
      upload_result = results_dashboard.SendResults(
          self.perf_data, self.dashboard_url, send_as_histograms=True,
          oauth_token=self.fake_oath, num_retries=5)
      self.assertFalse(upload_result)
      self.assertEqual(m.call_count, 5)

  def testNoRetryForSendResultFatalException(self):
    def raise_retry_exception(url, histogramset_json, oauth_token):
      del url, histogramset_json, oauth_token  # unused
      raise results_dashboard.SendResultsFatalException('Do not retry')

    with mock.patch('core.results_dashboard._SendHistogramJson',
                    side_effect=raise_retry_exception) as m:
      upload_result = results_dashboard.SendResults(
          self.perf_data, self.dashboard_url, send_as_histograms=True,
          oauth_token=self.fake_oath, num_retries=5)
      self.assertFalse(upload_result)
      self.assertEqual(m.call_count, 1)

  def testNoRetryForSuccessfulSendResult(self):
    with mock.patch('core.results_dashboard._SendHistogramJson') as m:
      upload_result = results_dashboard.SendResults(
          self.perf_data, self.dashboard_url, send_as_histograms=True,
          oauth_token=self.fake_oath, num_retries=5)
      self.assertTrue(upload_result)
      self.assertEqual(m.call_count, 1)

  def testNoRetryAfterSucessfulSendResult(self):
    counter = [0]

    def raise_retry_exception_first_two_times(
        url, histogramset_json, oauth_token):
      del url, histogramset_json, oauth_token  # unused
      counter[0] += 1
      if counter[0] <= 2:
        raise results_dashboard.SendResultsRetryException('Please retry')

    with mock.patch('core.results_dashboard._SendHistogramJson',
                    side_effect=raise_retry_exception_first_two_times) as m:
      upload_result = results_dashboard.SendResults(
          self.perf_data, self.dashboard_url, send_as_histograms=True,
          oauth_token=self.fake_oath, num_retries=5)
      self.assertTrue(upload_result)
      self.assertEqual(m.call_count, 3)
@@ -205,10 +205,11 @@ def _handle_benchmarks_shard_map(benchmarks_shard_map_file, extra_links):
 def _get_benchmark_name(directory):
   return basename(directory).replace(" benchmark", "")


 def process_perf_results(output_json, configuration_name,
                          service_account_file,
                          build_properties, task_output_dir,
                          smoke_test_mode):
   """Process perf results.

   Consists of merging the json-test-format output, uploading the perf test
@@ -352,7 +353,8 @@ def _upload_individual(
     # We generate an oauth token for every benchmark upload in the event
     # the token could time out, see crbug.com/854162
     with oauth_api.with_access_token(
-        service_account_file, ("%s_tok" % benchmark_name)) as oauth_file:
+        service_account_file, ('%s_tok' % benchmark_name),
+        token_expiration_in_minutes=30) as oauth_file:
       with open(output_json_file, 'w') as oj:
         upload_fail = _upload_perf_results(
             results_filename,
...