Commit 881f1317 authored by Ned Nguyen's avatar Ned Nguyen Committed by Commit Bot

Add retry logic to results_dashboard uploading

In this first attempt, we retry uploading the whole perf dashboard data
(JSON-serializable) for any recoverable error.

A subsequent CL may improve this further by splitting the perf dashboard data
into smaller parts and retrying only the parts whose upload failed.

This CL also increases the expiration of the oauth token generated for each perf benchmark's data to 30 minutes (used to be 1 minute by default).

Bug: 864565
Cq-Include-Trybots: master.tryserver.chromium.perf:obbs_fyi
Change-Id: If4cb959cc62c5da3baa4a36a5ae91c10ca6d4d02
Reviewed-on: https://chromium-review.googlesource.com/1148228
Commit-Queue: Ned Nguyen <nednguyen@google.com>
Reviewed-by: default avatarSimon Hatch <simonhatch@chromium.org>
Cr-Commit-Position: refs/heads/master@{#577682}
parent e28b1035
......@@ -13,17 +13,21 @@ import tempfile
@contextlib.contextmanager
def with_access_token(service_account_json, append):
def with_access_token(
service_account_json, prefix, token_expiration_in_minutes):
"""Yields an access token for the service account.
Args:
service_account_json: The path to the service account JSON file.
prefix: the prefix of the token file
token_expiration_in_minutes: the integer expiration time of the token
"""
fd, path = tempfile.mkstemp(suffix='.json', prefix=append)
fd, path = tempfile.mkstemp(suffix='.json', prefix=prefix)
try:
args = ['luci-auth', 'token']
if service_account_json:
args += ['-service-account-json', service_account_json]
args += ['-lifetime', '%im' % token_expiration_in_minutes]
subprocess.check_call(args, stdout=fd)
os.close(fd)
fd = None
......
......@@ -52,7 +52,8 @@ class SendResultsFatalException(SendResultException):
pass
def SendResults(data, url, send_as_histograms=False, oauth_token=None):
def SendResults(data, url, send_as_histograms=False, oauth_token=None,
num_retries=3):
"""Sends results to the Chrome Performance Dashboard.
This function tries to send the given data to the dashboard.
......@@ -61,72 +62,80 @@ def SendResults(data, url, send_as_histograms=False, oauth_token=None):
data: The data to try to send. Must be JSON-serializable.
url: Performance Dashboard URL (including schema).
send_as_histograms: True if result is to be sent to /add_histograms.
oauth_token: string; used for flushing oauth uploads from cache.
oauth_token: string; used for flushing oauth uploads from cache. Note that
client is responsible for making sure that the oauth_token doesn't expire
when using this API.
num_retries: Number of times to retry uploading to the perf dashboard upon
recoverable error.
"""
start = time.time()
# Send all the results from this run and the previous cache to the
# dashboard.
fatal_error, errors = _SendResultsToDashboard(
data, url, oauth_token, is_histogramset=send_as_histograms)
errors, all_data_uploaded = _SendResultsToDashboard(
data, url, oauth_token, is_histogramset=send_as_histograms,
num_retries=num_retries)
print 'Time spent sending results to %s: %s' % (url, time.time() - start)
# Print any errors; if there was a fatal error, it should be an exception.
for error in errors:
print error
if fatal_error:
return False
return True
for err in errors:
print err
return all_data_uploaded
def _SendResultsToDashboard(dashboard_data, url, oauth_token, is_histogramset):
def _SendResultsToDashboard(
dashboard_data, url, oauth_token, is_histogramset, num_retries):
"""Tries to send perf dashboard data to |url|.
Args:
perf_results_file_path: A file name.
url: The instance URL to which to post results.
oauth_token: An oauth token to use for histogram uploads. Might be None.
num_retries: Number of time to retry uploading to the perf dashboard upon
recoverable error.
Returns:
A pair (fatal_error, errors), where fatal_error is a boolean indicating
whether there there was a major error and the step should fail, and errors
is a list of error strings.
A tuple (errors, all_data_uploaded), whereas:
errors is a list of error strings.
all_data_uploaded is a boolean indicating whether all perf data was
succesfully uploaded.
"""
fatal_error = False
errors = []
all_data_uploaded = False
data_type = ('histogram' if is_histogramset else 'chartjson')
print 'Sending %s result to dashboard.' % data_type
try:
if is_histogramset:
# TODO(eakuefner): Remove this discard logic once all bots use
# histograms.
if oauth_token is None:
raise SendResultsFatalException(ERROR_NO_OAUTH_TOKEN)
_SendHistogramJson(url, json.dumps(dashboard_data), oauth_token)
else:
_SendResultsJson(url, json.dumps(dashboard_data))
except SendResultsRetryException as e:
# TODO(crbug.com/864565): retry sending to the perf dashboard with this
# error.
error = 'Error while uploading %s data: %s' % (data_type, str(e))
fatal_error = True
errors.append(error)
except SendResultsFatalException as e:
error = 'Error uploading %s data: %s' % (data_type, str(e))
errors.append(error)
fatal_error = True
except Exception:
error = 'Unexpected error while uploading %s data: %s' % (
data_type, traceback.format_exc())
errors.append(error)
fatal_error = True
return fatal_error, errors
for i in xrange(1, num_retries + 1):
try:
print 'Sending %s result to dashboard (attempt %i out of %i).' % (
data_type, i, num_retries)
if is_histogramset:
# TODO(eakuefner): Remove this discard logic once all bots use
# histograms.
if oauth_token is None:
raise SendResultsFatalException(ERROR_NO_OAUTH_TOKEN)
_SendHistogramJson(url, json.dumps(dashboard_data), oauth_token)
else:
_SendResultsJson(url, json.dumps(dashboard_data))
all_data_uploaded = True
break
except SendResultsRetryException as e:
error = 'Error while uploading %s data: %s' % (data_type, str(e))
errors.append(error)
except SendResultsFatalException as e:
error = 'Error uploading %s data: %s' % (data_type, str(e))
errors.append(error)
break
except Exception:
error = 'Unexpected error while uploading %s data: %s' % (
data_type, traceback.format_exc())
errors.append(error)
break
return errors, all_data_uploaded
def MakeHistogramSetWithDiagnostics(histograms_file,
......
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import unittest
import mock
from core import results_dashboard
class ResultsDashboardTest(unittest.TestCase):
  """Tests for the retry behavior of results_dashboard.SendResults."""

  def setUp(self):
    # NOTE: 'oath' is a pre-existing typo for 'oauth'; the value is an opaque
    # fake token, so the name is kept for fidelity with the original tests.
    self.fake_oath = 'fake-oath'
    self.perf_data = {'foo': 1, 'bar': 2}
    self.dashboard_url = 'https://chromeperf.appspot.com'

  def testRetryForSendResultRetryException(self):
    # A recoverable error should be retried up to num_retries times, and the
    # overall upload reported as failed.
    def raise_retry_exception(url, histogramset_json, oauth_token):
      del url, histogramset_json, oauth_token  # unused
      raise results_dashboard.SendResultsRetryException('Should retry')

    with mock.patch('core.results_dashboard._SendHistogramJson',
                    side_effect=raise_retry_exception) as m:
      upload_result = results_dashboard.SendResults(
          self.perf_data, self.dashboard_url, send_as_histograms=True,
          oauth_token=self.fake_oath, num_retries=5)
      self.assertFalse(upload_result)
      self.assertEqual(m.call_count, 5)

  def testNoRetryForSendResultFatalException(self):
    # A fatal error must not be retried: one attempt, failed upload.
    def raise_retry_exception(url, histogramset_json, oauth_token):
      del url, histogramset_json, oauth_token  # unused
      raise results_dashboard.SendResultsFatalException('Do not retry')

    with mock.patch('core.results_dashboard._SendHistogramJson',
                    side_effect=raise_retry_exception) as m:
      upload_result = results_dashboard.SendResults(
          self.perf_data, self.dashboard_url, send_as_histograms=True,
          oauth_token=self.fake_oath, num_retries=5)
      self.assertFalse(upload_result)
      self.assertEqual(m.call_count, 1)

  def testNoRetryForSuccessfulSendResult(self):
    # A successful upload should not trigger any extra attempts.
    with mock.patch('core.results_dashboard._SendHistogramJson') as m:
      upload_result = results_dashboard.SendResults(
          self.perf_data, self.dashboard_url, send_as_histograms=True,
          oauth_token=self.fake_oath, num_retries=5)
      self.assertTrue(upload_result)
      self.assertEqual(m.call_count, 1)

  def testNoRetryAfterSucessfulSendResult(self):
    # Retrying must stop as soon as an attempt succeeds: two failures
    # followed by a success means exactly three calls.
    counter = [0]

    def raise_retry_exception_first_two_times(
        url, histogramset_json, oauth_token):
      del url, histogramset_json, oauth_token  # unused
      counter[0] += 1
      if counter[0] <= 2:
        raise results_dashboard.SendResultsRetryException('Please retry')

    with mock.patch('core.results_dashboard._SendHistogramJson',
                    side_effect=raise_retry_exception_first_two_times) as m:
      upload_result = results_dashboard.SendResults(
          self.perf_data, self.dashboard_url, send_as_histograms=True,
          oauth_token=self.fake_oath, num_retries=5)
      self.assertTrue(upload_result)
      self.assertEqual(m.call_count, 3)
......@@ -205,10 +205,11 @@ def _handle_benchmarks_shard_map(benchmarks_shard_map_file, extra_links):
def _get_benchmark_name(directory):
return basename(directory).replace(" benchmark", "")
def process_perf_results(output_json, configuration_name,
service_account_file,
build_properties, task_output_dir,
smoke_test_mode):
service_account_file,
build_properties, task_output_dir,
smoke_test_mode):
"""Process perf results.
Consists of merging the json-test-format output, uploading the perf test
......@@ -352,7 +353,8 @@ def _upload_individual(
# We generate an oauth token for every benchmark upload in the event
# the token could time out, see crbug.com/854162
with oauth_api.with_access_token(
service_account_file, ("%s_tok" % benchmark_name)) as oauth_file:
service_account_file, ('%s_tok' % benchmark_name),
token_expiration_in_minutes=30) as oauth_file:
with open(output_json_file, 'w') as oj:
upload_fail = _upload_perf_results(
results_filename,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment