Commit 6156ddee authored by Emily Hanley, committed by Commit Bot

Implement smoke test mode for performance_test_suite

1) Updates the script that executes the tests on the swarming bot so that it
accepts a flag naming the benchmark-to-shard map file. We now have several
use cases for this: the main perf waterfall (Android and desktop), smoke
testing, testing on our trybot, and potentially Android Go.

2) Updates the custom merge script so that it does not upload perf results
when we are running in smoke test mode (see the sketch below).
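
Taken together, the two changes behave roughly like the condensed Python
sketch below. get_sharding_map_path mirrors the new runner code in this CL;
merge_results and the argparse wiring are simplified, hypothetical stand-ins
for the merge script (the real flags and helpers appear in the diffs that
follow).

    import argparse
    import json
    import os


    def get_sharding_map_path(args):
      # Shard maps are now selected purely by file name; the files must live
      # in src/tools/perf/core/.
      return os.path.join(
          os.path.dirname(__file__), '..', '..', 'tools', 'perf', 'core',
          args.test_shard_map_filename)


    def merge_results(test_results_list, perf_results_link, smoke_test_mode):
      # Hypothetical stand-in for the merge script: json-test-format results
      # are always merged, but the perf dashboard link is only attached when
      # an upload actually happened (i.e. not in smoke test mode).
      merged = {'tests': test_results_list}
      if not smoke_test_mode and perf_results_link:
        merged['links'] = {'perf_results': perf_results_link}
      return merged


    if __name__ == '__main__':
      parser = argparse.ArgumentParser()
      parser.add_argument('--test-shard-map-filename', required=False)
      parser.add_argument('--smoke-test-mode', action='store_true')
      args = parser.parse_args()
      if args.test_shard_map_filename:
        print(get_sharding_map_path(args))
      print(json.dumps(merge_results([], None, args.smoke_test_mode)))

In smoke test mode the merged json-test-format output is still written; only
the dashboard upload and its results link are skipped.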

Bug: 840427

NOTRY=true  # win_7_chromium_rel_ng flaky

Change-Id: Idec8f45cc2d1b5a3cd336ef3d1698d3d86161ba3
Reviewed-on: https://chromium-review.googlesource.com/1055614
Commit-Queue: Ned Nguyen <nednguyen@google.com>
Reviewed-by: Ned Nguyen <nednguyen@google.com>
Cr-Commit-Position: refs/heads/master@{#558900}
parent 6bb53282
@@ -6,7 +6,8 @@
           "-v",
           "--browser=android-chromium",
           "--upload-results",
-          "--run-ref-build"
+          "--run-ref-build",
+          "--test-shard-map-filename=benchmark_android_bot_map.json"
         ],
         "isolate_name": "performance_test_suite",
         "merge": {
@@ -496,7 +497,8 @@
           "-v",
           "--browser=release",
           "--upload-results",
-          "--run-ref-build"
+          "--run-ref-build",
+          "--test-shard-map-filename=benchmark_desktop_bot_map.json"
         ],
         "isolate_name": "performance_test_suite",
         "merge": {
@@ -657,9 +659,9 @@
           "-v",
           "--browser=reference",
           "--upload-results",
-          "--testing=true",
           "--xvfb",
-          "--run-ref-build"
+          "--run-ref-build",
+          "--test-shard-map-filename=benchmark_bot_map.json"
         ],
         "isolate_name": "telemetry_perf_tests_without_chrome",
         "merge": {
@@ -706,7 +708,8 @@
           "-v",
           "--browser=android-chromium",
           "--upload-results",
-          "--run-ref-build"
+          "--run-ref-build",
+          "--test-shard-map-filename=benchmark_android_bot_map.json"
         ],
         "isolate_name": "performance_test_suite",
         "merge": {
@@ -754,7 +757,8 @@
           "-v",
           "--browser=android-webview",
           "--upload-results",
-          "--webview-embedder-apk=../../out/Release/apks/SystemWebViewShell.apk"
+          "--webview-embedder-apk=../../out/Release/apks/SystemWebViewShell.apk",
+          "--test-shard-map-filename=benchmark_android_bot_map.json"
         ],
         "isolate_name": "performance_webview_test_suite",
         "merge": {
...
@@ -46719,7 +46719,8 @@
           "-v",
           "--browser=release",
           "--upload-results",
-          "--run-ref-build"
+          "--run-ref-build",
+          "--test-shard-map-filename=benchmark_desktop_bot_map.json"
         ],
         "isolate_name": "performance_test_suite",
         "merge": {
@@ -46854,7 +46855,8 @@
           "-v",
           "--browser=release",
           "--upload-results",
-          "--run-ref-build"
+          "--run-ref-build",
+          "--test-shard-map-filename=benchmark_desktop_bot_map.json"
         ],
         "isolate_name": "performance_test_suite",
         "merge": {
...
@@ -80,30 +80,10 @@ BENCHMARKS_TO_OUTPUT_HISTOGRAMS = [
   'system_health.webview_startup',
 ]
 
-# We currently have two different sharding schemes for android
-# vs desktop. When we are running at capacity we will have 26
-# desktop shards and 39 android.
-CURRENT_DESKTOP_NUM_SHARDS = 26
-CURRENT_ANDROID_NUM_SHARDS = 39
-
-def get_sharding_map_path(total_shards, testing):
-  # Determine if we want to do a test run of the benchmarks or run the
-  # full suite.
-  if not testing:
-    # Note: <= for testing purposes until we have all shards running
-    if int(total_shards) <= CURRENT_DESKTOP_NUM_SHARDS:
-      return os.path.join(
-          os.path.dirname(__file__), '..', '..', 'tools', 'perf', 'core',
-          'benchmark_desktop_bot_map.json')
-    else:
-      return os.path.join(
-          os.path.dirname(__file__), '..', '..', 'tools', 'perf', 'core',
-          'benchmark_android_bot_map.json')
-  else:
-    return os.path.join(
-        os.path.dirname(__file__), '..', '..', 'tools', 'perf', 'core',
-        'benchmark_bot_map.json')
+def get_sharding_map_path(args):
+  return os.path.join(
+      os.path.dirname(__file__), '..', '..', 'tools', 'perf', 'core',
+      args.test_shard_map_filename)
 
 
 def write_results(
     perf_test_name, perf_results, json_test_results, isolated_out_dir, encoded):
@@ -129,7 +109,7 @@ def execute_benchmark(benchmark, isolated_out_dir,
   is_histograms = append_output_format(benchmark, args, rest_args)
   # Insert benchmark name as first argument to run_benchmark call
   # which is the first argument in the rest_args. Also need to append
-  # output format.
+  # output format and smoke test mode.
   per_benchmark_args = (rest_args[:1] + [benchmark] + rest_args[1:])
   benchmark_name = benchmark
   if is_reference:
@@ -200,12 +180,13 @@ def main():
   parser.add_argument('--xvfb', help='Start xvfb.', action='store_true')
   parser.add_argument('--non-telemetry',
                       help='Type of perf test', type=bool, default=False)
-  parser.add_argument('--testing', help='Test run, execute subset of tests',
-                      type=bool, default=False)
   parser.add_argument('--benchmarks',
                       help='Comma separated list of benchmark names'
                       ' to run in lieu of indexing into our benchmark bot maps',
                       required=False)
+  # Some executions may have a different sharding scheme and/or set of tests.
+  # These files must live in src/tools/perf/core/
+  parser.add_argument('--test-shard-map-filename', type=str, required=False)
   parser.add_argument('--output-format', action='append')
   parser.add_argument('--run-ref-build',
                       help='Run test on reference browser', action='store_true')
@@ -244,8 +225,7 @@ def main():
     if not (total_shards or shard_index):
       raise Exception('Shard indicators must be present for perf tests')
 
-    sharding_map_path = get_sharding_map_path(
-        total_shards, args.testing or False)
+    sharding_map_path = get_sharding_map_path(args)
     with open(sharding_map_path) as f:
       sharding_map = json.load(f)
     sharding = None
...
@@ -970,7 +970,10 @@ NEW_PERF_RECIPE_FYI_TESTERS = {
       'tests': [
         {
           'isolate': 'performance_test_suite',
-          'extra_args': ['--run-ref-build'],
+          'extra_args': [
+            '--run-ref-build',
+            '--test-shard-map-filename=benchmark_desktop_bot_map.json',
+          ],
           'num_shards': 26
         },
         {
@@ -997,8 +1000,11 @@ NEW_PERF_RECIPE_FYI_TESTERS = {
       'tests': [
         {
           'isolate': 'telemetry_perf_tests_without_chrome',
-          'extra_args': ['--xvfb',
-                         '--run-ref-build'],
+          'extra_args': [
+            '--xvfb',
+            '--run-ref-build',
+            '--test-shard-map-filename=benchmark_bot_map.json'
+          ],
           'num_shards': 3
         },
         {
@@ -1022,7 +1028,10 @@ NEW_PERF_RECIPE_FYI_TESTERS = {
         {
           'name': 'performance_test_suite',
           'isolate': 'performance_test_suite',
-          'extra_args': ['--run-ref-build'],
+          'extra_args': [
+            '--run-ref-build',
+            '--test-shard-map-filename=benchmark_android_bot_map.json',
+          ],
           'num_shards': 14
         }
       ],
@@ -1040,6 +1049,9 @@ NEW_PERF_RECIPE_FYI_TESTERS = {
       'tests': [
         {
           'isolate': 'performance_webview_test_suite',
+          'extra_args': [
+            '--test-shard-map-filename=benchmark_android_bot_map.json',
+          ],
           'num_shards': 7
         }
       ],
@@ -1057,7 +1069,10 @@ NEW_PERF_RECIPE_FYI_TESTERS = {
       'tests': [
         {
           'isolate': 'performance_test_suite',
-          'extra_args': ['--run-ref-build'],
+          'extra_args': [
+            '--run-ref-build',
+            '--test-shard-map-filename=benchmark_android_bot_map.json',
+          ],
           'num_shards': 7
         }
       ],
@@ -1081,7 +1096,10 @@ NEW_PERF_RECIPE_MIGRATED_TESTERS = {
       'tests': [
         {
           'isolate': 'performance_test_suite',
-          'extra_args': ['--run-ref-build'],
+          'extra_args': [
+            '--run-ref-build',
+            '--test-shard-map-filename=benchmark_desktop_bot_map.json',
+          ],
         },
         {
           'isolate': 'load_library_perf_tests',
@@ -1111,7 +1129,10 @@ NEW_PERF_RECIPE_MIGRATED_TESTERS = {
         # Add views_perftests, crbug.com/811766
         {
           'isolate': 'performance_test_suite',
-          'extra_args': ['--run-ref-build'],
+          'extra_args': [
+            '--run-ref-build',
+            '--test-shard-map-filename=benchmark_desktop_bot_map.json',
+          ],
         },
         {
           'isolate': 'load_library_perf_tests',
@@ -1213,11 +1234,6 @@ def generate_telemetry_args(tester_config):
     test_args.append(
         '--webview-embedder-apk=../../out/Release/apks/SystemWebViewShell.apk')
-
-  # Appending testing=true if we only want to run a subset of benchmarks
-  # for quicker testing
-  if tester_config.get('testing', False):
-    test_args.append('--testing=true')
   return test_args
 
 
 def generate_non_telemetry_args():
...
@@ -103,9 +103,11 @@ def _merge_json_output(output_json, jsons_to_merge, perf_results_link,
   """
   merged_results = results_merger.merge_test_results(jsons_to_merge)
 
-  merged_results['links'] = {
-      perf_results_file_name: perf_results_link
-  }
+  # Only append the perf results link if present
+  if perf_results_link:
+    merged_results['links'] = {
+        perf_results_file_name: perf_results_link
+    }
 
   with open(output_json, 'w') as f:
     json.dump(merged_results, f)
@@ -113,9 +115,47 @@ def _merge_json_output(output_json, jsons_to_merge, perf_results_link,
   return 0
 
 
+def _handle_perf_json_test_results(
+    benchmark_directory_list, test_results_list):
+  benchmark_enabled_map = {}
+  for directory in benchmark_directory_list:
+    # Obtain the test name we are running
+    benchmark_name = _get_benchmark_name(directory)
+    is_ref = '.reference' in benchmark_name
+    enabled = True
+    with open(join(directory, 'test_results.json')) as json_data:
+      json_results = json.load(json_data)
+      if not json_results:
+        # Output is null meaning the test didn't produce any results.
+        # Want to output an error and continue loading the rest of the
+        # test results.
+        print 'No results produced for %s, skipping upload' % directory
+        continue
+      if json_results.get('version') == 3:
+        # Non-telemetry tests don't have written json results but
+        # if they are executing then they are enabled and will generate
+        # chartjson results.
+        if not bool(json_results.get('tests')):
+          enabled = False
+      if not is_ref:
+        # We don't need to upload reference build data to the
+        # flakiness dashboard since we don't monitor the ref build
+        test_results_list.append(json_results)
+    if not enabled:
+      # We don't upload disabled benchmarks or tests that are run
+      # as a smoke test
+      print 'Benchmark %s disabled' % benchmark_name
+    benchmark_enabled_map[benchmark_name] = enabled
+  return benchmark_enabled_map
+
+
+def _get_benchmark_name(directory):
+  return basename(directory).replace(" benchmark", "")
+
+
 def _process_perf_results(output_json, configuration_name,
                           service_account_file,
-                          build_properties, task_output_dir):
+                          build_properties, task_output_dir,
+                          smoke_test_mode):
   """Process one or more perf JSON results.
 
   Consists of merging the json-test-format output and uploading the perf test
@@ -139,8 +179,6 @@ def _process_perf_results(output_json, configuration_name,
       for f in listdir(join(task_output_dir, directory))
   ]
 
-  # We need to keep track of disabled benchmarks so we don't try to
-  # upload the results.
   test_results_list = []
   tmpfile_dir = tempfile.mkdtemp('resultscache')
   upload_failure = False
@@ -151,52 +189,38 @@ def _process_perf_results(output_json, configuration_name,
     configuration_name = build_properties['buildername']
 
   try:
+    # First obtain the list of json test results to merge
+    # and determine the status of each benchmark
+    benchmark_enabled_map = _handle_perf_json_test_results(
+        benchmark_directory_list, test_results_list)
+
+    # Upload all eligible benchmarks to the perf dashboard
     logdog_dict = {}
-    with oauth_api.with_access_token(service_account_file) as oauth_file:
-      for directory in benchmark_directory_list:
-        # Obtain the test name we are running
-        benchmark_name = basename(directory).replace(" benchmark", "")
-        is_ref = '.reference' in benchmark_name
-        disabled = False
-        with open(join(directory, 'test_results.json')) as json_data:
-          json_results = json.load(json_data)
-          if not json_results:
-            # Output is null meaning the test didn't produce any results.
-            # Want to output an error and continue loading the rest of the
-            # test results.
-            print 'No results produced for %s, skipping upload' % directory
-            continue
-          if json_results.get('version') == 3:
-            # Non-telemetry tests don't have written json results but
-            # if they are executing then they are enabled and will generate
-            # chartjson results.
-            if not bool(json_results.get('tests')):
-              disabled = True
-          if not is_ref:
-            # We don't need to upload reference build data to the
-            # flakiness dashboard since we don't monitor the ref build
-            test_results_list.append(json_results)
-        if disabled:
-          # We don't upload disabled benchmarks
-          print 'Benchmark %s disabled' % benchmark_name
-          continue
-        print 'Uploading perf results from %s benchmark' % benchmark_name
-        upload_fail = _upload_and_write_perf_data_to_logfile(
-            benchmark_name, directory, configuration_name, build_properties,
-            oauth_file, tmpfile_dir, logdog_dict, is_ref)
-        upload_failure = upload_failure or upload_fail
-
-    logdog_label = 'Results Dashboard'
-    logdog_file_name = 'Results_Dashboard_' + str(uuid.uuid4())
-    if upload_failure:
-      logdog_label += ' Upload Failure'
-
-    _merge_json_output(output_json, test_results_list,
-                       logdog_helper.text(logdog_file_name,
-                                          json.dumps(logdog_dict, sort_keys=True,
-                                                     indent=4, separators=(',', ':'))),
-                       logdog_label)
+    logdog_stream = None
+    logdog_label = 'Results Dashboard'
+    if not smoke_test_mode:
+      with oauth_api.with_access_token(service_account_file) as oauth_file:
+        for directory in benchmark_directory_list:
+          benchmark_name = _get_benchmark_name(directory)
+          if not benchmark_enabled_map[benchmark_name]:
+            continue
+          print 'Uploading perf results from %s benchmark' % benchmark_name
+          upload_fail = _upload_and_write_perf_data_to_logfile(
+              benchmark_name, directory, configuration_name, build_properties,
+              oauth_file, tmpfile_dir, logdog_dict,
+              ('.reference' in benchmark_name))
+          upload_failure = upload_failure or upload_fail
+
+      logdog_file_name = 'Results_Dashboard_' + str(uuid.uuid4())
+      logdog_stream = logdog_helper.text(logdog_file_name,
+          json.dumps(logdog_dict, sort_keys=True,
+                     indent=4, separators=(',', ':')))
+      if upload_failure:
+        logdog_label += ' Upload Failure'
+
+    # Finally, merge all test results json and write out to output location
+    _merge_json_output(output_json, test_results_list,
+                       logdog_stream, logdog_label)
   finally:
     shutil.rmtree(tmpfile_dir)
 
   return upload_failure
@@ -259,13 +283,21 @@ def main():
   parser.add_argument('-o', '--output-json', required=True,
                       help=argparse.SUPPRESS)
   parser.add_argument('json_files', nargs='*', help=argparse.SUPPRESS)
+  parser.add_argument('--smoke-test-mode', required=False, default=False,
+                      help='This test should be run in smoke test mode'
+                      ' meaning it does not upload to the perf dashboard')
 
   args = parser.parse_args()
 
+  if not args.service_account_file and not args.smoke_test_mode:
+    raise Exception(
+        'Service account file must be specificed for dashboard upload')
+
   return _process_perf_results(
       args.output_json, args.configuration_name,
       args.service_account_file,
-      args.build_properties, args.task_output_dir)
+      args.build_properties, args.task_output_dir,
+      args.smoke_test_mode)
 
 
 if __name__ == '__main__':
...