Commit f7c3b53e authored by Juan Antonio Navarro Perez, committed by Commit Bot

[tools/perf] Add --max-values-per-test-case option

Bug: 1001038
Change-Id: I08747c6a733ac42bd564f985e3f9ad52478ce210
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1879207
Reviewed-by: Caleb Rouleau <crouleau@chromium.org>
Commit-Queue: Juan Antonio Navarro Pérez <perezju@chromium.org>
Cr-Commit-Position: refs/heads/master@{#709870}
parent 70c310e1
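For orientation, a minimal sketch of how the new flag is exercised end to end. It mirrors the integration test added below; the `processor` import path and the directory arguments are assumptions made for illustration only.

from core.results_processor import processor  # assumed module path

# Fail any test case that emits more than 5 values (ad hoc measurements plus
# metric-generated histograms); the run's exit code becomes 1 when any test
# case exceeds the cap.
exit_code = processor.main([
    '--output-format', 'json-test-results',
    '--output-format', 'histograms',
    '--output-dir', '/tmp/smoke_output',        # illustrative path
    '--intermediate-dir', '/tmp/intermediate',  # illustrative path
    '--max-values-per-test-case', '5',
])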
@@ -15,9 +15,11 @@ import unittest
from chrome_telemetry_build import chromium_config
from core import results_processor
from core import testing
from telemetry import benchmark as benchmark_module
from telemetry import decorators
from telemetry.testing import options_for_unittests
from telemetry.testing import progress_reporter
from py_utils import discover
@@ -31,15 +33,20 @@ from benchmarks import speedometer
from benchmarks import v8_browsing
MAX_NUM_VALUES = 50000
# We want to prevent benchmarks from accidentally trying to upload too much
# data to the chrome perf dashboard. So the smoke tests below cap the max
# number of values that each story tested would produce when running on the
# waterfall.
MAX_VALUES_PER_TEST_CASE = 1000
def SmokeTestGenerator(benchmark, num_pages=1):
"""Generates a benchmark that includes first N pages from pageset.
def SmokeTestGenerator(benchmark_class, num_pages=1):
"""Generates a somke test for the first N pages from a benchmark.
Args:
benchmark: benchmark object to make smoke test.
num_pages: use the first N pages to run smoke test.
benchmark_class: a benchmark class to smoke test.
num_pages: only smoke test the first N pages, since smoke testing
everything would take too long to run.
"""
# NOTE TO SHERIFFS: DO NOT DISABLE THIS TEST.
#
@@ -50,43 +57,30 @@ def SmokeTestGenerator(benchmark, num_pages=1):
@decorators.Disabled('chromeos') # crbug.com/351114
@decorators.Disabled('android') # crbug.com/641934
def BenchmarkSmokeTest(self):
class SinglePageBenchmark(benchmark): # pylint: disable=no-init
def CreateStorySet(self, options):
# pylint: disable=super-on-old-class
story_set = super(SinglePageBenchmark, self).CreateStorySet(options)
# We want to prevent benchmarks from accidentally trying to upload too
# much data to the chrome perf dashboard. So this test tries to
# estimate the number of values that the benchmark _would_ create when
# running on the waterfall, and fails if too many values are produced.
# As we run a single story and not the whole benchmark, the maximum
# number of values allowed is scaled proportionally.
# TODO(crbug.com/981349): This logic is only really valid for legacy
# values, and does not take histograms into account. An alternative
# should be implemented when using the results processor.
type(self).MAX_NUM_VALUES = MAX_NUM_VALUES / len(story_set)
return story_set
# Some benchmarks run multiple iterations, which is not needed for a
# smoke test.
if hasattr(SinglePageBenchmark, 'enable_smoke_test_mode'):
SinglePageBenchmark.enable_smoke_test_mode = True
if hasattr(benchmark_class, 'enable_smoke_test_mode'):
benchmark_class.enable_smoke_test_mode = True
with tempfile_ext.NamedTemporaryDirectory() as temp_dir:
# Set the benchmark's default arguments.
options = options_for_unittests.GetRunOptions(
options = testing.GetRunOptions(
output_dir=temp_dir,
benchmark_cls=SinglePageBenchmark,
# Only smoke test num_pages since smoke testing everything takes
# too long.
benchmark_cls=benchmark_class,
overrides={'story_shard_end_index': num_pages},
environment=chromium_config.GetDefaultChromiumConfig())
options.pageset_repeat = 1 # For smoke testing only run the page once.
single_page_benchmark = SinglePageBenchmark()
return_code = single_page_benchmark.Run(options)
if return_code == -1:
self.skipTest('The benchmark was not run.')
self.assertEqual(0, return_code, msg='Failed: %s' % benchmark)
options.output_formats = ['histograms']
options.max_values_per_test_case = MAX_VALUES_PER_TEST_CASE
return_code = benchmark_class().Run(options)
if return_code == -1:
self.skipTest('The benchmark was not run.')
self.assertEqual(
return_code, 0,
msg='Benchmark run failed: %s' % benchmark_class.Name())
return_code = results_processor.ProcessResults(options)
self.assertEqual(
return_code, 0,
msg='Result processing failed: %s' % benchmark_class.Name())
return BenchmarkSmokeTest
......
@@ -15,10 +15,11 @@ import unittest
from chrome_telemetry_build import chromium_config
from core import perf_benchmark
from core import results_processor
from core import testing
from telemetry import decorators
from telemetry.internal.browser import browser_finder
from telemetry.testing import options_for_unittests
from telemetry.testing import progress_reporter
from py_utils import discover
@@ -194,7 +195,11 @@ _DISABLED_TESTS = frozenset({
})
MAX_NUM_VALUES = 50000
# We want to prevent benchmarks from accidentally trying to upload too much
# data to the chrome perf dashboard. So the smoke tests below cap the max
# number of values that each story tested would produce when running on the
# waterfall.
MAX_VALUES_PER_TEST_CASE = 1000
def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
@@ -211,18 +216,6 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
def CreateStorySet(self, options):
# pylint: disable=super-on-old-class
story_set = super(SinglePageBenchmark, self).CreateStorySet(options)
# We want to prevent benchmarks from accidentally trying to upload too
# much data to the chrome perf dashboard. So this test tries to
# estimate the number of values that the benchmark _would_ create when
# running on the waterfall, and fails if too many values are produced.
# As we run a single story and not the whole benchmark, the maximum
# number of values allowed is scaled proportionally.
# TODO(crbug.com/981349): This logic is only really valid for legacy
# values, and does not take histograms into account. An alternative
# should be implemented when using the results processor.
type(self).MAX_NUM_VALUES = MAX_NUM_VALUES / len(story_set)
stories_to_remove = [s for s in story_set.stories if s !=
story_to_smoke_test]
for s in stories_to_remove:
@@ -251,9 +244,15 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
self.skipTest('Test is explicitly disabled')
single_page_benchmark = SinglePageBenchmark()
return_code = single_page_benchmark.Run(options)
if return_code == -1:
self.skipTest('The benchmark was not run.')
self.assertEqual(0, return_code, msg='Failed: %s' % benchmark_class)
if return_code == -1:
self.skipTest('The benchmark was not run.')
self.assertEqual(
return_code, 0,
msg='Benchmark run failed: %s' % benchmark_class.Name())
return_code = results_processor.ProcessResults(options)
self.assertEqual(
return_code, 0,
msg='Result processing failed: %s' % benchmark_class.Name())
# We attach the test method to SystemHealthBenchmarkSmokeTest dynamically
# so that we can set the test method name to include
@@ -270,10 +269,12 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
def GenerateBenchmarkOptions(output_dir, benchmark_cls):
options = options_for_unittests.GetRunOptions(
options = testing.GetRunOptions(
output_dir=output_dir, benchmark_cls=benchmark_cls,
environment=chromium_config.GetDefaultChromiumConfig())
options.pageset_repeat = 1 # For smoke testing only run each page once.
options.output_formats = ['histograms']
options.max_values_per_test_case = MAX_VALUES_PER_TEST_CASE
# Enable browser logging in the smoke test only. Hopefully, this will detect
# all crashes and hence remove the need to enable logging in actual perf
@@ -292,7 +293,7 @@ def load_tests(loader, standard_tests, pattern):
names_stories_to_smoke_tests = []
for benchmark_class in benchmark_classes:
# HACK: these options should be derived from options_for_unittests which are
# HACK: these options should be derived from GetRunOptions, which returns
# the resolved options from run_tests' arguments. However, options is only
# parsed during test time which happens after load_tests are called.
# Since none of our system health benchmarks creates stories based on
......
@@ -10,7 +10,6 @@ import unittest
import mock
from telemetry.testing import options_for_unittests
from telemetry.testing import test_stories
from telemetry.web_perf import timeline_based_measurement
from tracing.value.diagnostics import all_diagnostics
@@ -20,6 +19,7 @@ from tracing.value import histogram_set
from core import benchmark_runner
from core import perf_benchmark
from core import results_processor
from core import testing
def _FakeParseArgs(environment, args, results_arg_parser):
@@ -53,7 +53,7 @@ class BenchmarkRunnerIntegrationTest(unittest.TestCase):
"""
def setUp(self):
self.options = options_for_unittests.GetRunOptions(
self.options = testing.GetRunOptions(
output_dir=tempfile.mkdtemp())
self.options.output_formats = ['histograms']
......
@@ -53,6 +53,11 @@ def ArgumentParser(standalone=False, legacy_formats=None):
help=Sentences(
'Path to a directory where to write final results.',
'Default: %(default)s.'))
group.add_argument(
'--max-values-per-test-case', type=int, metavar='NUM',
help=Sentences(
'Fail a test run if it produces more than this number of values.',
'This includes both ad hoc and metric generated measurements.'))
group.add_argument(
'--reset-results', action='store_true',
help=Sentences(
......
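A quick illustration of how the new option surfaces on the parsed options (a sketch; it relies on parse_args accepting an empty argument list, as the new core/testing.py helper at the end of this change does):

from core.results_processor import command_line

parser = command_line.ArgumentParser()

# Without the flag the option defaults to None, so ProcessResults (next hunk)
# skips the cap check entirely.
options = parser.parse_args([])
assert options.max_values_per_test_case is None

# With the flag, the cap is parsed as an int under the conventional dest name.
options = parser.parse_args(['--max-values-per-test-case', '1000'])
assert options.max_values_per_test_case == 1000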
@@ -65,6 +65,7 @@ def ProcessResults(options):
upload_bucket = options.upload_bucket
results_label = options.results_label
max_num_values = options.max_values_per_test_case
test_suite_start = (test_results[0]['startTime'] if test_results
else datetime.datetime.utcnow().isoformat() + 'Z')
run_identifier = RunIdentifier(results_label, test_suite_start)
@@ -74,7 +75,7 @@ def ProcessResults(options):
util.ApplyInParallel(
lambda result: ProcessTestResult(
result, upload_bucket, results_label, run_identifier,
test_suite_start, should_compute_metrics),
test_suite_start, should_compute_metrics, max_num_values),
test_results,
on_failure=lambda result: result.update(status='FAIL'),
)
@@ -95,7 +96,8 @@ def ProcessResults(options):
def ProcessTestResult(test_result, upload_bucket, results_label,
run_identifier, test_suite_start, should_compute_metrics):
run_identifier, test_suite_start, should_compute_metrics,
max_num_values):
AggregateTraces(test_result)
if upload_bucket is not None:
UploadArtifacts(test_result, upload_bucket, run_identifier)
@@ -104,13 +106,21 @@ def ProcessTestResult(test_result, upload_bucket, results_label,
test_result['_histograms'] = histogram_set.HistogramSet()
compute_metrics.ComputeTBMv2Metrics(test_result)
ExtractMeasurements(test_result)
AddDiagnosticsToHistograms(test_result, test_suite_start, results_label)
num_values = len(test_result['_histograms'])
if max_num_values is not None and num_values > max_num_values:
logging.error('%s produced %d values, but only %d are allowed.',
test_result['testPath'], num_values, max_num_values)
test_result['status'] = 'FAIL'
del test_result['_histograms']
else:
AddDiagnosticsToHistograms(test_result, test_suite_start, results_label)
def ExtractHistograms(test_results):
histograms = histogram_set.HistogramSet()
for result in test_results:
histograms.Merge(result['_histograms'])
if '_histograms' in result:
histograms.Merge(result['_histograms'])
histograms.DeduplicateDiagnostics()
return histograms.AsDicts()
......
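The condition that ProcessTestResult applies above, pulled out into a small hypothetical helper (ExceedsValueCap is not part of the change; it only restates the check for clarity):

def ExceedsValueCap(num_values, max_num_values):
  # A cap of None means --max-values-per-test-case was not passed, so no
  # limit is enforced; results at or under the cap keep their histograms.
  return max_num_values is not None and num_values > max_num_values

assert not ExceedsValueCap(7, None)  # no flag: never fails
assert not ExceedsValueCap(5, 5)     # at the cap: still allowed
assert ExceedsValueCap(7, 5)         # over the cap: result is marked FAIL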
@@ -63,6 +63,12 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
json.dump({'diagnostics': diagnostics}, f)
return testing.Artifact(diag_file)
def CreateMeasurementsArtifact(self, measurements):
with tempfile.NamedTemporaryFile(
dir=self.intermediate_dir, delete=False) as artifact_file:
json.dump({'measurements': measurements}, artifact_file)
return testing.Artifact(artifact_file.name)
def testJson3Output(self):
self.SerializeIntermediateResults(
testing.TestResult(
@@ -127,6 +133,41 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertEqual(artifacts['logs'], ['gs://logs.txt'])
self.assertEqual(artifacts['trace.html'], ['gs://trace.html'])
def testMaxValuesPerTestCase(self):
def SomeMeasurements(num):
return (
processor.MEASUREMENTS_NAME,
self.CreateMeasurementsArtifact({
'n%d' % i: {'unit': 'count', 'samples': [i]}
for i in range(num)
})
)
self.SerializeIntermediateResults(
testing.TestResult(
'benchmark/story1', status='PASS',
output_artifacts=dict([SomeMeasurements(3)])),
testing.TestResult(
'benchmark/story2', status='PASS',
output_artifacts=dict([SomeMeasurements(7)])),
)
exit_code = processor.main([
'--output-format', 'json-test-results',
'--output-format', 'histograms',
'--output-dir', self.output_dir,
'--intermediate-dir', self.intermediate_dir,
'--max-values-per-test-case', '5'
])
self.assertEqual(exit_code, 1)
with open(os.path.join(
self.output_dir, json3_output.OUTPUT_FILENAME)) as f:
results = json.load(f)
self.assertEqual(results['tests']['benchmark']['story1']['actual'], 'PASS')
self.assertEqual(results['tests']['benchmark']['story2']['actual'], 'FAIL')
def testHistogramsOutput(self):
self.SerializeIntermediateResults(
testing.TestResult(
......
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from core.results_processor import command_line
from telemetry.testing import options_for_unittests
def GetRunOptions(*args, **kwargs):
"""Augment telemetry options for tests with results_processor defaults."""
options = options_for_unittests.GetRunOptions(*args, **kwargs)
parser = command_line.ArgumentParser()
processor_options = parser.parse_args([])
for arg in vars(processor_options):
if not hasattr(options, arg):
setattr(options, arg, getattr(processor_options, arg))
return options
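A typical use of the new helper, mirroring the setUp change in benchmark_runner_unittest above (a sketch; the temporary directory and the cap value are illustrative):

import tempfile

from core import testing

options = testing.GetRunOptions(output_dir=tempfile.mkdtemp())
options.output_formats = ['histograms']
# Results-processor-only options are present with their parser defaults and
# can be overridden directly on the returned options object.
options.max_values_per_test_case = 1000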