Commit f7c3b53e authored by Juan Antonio Navarro Perez, committed by Commit Bot

[tools/perf] Add --max-values-per-test-case option

Bug: 1001038
Change-Id: I08747c6a733ac42bd564f985e3f9ad52478ce210
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1879207
Reviewed-by: Caleb Rouleau <crouleau@chromium.org>
Commit-Queue: Juan Antonio Navarro Pérez <perezju@chromium.org>
Cr-Commit-Position: refs/heads/master@{#709870}
parent 70c310e1
@@ -15,9 +15,11 @@ import unittest
 from chrome_telemetry_build import chromium_config
+from core import results_processor
+from core import testing
 from telemetry import benchmark as benchmark_module
 from telemetry import decorators
-from telemetry.testing import options_for_unittests
 from telemetry.testing import progress_reporter

 from py_utils import discover
@@ -31,15 +33,20 @@ from benchmarks import speedometer
 from benchmarks import v8_browsing

-MAX_NUM_VALUES = 50000
+# We want to prevent benchmarks from accidentally trying to upload too much
+# data to the chrome perf dashboard. So the smoke tests below cap the max
+# number of values that each story tested would produce when running on the
+# waterfall.
+MAX_VALUES_PER_TEST_CASE = 1000


-def SmokeTestGenerator(benchmark, num_pages=1):
-  """Generates a benchmark that includes first N pages from pageset.
+def SmokeTestGenerator(benchmark_class, num_pages=1):
+  """Generates a smoke test for the first N pages from a benchmark.

   Args:
-    benchmark: benchmark object to make smoke test.
-    num_pages: use the first N pages to run smoke test.
+    benchmark_class: a benchmark class to smoke test.
+    num_pages: only smoke test the first N pages, since smoke testing
+      everything would take too long to run.
   """
   # NOTE TO SHERIFFS: DO NOT DISABLE THIS TEST.
   #
@@ -50,43 +57,30 @@ def SmokeTestGenerator(benchmark, num_pages=1):
   @decorators.Disabled('chromeos')  # crbug.com/351114
   @decorators.Disabled('android')  # crbug.com/641934
   def BenchmarkSmokeTest(self):
-    class SinglePageBenchmark(benchmark):  # pylint: disable=no-init
-      def CreateStorySet(self, options):
-        # pylint: disable=super-on-old-class
-        story_set = super(SinglePageBenchmark, self).CreateStorySet(options)
-
-        # We want to prevent benchmarks from accidentally trying to upload too
-        # much data to the chrome perf dashboard. So this tests tries to
-        # estimate the amount of values that the benchmark _would_ create when
-        # running on the waterfall, and fails if too many values are produced.
-        # As we run a single story and not the whole benchmark, the number of
-        # max values allowed is scaled proportionally.
-        # TODO(crbug.com/981349): This logic is only really valid for legacy
-        # values, and does not take histograms into account. An alternative
-        # should be implemented when using the results processor.
-        type(self).MAX_NUM_VALUES = MAX_NUM_VALUES / len(story_set)
-        return story_set
-
     # Some benchmarks are running multiple iterations
     # which is not needed for a smoke test
-    if hasattr(SinglePageBenchmark, 'enable_smoke_test_mode'):
-      SinglePageBenchmark.enable_smoke_test_mode = True
+    if hasattr(benchmark_class, 'enable_smoke_test_mode'):
+      benchmark_class.enable_smoke_test_mode = True

     with tempfile_ext.NamedTemporaryDirectory() as temp_dir:
-      # Set the benchmark's default arguments.
-      options = options_for_unittests.GetRunOptions(
+      options = testing.GetRunOptions(
           output_dir=temp_dir,
-          benchmark_cls=SinglePageBenchmark,
-          # Only smoke test num_pages since smoke testing everything takes
-          # too long.
+          benchmark_cls=benchmark_class,
           overrides={'story_shard_end_index': num_pages},
           environment=chromium_config.GetDefaultChromiumConfig())
       options.pageset_repeat = 1  # For smoke testing only run the page once.
-      single_page_benchmark = SinglePageBenchmark()
-      return_code = single_page_benchmark.Run(options)
-      if return_code == -1:
-        self.skipTest('The benchmark was not run.')
-      self.assertEqual(0, return_code, msg='Failed: %s' % benchmark)
+      options.output_formats = ['histograms']
+      options.max_values_per_test_case = MAX_VALUES_PER_TEST_CASE
+
+      return_code = benchmark_class().Run(options)
+      if return_code == -1:
+        self.skipTest('The benchmark was not run.')
+      self.assertEqual(
+          return_code, 0,
+          msg='Benchmark run failed: %s' % benchmark_class.Name())
+
+      return_code = results_processor.ProcessResults(options)
+      self.assertEqual(
+          return_code, 0,
+          msg='Result processing failed: %s' % benchmark_class.Name())

   return BenchmarkSmokeTest
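The generated BenchmarkSmokeTest method now runs the benchmark class as-is (no SinglePageBenchmark wrapper), requests histograms output, and defers the value-count check to results_processor.ProcessResults. The sketch below only illustrates how such a generated method could be attached to a test case; FakeBenchmark and the manual setattr are hypothetical stand-ins for whatever discovery and wiring the surrounding file actually performs, and SmokeTestGenerator is the function defined in the hunk above.

import unittest

from telemetry import benchmark as benchmark_module


class FakeBenchmark(benchmark_module.Benchmark):
  """Hypothetical benchmark standing in for a real, discovered benchmark."""

  @classmethod
  def Name(cls):
    return 'fake_benchmark'


class BenchmarkSmokeTestCase(unittest.TestCase):
  """Illustrative container for generated smoke test methods."""


# One generated test method per benchmark, covering only its first story.
setattr(BenchmarkSmokeTestCase, 'testFakeBenchmark',
        SmokeTestGenerator(FakeBenchmark, num_pages=1))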
@@ -15,10 +15,11 @@ import unittest
 from chrome_telemetry_build import chromium_config
 from core import perf_benchmark
+from core import results_processor
+from core import testing
 from telemetry import decorators
 from telemetry.internal.browser import browser_finder
-from telemetry.testing import options_for_unittests
 from telemetry.testing import progress_reporter

 from py_utils import discover
@@ -194,7 +195,11 @@ _DISABLED_TESTS = frozenset({
 })

-MAX_NUM_VALUES = 50000
+# We want to prevent benchmarks from accidentally trying to upload too much
+# data to the chrome perf dashboard. So the smoke tests below cap the max
+# number of values that each story tested would produce when running on the
+# waterfall.
+MAX_VALUES_PER_TEST_CASE = 1000


 def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
@@ -211,18 +216,6 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
     def CreateStorySet(self, options):
       # pylint: disable=super-on-old-class
       story_set = super(SinglePageBenchmark, self).CreateStorySet(options)
-
-      # We want to prevent benchmarks from accidentally trying to upload too
-      # much data to the chrome perf dashboard. So this tests tries to
-      # estimate the amount of values that the benchmark _would_ create when
-      # running on the waterfall, and fails if too many values are produced.
-      # As we run a single story and not the whole benchmark, the number of
-      # max values allowed is scaled proportionally.
-      # TODO(crbug.com/981349): This logic is only really valid for legacy
-      # values, and does not take histograms into account. An alternative
-      # should be implemented when using the results processor.
-      type(self).MAX_NUM_VALUES = MAX_NUM_VALUES / len(story_set)
-
       stories_to_remove = [s for s in story_set.stories if s !=
                            story_to_smoke_test]
       for s in stories_to_remove:
@@ -251,9 +244,15 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
       self.skipTest('Test is explicitly disabled')
     single_page_benchmark = SinglePageBenchmark()
     return_code = single_page_benchmark.Run(options)
     if return_code == -1:
       self.skipTest('The benchmark was not run.')
-    self.assertEqual(0, return_code, msg='Failed: %s' % benchmark_class)
+    self.assertEqual(
+        return_code, 0,
+        msg='Benchmark run failed: %s' % benchmark_class.Name())
+    return_code = results_processor.ProcessResults(options)
+    self.assertEqual(
+        return_code, 0,
+        msg='Result processing failed: %s' % benchmark_class.Name())

   # We attach the test method to SystemHealthBenchmarkSmokeTest dynamically
   # so that we can set the test method name to include
@@ -270,10 +269,12 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
 def GenerateBenchmarkOptions(output_dir, benchmark_cls):
-  options = options_for_unittests.GetRunOptions(
+  options = testing.GetRunOptions(
       output_dir=output_dir, benchmark_cls=benchmark_cls,
       environment=chromium_config.GetDefaultChromiumConfig())
   options.pageset_repeat = 1  # For smoke testing only run each page once.
+  options.output_formats = ['histograms']
+  options.max_values_per_test_case = MAX_VALUES_PER_TEST_CASE

   # Enable browser logging in the smoke test only. Hopefully, this will detect
   # all crashes and hence remove the need to enable logging in actual perf
@@ -292,7 +293,7 @@ def load_tests(loader, standard_tests, pattern):
   names_stories_to_smoke_tests = []
   for benchmark_class in benchmark_classes:
-    # HACK: these options should be derived from options_for_unittests which are
+    # HACK: these options should be derived from GetRunOptions which are
     # the resolved options from run_tests' arguments. However, options is only
     # parsed during test time which happens after load_tests are called.
     # Since none of our system health benchmarks creates stories based on
@@ -10,7 +10,6 @@ import unittest
 import mock

-from telemetry.testing import options_for_unittests
 from telemetry.testing import test_stories
 from telemetry.web_perf import timeline_based_measurement
 from tracing.value.diagnostics import all_diagnostics
@@ -20,6 +19,7 @@ from tracing.value import histogram_set
 from core import benchmark_runner
 from core import perf_benchmark
 from core import results_processor
+from core import testing


 def _FakeParseArgs(environment, args, results_arg_parser):
@@ -53,7 +53,7 @@ class BenchmarkRunnerIntegrationTest(unittest.TestCase):
   """

   def setUp(self):
-    self.options = options_for_unittests.GetRunOptions(
+    self.options = testing.GetRunOptions(
         output_dir=tempfile.mkdtemp())
     self.options.output_formats = ['histograms']
@@ -53,6 +53,11 @@ def ArgumentParser(standalone=False, legacy_formats=None):
       help=Sentences(
           'Path to a directory where to write final results.',
           'Default: %(default)s.'))
+  group.add_argument(
+      '--max-values-per-test-case', type=int, metavar='NUM',
+      help=Sentences(
+          'Fail a test run if it produces more than this number of values.',
+          'This includes both ad hoc and metric generated measurements.'))
   group.add_argument(
       '--reset-results', action='store_true',
       help=Sentences(
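The flag is registered on the results processor's own argument parser, so it is available both to the standalone tool and, via the new core.testing helper at the end of this CL, to the telemetry-based smoke tests above. Below is a hedged sketch of a standalone invocation, modeled on the testMaxValuesPerTestCase integration test further down in this CL; the directory paths are placeholders.

from core.results_processor import processor

# Placeholder directories; the flag names mirror those exercised by the
# integration test added later in this CL.
exit_code = processor.main([
    '--output-format', 'histograms',
    '--output-dir', '/tmp/perf_output',
    '--intermediate-dir', '/tmp/perf_intermediate',
    '--max-values-per-test-case', '1000',
])
print('results processing finished with exit code', exit_code)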
@@ -65,6 +65,7 @@ def ProcessResults(options):
   upload_bucket = options.upload_bucket
   results_label = options.results_label
+  max_num_values = options.max_values_per_test_case
   test_suite_start = (test_results[0]['startTime'] if test_results
                       else datetime.datetime.utcnow().isoformat() + 'Z')
   run_identifier = RunIdentifier(results_label, test_suite_start)
@@ -74,7 +75,7 @@ def ProcessResults(options):
   util.ApplyInParallel(
       lambda result: ProcessTestResult(
           result, upload_bucket, results_label, run_identifier,
-          test_suite_start, should_compute_metrics),
+          test_suite_start, should_compute_metrics, max_num_values),
       test_results,
       on_failure=lambda result: result.update(status='FAIL'),
   )
@@ -95,7 +96,8 @@ def ProcessResults(options):
 def ProcessTestResult(test_result, upload_bucket, results_label,
-                      run_identifier, test_suite_start, should_compute_metrics):
+                      run_identifier, test_suite_start, should_compute_metrics,
+                      max_num_values):
   AggregateTraces(test_result)
   if upload_bucket is not None:
     UploadArtifacts(test_result, upload_bucket, run_identifier)
@@ -104,13 +106,21 @@ def ProcessTestResult(test_result, upload_bucket, results_label,
     test_result['_histograms'] = histogram_set.HistogramSet()
     compute_metrics.ComputeTBMv2Metrics(test_result)
     ExtractMeasurements(test_result)
-    AddDiagnosticsToHistograms(test_result, test_suite_start, results_label)
+    num_values = len(test_result['_histograms'])
+    if max_num_values is not None and num_values > max_num_values:
+      logging.error('%s produced %d values, but only %d are allowed.',
+                    test_result['testPath'], num_values, max_num_values)
+      test_result['status'] = 'FAIL'
+      del test_result['_histograms']
+    else:
+      AddDiagnosticsToHistograms(test_result, test_suite_start, results_label)


 def ExtractHistograms(test_results):
   histograms = histogram_set.HistogramSet()
   for result in test_results:
-    histograms.Merge(result['_histograms'])
+    if '_histograms' in result:
+      histograms.Merge(result['_histograms'])
   histograms.DeduplicateDiagnostics()
   return histograms.AsDicts()
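In short, the cap is a per-test-case threshold applied after metric computation: a result that produces more histograms than allowed is flipped to FAIL and its histograms are dropped, which is why ExtractHistograms now tolerates results without a '_histograms' entry. The following self-contained sketch (plain dicts and lists standing in for the real intermediate results and HistogramSet) replays that decision with the same numbers the integration test below uses, a cap of 5 against stories producing 3 and 7 values.

def apply_cap(test_result, max_num_values):
  # Same check as in ProcessTestResult above, minus logging and uploads.
  num_values = len(test_result['_histograms'])
  if max_num_values is not None and num_values > max_num_values:
    test_result['status'] = 'FAIL'
    del test_result['_histograms']


results = [
    {'testPath': 'benchmark/story1', 'status': 'PASS',
     '_histograms': [1, 2, 3]},
    {'testPath': 'benchmark/story2', 'status': 'PASS',
     '_histograms': [1, 2, 3, 4, 5, 6, 7]},
]
for result in results:
  apply_cap(result, max_num_values=5)

assert results[0]['status'] == 'PASS'
assert results[1]['status'] == 'FAIL' and '_histograms' not in results[1]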
@@ -63,6 +63,12 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
       json.dump({'diagnostics': diagnostics}, f)
     return testing.Artifact(diag_file)

+  def CreateMeasurementsArtifact(self, measurements):
+    with tempfile.NamedTemporaryFile(
+        dir=self.intermediate_dir, delete=False) as artifact_file:
+      json.dump({'measurements': measurements}, artifact_file)
+    return testing.Artifact(artifact_file.name)
+
   def testJson3Output(self):
     self.SerializeIntermediateResults(
         testing.TestResult(
@@ -127,6 +133,41 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
     self.assertEqual(artifacts['logs'], ['gs://logs.txt'])
     self.assertEqual(artifacts['trace.html'], ['gs://trace.html'])

+  def testMaxValuesPerTestCase(self):
+    def SomeMeasurements(num):
+      return (
+          processor.MEASUREMENTS_NAME,
+          self.CreateMeasurementsArtifact({
+              'n%d' % i: {'unit': 'count', 'samples': [i]}
+              for i in range(num)
+          })
+      )
+
+    self.SerializeIntermediateResults(
+        testing.TestResult(
+            'benchmark/story1', status='PASS',
+            output_artifacts=dict([SomeMeasurements(3)])),
+        testing.TestResult(
+            'benchmark/story2', status='PASS',
+            output_artifacts=dict([SomeMeasurements(7)])),
+    )
+
+    exit_code = processor.main([
+        '--output-format', 'json-test-results',
+        '--output-format', 'histograms',
+        '--output-dir', self.output_dir,
+        '--intermediate-dir', self.intermediate_dir,
+        '--max-values-per-test-case', '5'
+    ])
+    self.assertEqual(exit_code, 1)
+
+    with open(os.path.join(
+        self.output_dir, json3_output.OUTPUT_FILENAME)) as f:
+      results = json.load(f)
+
+    self.assertEqual(results['tests']['benchmark']['story1']['actual'], 'PASS')
+    self.assertEqual(results['tests']['benchmark']['story2']['actual'], 'FAIL')
+
   def testHistogramsOutput(self):
     self.SerializeIntermediateResults(
         testing.TestResult(
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from core.results_processor import command_line

from telemetry.testing import options_for_unittests


def GetRunOptions(*args, **kwargs):
  """Augment telemetry options for tests with results_processor defaults."""
  options = options_for_unittests.GetRunOptions(*args, **kwargs)
  parser = command_line.ArgumentParser()
  processor_options = parser.parse_args([])
  for arg in vars(processor_options):
    if not hasattr(options, arg):
      setattr(options, arg, getattr(processor_options, arg))
  return options
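This new helper merges telemetry's unittest options with the defaults of the results processor's parser, so attributes such as output_formats and max_values_per_test_case always exist on the options object handed to ProcessResults. A minimal usage sketch follows, mirroring BenchmarkRunnerIntegrationTest.setUp() earlier in this CL; it presumes the telemetry test harness has already initialized options_for_unittests, the explicit cap value is purely illustrative, and when the cap is left unset its argparse default (presumably None) leaves the check in ProcessResults disabled.

import tempfile

from core import testing

# A throwaway output directory, as in the integration test's setUp().
options = testing.GetRunOptions(output_dir=tempfile.mkdtemp())
options.output_formats = ['histograms']
options.max_values_per_test_case = 1000  # opt in to the value cap explicitly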