Commit f7c3b53e authored by Juan Antonio Navarro Perez, committed by Commit Bot

[tools/perf] Add --max-values-per-test-case option

Bug: 1001038
Change-Id: I08747c6a733ac42bd564f985e3f9ad52478ce210
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1879207
Reviewed-by: Caleb Rouleau <crouleau@chromium.org>
Commit-Queue: Juan Antonio Navarro Pérez <perezju@chromium.org>
Cr-Commit-Position: refs/heads/master@{#709870}
parent 70c310e1
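For orientation, a minimal sketch of how the new flag is exercised end to end. It mirrors the integration test added below; the `processor` import path and the directory arguments are assumptions made for illustration only.

from core.results_processor import processor  # assumed module path

# Fail any test case that emits more than 5 values (ad hoc measurements plus
# metric-generated histograms); the run's exit code becomes 1 when any test
# case exceeds the cap.
exit_code = processor.main([
    '--output-format', 'json-test-results',
    '--output-format', 'histograms',
    '--output-dir', '/tmp/smoke_output',        # illustrative path
    '--intermediate-dir', '/tmp/intermediate',  # illustrative path
    '--max-values-per-test-case', '5',
])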
@@ -15,9 +15,11 @@ import unittest
from chrome_telemetry_build import chromium_config
from core import results_processor
from core import testing
from telemetry import benchmark as benchmark_module
from telemetry import decorators
from telemetry.testing import options_for_unittests
from telemetry.testing import progress_reporter
from py_utils import discover
@@ -31,15 +33,20 @@ from benchmarks import speedometer
from benchmarks import v8_browsing
MAX_NUM_VALUES = 50000
# We want to prevent benchmarks from accidentally trying to upload too much
# data to the chrome perf dashboard. So the smoke tests below cap the max
# number of values that each story tested would produce when running on the
# waterfall.
MAX_VALUES_PER_TEST_CASE = 1000
def SmokeTestGenerator(benchmark, num_pages=1):
"""Generates a benchmark that includes first N pages from pageset.
def SmokeTestGenerator(benchmark_class, num_pages=1):
"""Generates a somke test for the first N pages from a benchmark.
Args:
benchmark: benchmark object to make smoke test.
num_pages: use the first N pages to run smoke test.
benchmark_class: a benchmark class to smoke test.
num_pages: only smoke test the first N pages, since smoke testing
everything would take too long to run.
"""
# NOTE TO SHERIFFS: DO NOT DISABLE THIS TEST.
#
@@ -50,43 +57,30 @@ def SmokeTestGenerator(benchmark, num_pages=1):
@decorators.Disabled('chromeos') # crbug.com/351114
@decorators.Disabled('android') # crbug.com/641934
def BenchmarkSmokeTest(self):
class SinglePageBenchmark(benchmark): # pylint: disable=no-init
def CreateStorySet(self, options):
# pylint: disable=super-on-old-class
story_set = super(SinglePageBenchmark, self).CreateStorySet(options)
# We want to prevent benchmarks from accidentally trying to upload too
# much data to the chrome perf dashboard. So this test tries to
# estimate the number of values that the benchmark _would_ create when
# running on the waterfall, and fails if too many values are produced.
# As we run a single story and not the whole benchmark, the maximum
# number of values allowed is scaled proportionally.
# TODO(crbug.com/981349): This logic is only really valid for legacy
# values, and does not take histograms into account. An alternative
# should be implemented when using the results processor.
type(self).MAX_NUM_VALUES = MAX_NUM_VALUES / len(story_set)
return story_set
# Some benchmarks run multiple iterations, which is not needed for a
# smoke test.
if hasattr(SinglePageBenchmark, 'enable_smoke_test_mode'):
SinglePageBenchmark.enable_smoke_test_mode = True
if hasattr(benchmark_class, 'enable_smoke_test_mode'):
benchmark_class.enable_smoke_test_mode = True
with tempfile_ext.NamedTemporaryDirectory() as temp_dir:
# Set the benchmark's default arguments.
options = options_for_unittests.GetRunOptions(
options = testing.GetRunOptions(
output_dir=temp_dir,
benchmark_cls=SinglePageBenchmark,
# Only smoke test num_pages since smoke testing everything takes
# too long.
benchmark_cls=benchmark_class,
overrides={'story_shard_end_index': num_pages},
environment=chromium_config.GetDefaultChromiumConfig())
options.pageset_repeat = 1 # For smoke testing only run the page once.
single_page_benchmark = SinglePageBenchmark()
return_code = single_page_benchmark.Run(options)
if return_code == -1:
self.skipTest('The benchmark was not run.')
self.assertEqual(0, return_code, msg='Failed: %s' % benchmark)
options.output_formats = ['histograms']
options.max_values_per_test_case = MAX_VALUES_PER_TEST_CASE
return_code = benchmark_class().Run(options)
if return_code == -1:
self.skipTest('The benchmark was not run.')
self.assertEqual(
return_code, 0,
msg='Benchmark run failed: %s' % benchmark_class.Name())
return_code = results_processor.ProcessResults(options)
self.assertEqual(
return_code, 0,
msg='Result processing failed: %s' % benchmark_class.Name())
return BenchmarkSmokeTest
......
@@ -15,10 +15,11 @@ import unittest
from chrome_telemetry_build import chromium_config
from core import perf_benchmark
from core import results_processor
from core import testing
from telemetry import decorators
from telemetry.internal.browser import browser_finder
from telemetry.testing import options_for_unittests
from telemetry.testing import progress_reporter
from py_utils import discover
@@ -194,7 +195,11 @@ _DISABLED_TESTS = frozenset({
})
MAX_NUM_VALUES = 50000
# We want to prevent benchmarks from accidentally trying to upload too much
# data to the chrome perf dashboard. So the smoke tests below cap the max
# number of values that each story tested would produce when running on the
# waterfall.
MAX_VALUES_PER_TEST_CASE = 1000
def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
@@ -211,18 +216,6 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
def CreateStorySet(self, options):
# pylint: disable=super-on-old-class
story_set = super(SinglePageBenchmark, self).CreateStorySet(options)
# We want to prevent benchmarks from accidentally trying to upload too
# much data to the chrome perf dashboard. So this test tries to
# estimate the number of values that the benchmark _would_ create when
# running on the waterfall, and fails if too many values are produced.
# As we run a single story and not the whole benchmark, the maximum
# number of values allowed is scaled proportionally.
# TODO(crbug.com/981349): This logic is only really valid for legacy
# values, and does not take histograms into account. An alternative
# should be implemented when using the results processor.
type(self).MAX_NUM_VALUES = MAX_NUM_VALUES / len(story_set)
stories_to_remove = [s for s in story_set.stories if s !=
story_to_smoke_test]
for s in stories_to_remove:
@@ -251,9 +244,15 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
self.skipTest('Test is explicitly disabled')
single_page_benchmark = SinglePageBenchmark()
return_code = single_page_benchmark.Run(options)
if return_code == -1:
self.skipTest('The benchmark was not run.')
self.assertEqual(0, return_code, msg='Failed: %s' % benchmark_class)
if return_code == -1:
self.skipTest('The benchmark was not run.')
self.assertEqual(
return_code, 0,
msg='Benchmark run failed: %s' % benchmark_class.Name())
return_code = results_processor.ProcessResults(options)
self.assertEqual(
return_code, 0,
msg='Result processing failed: %s' % benchmark_class.Name())
# We attach the test method to SystemHealthBenchmarkSmokeTest dynamically
# so that we can set the test method name to include
@@ -270,10 +269,12 @@ def _GenerateSmokeTestCase(benchmark_class, story_to_smoke_test):
def GenerateBenchmarkOptions(output_dir, benchmark_cls):
options = options_for_unittests.GetRunOptions(
options = testing.GetRunOptions(
output_dir=output_dir, benchmark_cls=benchmark_cls,
environment=chromium_config.GetDefaultChromiumConfig())
options.pageset_repeat = 1 # For smoke testing only run each page once.
options.output_formats = ['histograms']
options.max_values_per_test_case = MAX_VALUES_PER_TEST_CASE
# Enable browser logging in the smoke test only. Hopefully, this will detect
# all crashes and hence remove the need to enable logging in actual perf
@@ -292,7 +293,7 @@ def load_tests(loader, standard_tests, pattern):
names_stories_to_smoke_tests = []
for benchmark_class in benchmark_classes:
# HACK: these options should be derived from options_for_unittests which are
# HACK: these options should be derived from GetRunOptions, which returns
# the resolved options from run_tests' arguments. However, options is only
# parsed during test time which happens after load_tests are called.
# Since none of our system health benchmarks creates stories based on
......
@@ -10,7 +10,6 @@ import unittest
import mock
from telemetry.testing import options_for_unittests
from telemetry.testing import test_stories
from telemetry.web_perf import timeline_based_measurement
from tracing.value.diagnostics import all_diagnostics
@@ -20,6 +19,7 @@ from tracing.value import histogram_set
from core import benchmark_runner
from core import perf_benchmark
from core import results_processor
from core import testing
def _FakeParseArgs(environment, args, results_arg_parser):
@@ -53,7 +53,7 @@ class BenchmarkRunnerIntegrationTest(unittest.TestCase):
"""
def setUp(self):
self.options = options_for_unittests.GetRunOptions(
self.options = testing.GetRunOptions(
output_dir=tempfile.mkdtemp())
self.options.output_formats = ['histograms']
......
@@ -53,6 +53,11 @@ def ArgumentParser(standalone=False, legacy_formats=None):
help=Sentences(
'Path to a directory where to write final results.',
'Default: %(default)s.'))
group.add_argument(
'--max-values-per-test-case', type=int, metavar='NUM',
help=Sentences(
'Fail a test run if it produces more than this number of values.',
'This includes both ad hoc and metric generated measurements.'))
group.add_argument(
'--reset-results', action='store_true',
help=Sentences(
......
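A quick illustration of how the new option surfaces on the parsed options (a sketch; it relies on parse_args accepting an empty argument list, as the new core/testing.py helper at the end of this change does):

from core.results_processor import command_line

parser = command_line.ArgumentParser()

# Without the flag the option defaults to None, so ProcessResults (next hunk)
# skips the cap check entirely.
options = parser.parse_args([])
assert options.max_values_per_test_case is None

# With the flag, the cap is parsed as an int under the conventional dest name.
options = parser.parse_args(['--max-values-per-test-case', '1000'])
assert options.max_values_per_test_case == 1000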
@@ -65,6 +65,7 @@ def ProcessResults(options):
upload_bucket = options.upload_bucket
results_label = options.results_label
max_num_values = options.max_values_per_test_case
test_suite_start = (test_results[0]['startTime'] if test_results
else datetime.datetime.utcnow().isoformat() + 'Z')
run_identifier = RunIdentifier(results_label, test_suite_start)
@@ -74,7 +75,7 @@ def ProcessResults(options):
util.ApplyInParallel(
lambda result: ProcessTestResult(
result, upload_bucket, results_label, run_identifier,
test_suite_start, should_compute_metrics),
test_suite_start, should_compute_metrics, max_num_values),
test_results,
on_failure=lambda result: result.update(status='FAIL'),
)
@@ -95,7 +96,8 @@ def ProcessResults(options):
def ProcessTestResult(test_result, upload_bucket, results_label,
run_identifier, test_suite_start, should_compute_metrics):
run_identifier, test_suite_start, should_compute_metrics,
max_num_values):
AggregateTraces(test_result)
if upload_bucket is not None:
UploadArtifacts(test_result, upload_bucket, run_identifier)
@@ -104,13 +106,21 @@ def ProcessTestResult(test_result, upload_bucket, results_label,
test_result['_histograms'] = histogram_set.HistogramSet()
compute_metrics.ComputeTBMv2Metrics(test_result)
ExtractMeasurements(test_result)
AddDiagnosticsToHistograms(test_result, test_suite_start, results_label)
num_values = len(test_result['_histograms'])
if max_num_values is not None and num_values > max_num_values:
logging.error('%s produced %d values, but only %d are allowed.',
test_result['testPath'], num_values, max_num_values)
test_result['status'] = 'FAIL'
del test_result['_histograms']
else:
AddDiagnosticsToHistograms(test_result, test_suite_start, results_label)
def ExtractHistograms(test_results):
histograms = histogram_set.HistogramSet()
for result in test_results:
histograms.Merge(result['_histograms'])
if '_histograms' in result:
histograms.Merge(result['_histograms'])
histograms.DeduplicateDiagnostics()
return histograms.AsDicts()
......
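The condition that ProcessTestResult applies above, pulled out into a small hypothetical helper (ExceedsValueCap is not part of the change; it only restates the check for clarity):

def ExceedsValueCap(num_values, max_num_values):
  # A cap of None means --max-values-per-test-case was not passed, so no
  # limit is enforced; results at or under the cap keep their histograms.
  return max_num_values is not None and num_values > max_num_values

assert not ExceedsValueCap(7, None)  # no flag: never fails
assert not ExceedsValueCap(5, 5)     # at the cap: still allowed
assert ExceedsValueCap(7, 5)         # over the cap: result is marked FAIL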
@@ -63,6 +63,12 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
json.dump({'diagnostics': diagnostics}, f)
return testing.Artifact(diag_file)
def CreateMeasurementsArtifact(self, measurements):
with tempfile.NamedTemporaryFile(
dir=self.intermediate_dir, delete=False) as artifact_file:
json.dump({'measurements': measurements}, artifact_file)
return testing.Artifact(artifact_file.name)
def testJson3Output(self):
self.SerializeIntermediateResults(
testing.TestResult(
@@ -127,6 +133,41 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertEqual(artifacts['logs'], ['gs://logs.txt'])
self.assertEqual(artifacts['trace.html'], ['gs://trace.html'])
def testMaxValuesPerTestCase(self):
def SomeMeasurements(num):
return (
processor.MEASUREMENTS_NAME,
self.CreateMeasurementsArtifact({
'n%d' % i: {'unit': 'count', 'samples': [i]}
for i in range(num)
})
)
self.SerializeIntermediateResults(
testing.TestResult(
'benchmark/story1', status='PASS',
output_artifacts=dict([SomeMeasurements(3)])),
testing.TestResult(
'benchmark/story2', status='PASS',
output_artifacts=dict([SomeMeasurements(7)])),
)
exit_code = processor.main([
'--output-format', 'json-test-results',
'--output-format', 'histograms',
'--output-dir', self.output_dir,
'--intermediate-dir', self.intermediate_dir,
'--max-values-per-test-case', '5'
])
self.assertEqual(exit_code, 1)
with open(os.path.join(
self.output_dir, json3_output.OUTPUT_FILENAME)) as f:
results = json.load(f)
self.assertEqual(results['tests']['benchmark']['story1']['actual'], 'PASS')
self.assertEqual(results['tests']['benchmark']['story2']['actual'], 'FAIL')
def testHistogramsOutput(self):
self.SerializeIntermediateResults(
testing.TestResult(
......
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from core.results_processor import command_line
from telemetry.testing import options_for_unittests
def GetRunOptions(*args, **kwargs):
"""Augment telemetry options for tests with results_processor defaults."""
options = options_for_unittests.GetRunOptions(*args, **kwargs)
parser = command_line.ArgumentParser()
processor_options = parser.parse_args([])
for arg in vars(processor_options):
if not hasattr(options, arg):
setattr(options, arg, getattr(processor_options, arg))
return options
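A typical use of the new helper, mirroring the setUp change in benchmark_runner_unittest above (a sketch; the temporary directory and the cap value are illustrative):

import tempfile

from core import testing

options = testing.GetRunOptions(output_dir=tempfile.mkdtemp())
options.output_formats = ['histograms']
# Results-processor-only options are present with their parser defaults and
# can be overridden directly on the returned options object.
options.max_values_per_test_case = 1000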