Commit ff3381da authored by behdad, committed by Commit Bot

Unittests for representative perf test scripts

Unittests are added to cover the functionality of the representative perf
test scripts.

Bug: chromium:1029952
Change-Id: Ie3ab5d1a976d9511fbde6a401fc544ead3e50091
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2017163
Commit-Queue: Behdad Bakhshinategh <behdadb@chromium.org>
Reviewed-by: Caleb Rouleau <crouleau@chromium.org>
Reviewed-by: Sadrul Chowdhury <sadrul@chromium.org>
Cr-Commit-Position: refs/heads/master@{#736014}
parent 04b36299
@@ -2850,6 +2850,9 @@ group("telemetry_perf_unittests") {
     # For tests in tools/perf/process_perf_results_unittest.py
     "//build/android/pylib/",
     "//tools/swarming_client/",
+
+    # For representative perf testing run_rendering_benchmark_with_gated_performance.py
+    "//testing/scripts/run_rendering_benchmark_with_gated_performance.py",
   ]
 }
...
@@ -86,48 +86,62 @@ class ResultRecorder(object):
     return (self.output, self.return_code)
 
 
-def interpret_run_benchmark_results(upper_limit_data,
-                                    isolated_script_test_output, benchmark):
-  out_dir_path = os.path.dirname(isolated_script_test_output)
-  output_path = os.path.join(out_dir_path, benchmark, 'test_results.json')
-  result_recorder = ResultRecorder()
-
-  with open(output_path, 'r+') as resultsFile:
-    initialOut = json.load(resultsFile)
-    result_recorder.set_tests(initialOut)
-
-    results_path = os.path.join(out_dir_path, benchmark, 'perf_results.csv')
-    values_per_story = {}
-
-    with open(results_path) as csv_file:
-      reader = csv.DictReader(csv_file)
-      for row in reader:
-        # For now only frame_times is used for testing representatives'
-        # performance.
-        if row['name'] != 'frame_times':
-          continue
-        story_name = row['stories']
-        if (story_name not in upper_limit_data):
-          continue
-        if story_name not in values_per_story:
-          values_per_story[story_name] = {
-            'averages': [],
-            'ci_095': []
-          }
-
-        if (row['avg'] == '' or row['count'] == 0):
-          continue
-        values_per_story[story_name]['ci_095'].append(float(row['ci_095']))
-        upper_limit_ci = upper_limit_data[story_name]['ci_095']
-        # Only average values which are not noisy will be used
-        if (float(row['ci_095']) <= upper_limit_ci * CI_ERROR_MARGIN):
-          values_per_story[story_name]['averages'].append(float(row['avg']))
-
-    # Clearing the result of run_benchmark and write the gated perf results
-    resultsFile.seek(0)
-    resultsFile.truncate(0)
+def parse_csv_results(csv_obj, upper_limit_data):
+  """Parses the raw CSV data.
+
+  Converts the csv_obj into an array of valid values for averages and
+  confidence intervals based on the described upper_limits.
+
+  Args:
+    csv_obj: An array of rows (dict) describing the CSV results
+    upper_limit_data: A dictionary containing the upper limits of each story
+
+  Returns:
+    A dictionary which has the stories as keys and an array of confidence
+    intervals and valid averages as data.
+  """
+  values_per_story = {}
+  for row in csv_obj:
+    # For now only frame_times is used for testing representatives'
+    # performance.
+    if row['name'] != 'frame_times':
+      continue
+    story_name = row['stories']
+    if (story_name not in upper_limit_data):
+      continue
+    if story_name not in values_per_story:
+      values_per_story[story_name] = {
+        'averages': [],
+        'ci_095': []
+      }
+
+    if (row['avg'] == '' or row['count'] == 0):
+      continue
+    values_per_story[story_name]['ci_095'].append(float(row['ci_095']))
+    upper_limit_ci = upper_limit_data[story_name]['ci_095']
+    # Only average values which are not noisy will be used.
+    if (float(row['ci_095']) <= upper_limit_ci * CI_ERROR_MARGIN):
+      values_per_story[story_name]['averages'].append(float(row['avg']))
+
+  return values_per_story
+
+
+def compare_values(values_per_story, upper_limit_data, benchmark,
+                   result_recorder):
+  """Compares the parsed values against the given upper limits.
+
+  Compares the values in values_per_story with the upper_limit_data and
+  determines if each story passes or fails.
+
+  Args:
+    values_per_story: A dictionary with the stories as keys and arrays of
+      confidence intervals and valid averages as data.
+    upper_limit_data: A dictionary containing the upper limits of each story
+    benchmark: A String for the benchmark (e.g. rendering.desktop) used only
+      for printing the results.
+    result_recorder: A ResultRecorder containing the initial failures if there
+      are stories which failed prior to comparing values (e.g. GPU crashes).
+
+  Returns:
+    A ResultRecorder containing the passes and failures.
+  """
   for story_name in values_per_story:
     if len(values_per_story[story_name]['ci_095']) == 0:
       print(('[ FAILED ] {}/{} has no valid values for frame_times. Check ' +
@@ -158,6 +172,28 @@ def interpret_run_benchmark_results(upper_limit_data,
   return result_recorder
 
 
+def interpret_run_benchmark_results(upper_limit_data,
+                                    isolated_script_test_output, benchmark):
+  out_dir_path = os.path.dirname(isolated_script_test_output)
+  output_path = os.path.join(out_dir_path, benchmark, 'test_results.json')
+  result_recorder = ResultRecorder()
+
+  with open(output_path, 'r+') as resultsFile:
+    initialOut = json.load(resultsFile)
+    result_recorder.set_tests(initialOut)
+
+    results_path = os.path.join(out_dir_path, benchmark, 'perf_results.csv')
+    with open(results_path) as csv_file:
+      csv_obj = csv.DictReader(csv_file)
+      values_per_story = parse_csv_results(csv_obj, upper_limit_data)
+
+    # Clear the results of run_benchmark and write the gated perf results.
+    resultsFile.seek(0)
+    resultsFile.truncate(0)
+
+  return compare_values(values_per_story, upper_limit_data, benchmark,
+                        result_recorder)
+
 
 def replace_arg_values(args, key_value_pairs):
   for index in range(0, len(args)):
     for (key, value) in key_value_pairs:
...
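The noise filter in parse_csv_results depends on CI_ERROR_MARGIN, a module-level constant that is not part of this diff; the unittests below only pass if it is roughly 1.5 (a ci_095 of 15 is still accepted against an upper limit of 10, while 16 is rejected, and 40 is rejected against a limit of 20). A minimal sketch of the parsing step under that assumption:

    # Illustrative only; the rows and the CI_ERROR_MARGIN value of 1.5 are
    # assumptions inferred from the unittests, not values taken from this CL.
    rows = [
        {'stories': 'story_1', 'name': 'frame_times',
         'avg': 16, 'count': 10, 'ci_095': 1.5},
        {'stories': 'story_1', 'name': 'frame_times',
         'avg': 30, 'count': 10, 'ci_095': 40},  # too noisy: avg is discarded
    ]
    values = parse_csv_results(rows, {'story_1': {'ci_095': 10, 'avg': 20}})
    # values == {'story_1': {'averages': [16.0], 'ci_095': [1.5, 40.0]}}

compare_values then takes this dictionary, checks the surviving averages of each story against its 'avg' upper limit, and records the passes and failures on the ResultRecorder.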
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import sys
import unittest
# Add src/testing/scripts into sys.path to import the representative perf
# test script.
PERF_TEST_SCRIPTS_DIR = os.path.join(
os.path.dirname(__file__), '..', '..', 'testing', 'scripts')
sys.path.append(PERF_TEST_SCRIPTS_DIR)
import run_rendering_benchmark_with_gated_performance as perf_tests # pylint: disable=wrong-import-position,import-error
UPPER_LIMIT_DATA_SAMPLE = {
'story_1': {
'ci_095': 10,
'avg': 20
},
'story_2': {
'ci_095': 10,
'avg': 16
},
'story_3': {
'ci_095': 10,
'avg': 10
},
'story_4': {
'ci_095': 10,
'avg': 10
},
'story_5': {
'ci_095': 20,
'avg': 10
},
'story_6': {
'ci_095': 20,
'avg': 10
},
}
def create_sample_input(record_list):
  # Converts an array of arrays into an array of dicts with keys of
  # stories, name, avg, count, ci_095 for the unittests.
keys = ['stories', 'name', 'avg', 'count', 'ci_095']
result = []
for row in record_list:
result.append(dict(zip(keys, row)))
return result
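# For illustration only (not part of the CL): each record is zipped against
# the fixed key list, so
#   create_sample_input([['story_1', 'frame_times', 16, 10, 1.5]])
# returns
#   [{'stories': 'story_1', 'name': 'frame_times',
#     'avg': 16, 'count': 10, 'ci_095': 1.5}]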
def create_sample_perf_results(passed_stories, failed_stories, benchmark):
perf_results = {
'tests': {},
'num_failures_by_type': {
'FAIL': len(failed_stories),
'PASS': len(passed_stories)
}
}
perf_results['tests'][benchmark] = {}
for story in passed_stories:
perf_results['tests'][benchmark][story] = {
'actual': 'PASS',
'is_unexpected': False,
'expected': 'PASS'
}
for story in failed_stories:
perf_results['tests'][benchmark][story] = {
'actual': 'FAIL',
'is_unexpected': True,
'expected': 'PASS'
}
return perf_results
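# For illustration only (hypothetical story names, not from the CL):
#   create_sample_perf_results(['story_a'], ['story_b'], 'rendering.desktop')
# produces
#   {'tests': {'rendering.desktop': {
#        'story_a': {'actual': 'PASS', 'is_unexpected': False,
#                    'expected': 'PASS'},
#        'story_b': {'actual': 'FAIL', 'is_unexpected': True,
#                    'expected': 'PASS'}}},
#    'num_failures_by_type': {'FAIL': 1, 'PASS': 1}}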
class TestRepresentativePerfScript(unittest.TestCase):
def test_parse_csv_results(self):
csv_obj = create_sample_input([
['story_1', 'frame_times', 16, 10, 1.5],
['story_2', 'latency', 10, 8, 4], # Record for a different metric.
['story_3', 'frame_times', 8, 20, 2],
['story_4', 'frame_times', '', 10, 1], # Record with no avg.
['story_5', 'frame_times', 12, 0, 3], # Record with count of 0.
['story_6', 'frame_times', 12, 40, 40], # High noise record.
['story_7', 'frame_times', 12, 40, 4],
['story_3', 'frame_times', 7, 20, 15],
['story_3', 'frame_times', 12, 20, 16]
])
values_per_story = perf_tests.parse_csv_results(csv_obj,
UPPER_LIMIT_DATA_SAMPLE)
    # Existing frame_times stories in upper_limits should be listed.
# All stories but story_2 & story_7.
self.assertEquals(len(values_per_story), 5)
self.assertEquals(values_per_story['story_1']['averages'], [16.0])
self.assertEquals(values_per_story['story_1']['ci_095'], [1.5])
# Record with avg 12 has high noise.
self.assertEquals(values_per_story['story_3']['averages'], [8.0, 7.0])
self.assertEquals(values_per_story['story_3']['ci_095'], [2.0, 15.0, 16.0])
self.assertEquals(len(values_per_story['story_4']['averages']), 0)
self.assertEquals(len(values_per_story['story_4']['ci_095']), 0)
self.assertEquals(len(values_per_story['story_5']['averages']), 0)
self.assertEquals(len(values_per_story['story_5']['ci_095']), 0)
# High noise record will be filtered.
self.assertEquals(len(values_per_story['story_6']['averages']), 0)
self.assertEquals(values_per_story['story_6']['ci_095'], [40.0])
def test_compare_values_1(self):
values_per_story = {
'story_1': {
'averages': [16.0, 17.0, 21.0],
'ci_095': [2.0, 15.0, 16.0],
},
'story_2': {
'averages': [16.0, 17.0, 22.0],
'ci_095': [1.0, 1.4, 1.2],
}
}
sample_perf_results = create_sample_perf_results(['story_1', 'story_2'], [],
'rendering.desktop')
result_recorder = perf_tests.ResultRecorder()
result_recorder.set_tests(sample_perf_results)
result_recorder = perf_tests.compare_values(
values_per_story, UPPER_LIMIT_DATA_SAMPLE, 'rendering.desktop',
result_recorder)
self.assertEquals(result_recorder.tests, 2)
self.assertEquals(result_recorder.failed_stories, set(['story_2']))
def test_compare_values_2(self):
values_per_story = {
'story_1': {
'averages': [16.0, 17.0, 21.0],
'ci_095': [2.0, 15.0, 16.0],
},
'story_3': { # Two of the runs have acceptable CI but high averages.
'averages': [10, 13],
'ci_095': [1.0, 1.4, 1.2],
},
'story_4': { # All runs have high noise.
'averages': [],
'ci_095': [16, 17, 18],
},
'story_5': { # No recorded values.
'averages': [],
'ci_095': [],
}
}
sample_perf_results = create_sample_perf_results(
['story_1', 'story_3', 'story_4', 'story_5'], ['story_2'],
'rendering.desktop')
result_recorder = perf_tests.ResultRecorder()
result_recorder.set_tests(sample_perf_results)
self.assertEquals(result_recorder.fails, 1)
result_recorder = perf_tests.compare_values(
values_per_story, UPPER_LIMIT_DATA_SAMPLE, 'rendering.desktop',
result_recorder)
self.assertEquals(result_recorder.tests, 5)
self.assertEquals(result_recorder.failed_stories,
set(['story_3', 'story_4', 'story_5']))
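The listing above stops at the last assertion; to run the class directly, the file would also need the standard unittest entry point, which is not visible in this view and is only assumed here:

    # Assumed boilerplate, not shown in the diff above.
    if __name__ == '__main__':
      unittest.main()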