Commit ff3381da authored by behdad, committed by Commit Bot

Unittests for representative perf test scripts

Unittests are added to cover the functionality of the representative perf
test scripts.

Bug: chromium:1029952
Change-Id: Ie3ab5d1a976d9511fbde6a401fc544ead3e50091
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2017163
Commit-Queue: Behdad Bakhshinategh <behdadb@chromium.org>
Reviewed-by: Caleb Rouleau <crouleau@chromium.org>
Reviewed-by: Sadrul Chowdhury <sadrul@chromium.org>
Cr-Commit-Position: refs/heads/master@{#736014}
parent 04b36299
@@ -2850,6 +2850,9 @@ group("telemetry_perf_unittests") {
     # For tests in tools/perf/process_perf_results_unittest.py
     "//build/android/pylib/",
     "//tools/swarming_client/",
+
+    # For representative perf testing run_rendering_benchmark_with_gated_performance.py
+    "//testing/scripts/run_rendering_benchmark_with_gated_performance.py",
   ]
 }
...
@@ -86,48 +86,62 @@ class ResultRecorder(object):
     return (self.output, self.return_code)
 
 
-def interpret_run_benchmark_results(upper_limit_data,
-                                    isolated_script_test_output, benchmark):
-  out_dir_path = os.path.dirname(isolated_script_test_output)
-  output_path = os.path.join(out_dir_path, benchmark, 'test_results.json')
-  result_recorder = ResultRecorder()
-
-  with open(output_path, 'r+') as resultsFile:
-    initialOut = json.load(resultsFile)
-    result_recorder.set_tests(initialOut)
-
-    results_path = os.path.join(out_dir_path, benchmark, 'perf_results.csv')
-    values_per_story = {}
-
-    with open(results_path) as csv_file:
-      reader = csv.DictReader(csv_file)
-      for row in reader:
-        # For now only frame_times is used for testing representatives'
-        # performance.
-        if row['name'] != 'frame_times':
-          continue
-        story_name = row['stories']
-        if (story_name not in upper_limit_data):
-          continue
-        if story_name not in values_per_story:
-          values_per_story[story_name] = {
-            'averages': [],
-            'ci_095': []
-          }
-
-        if (row['avg'] == '' or row['count'] == 0):
-          continue
-        values_per_story[story_name]['ci_095'].append(float(row['ci_095']))
-        upper_limit_ci = upper_limit_data[story_name]['ci_095']
-        # Only average values which are not noisy will be used
-        if (float(row['ci_095']) <= upper_limit_ci * CI_ERROR_MARGIN):
-          values_per_story[story_name]['averages'].append(float(row['avg']))
-
-    # Clearing the result of run_benchmark and write the gated perf results
-    resultsFile.seek(0)
-    resultsFile.truncate(0)
+def parse_csv_results(csv_obj, upper_limit_data):
+  """Parses the raw CSV data.
+
+  Converts the csv_obj into an array of valid values for averages and
+  confidence intervals based on the described upper_limits.
+
+  Args:
+    csv_obj: An array of rows (dict) describing the CSV results
+    upper_limit_data: A dictionary containing the upper limits of each story
+
+  Returns:
+    A dictionary which has the stories as keys and an array of confidence
+    intervals and valid averages as data.
+  """
+  values_per_story = {}
+  for row in csv_obj:
+    # For now only frame_times is used for testing representatives'
+    # performance.
+    if row['name'] != 'frame_times':
+      continue
+    story_name = row['stories']
+    if (story_name not in upper_limit_data):
+      continue
+    if story_name not in values_per_story:
+      values_per_story[story_name] = {
+        'averages': [],
+        'ci_095': []
+      }
+
+    if (row['avg'] == '' or row['count'] == 0):
+      continue
+    values_per_story[story_name]['ci_095'].append(float(row['ci_095']))
+    upper_limit_ci = upper_limit_data[story_name]['ci_095']
+    # Only average values which are not noisy will be used.
+    if (float(row['ci_095']) <= upper_limit_ci * CI_ERROR_MARGIN):
+      values_per_story[story_name]['averages'].append(float(row['avg']))
+
+  return values_per_story
+
+
+def compare_values(values_per_story, upper_limit_data, benchmark,
+                   result_recorder):
+  """Compares the parsed values against the given upper limits.
+
+  Compares the values in values_per_story with the upper_limit_data and
+  determines if each story passes or fails.
+
+  Args:
+    values_per_story: A dictionary with the stories as keys and arrays of
+      confidence intervals and valid averages as data.
+    upper_limit_data: A dictionary containing the upper limits of each story
+    benchmark: A String for the benchmark (e.g. rendering.desktop) used only
+      for printing the results.
+    result_recorder: A ResultRecorder containing the initial failures if there
+      are stories which failed prior to comparing values (e.g. GPU crashes).
+
+  Returns:
+    A ResultRecorder containing the passes and failures.
+  """
   for story_name in values_per_story:
     if len(values_per_story[story_name]['ci_095']) == 0:
       print(('[ FAILED ] {}/{} has no valid values for frame_times. Check ' +
@@ -158,6 +172,28 @@ def interpret_run_benchmark_results(upper_limit_data,
   return result_recorder
 
 
+def interpret_run_benchmark_results(upper_limit_data,
+                                    isolated_script_test_output, benchmark):
+  out_dir_path = os.path.dirname(isolated_script_test_output)
+  output_path = os.path.join(out_dir_path, benchmark, 'test_results.json')
+  result_recorder = ResultRecorder()
+
+  with open(output_path, 'r+') as resultsFile:
+    initialOut = json.load(resultsFile)
+    result_recorder.set_tests(initialOut)
+
+    results_path = os.path.join(out_dir_path, benchmark, 'perf_results.csv')
+    with open(results_path) as csv_file:
+      csv_obj = csv.DictReader(csv_file)
+      values_per_story = parse_csv_results(csv_obj, upper_limit_data)
+
+    # Clear the results of run_benchmark and write the gated perf results.
+    resultsFile.seek(0)
+    resultsFile.truncate(0)
+
+  return compare_values(values_per_story, upper_limit_data, benchmark,
+                        result_recorder)
+
 
 def replace_arg_values(args, key_value_pairs):
   for index in range(0, len(args)):
     for (key, value) in key_value_pairs:
...
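The noise filter in parse_csv_results depends on CI_ERROR_MARGIN, a module-level constant that is not part of this diff; the unittests below only pass if it is roughly 1.5 (a ci_095 of 15 is still accepted against an upper limit of 10, while 16 is rejected, and 40 is rejected against a limit of 20). A minimal sketch of the parsing step under that assumption:

    # Illustrative only; the rows and the CI_ERROR_MARGIN value of 1.5 are
    # assumptions inferred from the unittests, not values taken from this CL.
    rows = [
        {'stories': 'story_1', 'name': 'frame_times',
         'avg': 16, 'count': 10, 'ci_095': 1.5},
        {'stories': 'story_1', 'name': 'frame_times',
         'avg': 30, 'count': 10, 'ci_095': 40},  # too noisy: avg is discarded
    ]
    values = parse_csv_results(rows, {'story_1': {'ci_095': 10, 'avg': 20}})
    # values == {'story_1': {'averages': [16.0], 'ci_095': [1.5, 40.0]}}

compare_values then takes this dictionary, checks the surviving averages of each story against its 'avg' upper limit, and records the passes and failures on the ResultRecorder.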
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import sys
import unittest
# Add src/testing/scripts into sys.path to import the representative perf
# test script.
PERF_TEST_SCRIPTS_DIR = os.path.join(
os.path.dirname(__file__), '..', '..', 'testing', 'scripts')
sys.path.append(PERF_TEST_SCRIPTS_DIR)
import run_rendering_benchmark_with_gated_performance as perf_tests # pylint: disable=wrong-import-position,import-error
UPPER_LIMIT_DATA_SAMPLE = {
'story_1': {
'ci_095': 10,
'avg': 20
},
'story_2': {
'ci_095': 10,
'avg': 16
},
'story_3': {
'ci_095': 10,
'avg': 10
},
'story_4': {
'ci_095': 10,
'avg': 10
},
'story_5': {
'ci_095': 20,
'avg': 10
},
'story_6': {
'ci_095': 20,
'avg': 10
},
}
def create_sample_input(record_list):
  # Converts an array of arrays into an array of dicts with keys of
  # stories, name, avg, count, ci_095 for the unittests.
keys = ['stories', 'name', 'avg', 'count', 'ci_095']
result = []
for row in record_list:
result.append(dict(zip(keys, row)))
return result
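# For illustration only (not part of the CL): each record is zipped against
# the fixed key list, so
#   create_sample_input([['story_1', 'frame_times', 16, 10, 1.5]])
# returns
#   [{'stories': 'story_1', 'name': 'frame_times',
#     'avg': 16, 'count': 10, 'ci_095': 1.5}]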
def create_sample_perf_results(passed_stories, failed_stories, benchmark):
perf_results = {
'tests': {},
'num_failures_by_type': {
'FAIL': len(failed_stories),
'PASS': len(passed_stories)
}
}
perf_results['tests'][benchmark] = {}
for story in passed_stories:
perf_results['tests'][benchmark][story] = {
'actual': 'PASS',
'is_unexpected': False,
'expected': 'PASS'
}
for story in failed_stories:
perf_results['tests'][benchmark][story] = {
'actual': 'FAIL',
'is_unexpected': True,
'expected': 'PASS'
}
return perf_results
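# For illustration only (hypothetical story names, not from the CL):
#   create_sample_perf_results(['story_a'], ['story_b'], 'rendering.desktop')
# produces
#   {'tests': {'rendering.desktop': {
#        'story_a': {'actual': 'PASS', 'is_unexpected': False,
#                    'expected': 'PASS'},
#        'story_b': {'actual': 'FAIL', 'is_unexpected': True,
#                    'expected': 'PASS'}}},
#    'num_failures_by_type': {'FAIL': 1, 'PASS': 1}}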
class TestRepresentativePerfScript(unittest.TestCase):
def test_parse_csv_results(self):
csv_obj = create_sample_input([
['story_1', 'frame_times', 16, 10, 1.5],
['story_2', 'latency', 10, 8, 4], # Record for a different metric.
['story_3', 'frame_times', 8, 20, 2],
['story_4', 'frame_times', '', 10, 1], # Record with no avg.
['story_5', 'frame_times', 12, 0, 3], # Record with count of 0.
['story_6', 'frame_times', 12, 40, 40], # High noise record.
['story_7', 'frame_times', 12, 40, 4],
['story_3', 'frame_times', 7, 20, 15],
['story_3', 'frame_times', 12, 20, 16]
])
values_per_story = perf_tests.parse_csv_results(csv_obj,
UPPER_LIMIT_DATA_SAMPLE)
    # Existing frame_times stories in upper_limits should be listed.
# All stories but story_2 & story_7.
self.assertEquals(len(values_per_story), 5)
self.assertEquals(values_per_story['story_1']['averages'], [16.0])
self.assertEquals(values_per_story['story_1']['ci_095'], [1.5])
# Record with avg 12 has high noise.
self.assertEquals(values_per_story['story_3']['averages'], [8.0, 7.0])
self.assertEquals(values_per_story['story_3']['ci_095'], [2.0, 15.0, 16.0])
self.assertEquals(len(values_per_story['story_4']['averages']), 0)
self.assertEquals(len(values_per_story['story_4']['ci_095']), 0)
self.assertEquals(len(values_per_story['story_5']['averages']), 0)
self.assertEquals(len(values_per_story['story_5']['ci_095']), 0)
# High noise record will be filtered.
self.assertEquals(len(values_per_story['story_6']['averages']), 0)
self.assertEquals(values_per_story['story_6']['ci_095'], [40.0])
def test_compare_values_1(self):
values_per_story = {
'story_1': {
'averages': [16.0, 17.0, 21.0],
'ci_095': [2.0, 15.0, 16.0],
},
'story_2': {
'averages': [16.0, 17.0, 22.0],
'ci_095': [1.0, 1.4, 1.2],
}
}
sample_perf_results = create_sample_perf_results(['story_1', 'story_2'], [],
'rendering.desktop')
result_recorder = perf_tests.ResultRecorder()
result_recorder.set_tests(sample_perf_results)
result_recorder = perf_tests.compare_values(
values_per_story, UPPER_LIMIT_DATA_SAMPLE, 'rendering.desktop',
result_recorder)
self.assertEquals(result_recorder.tests, 2)
self.assertEquals(result_recorder.failed_stories, set(['story_2']))
def test_compare_values_2(self):
values_per_story = {
'story_1': {
'averages': [16.0, 17.0, 21.0],
'ci_095': [2.0, 15.0, 16.0],
},
'story_3': { # Two of the runs have acceptable CI but high averages.
'averages': [10, 13],
'ci_095': [1.0, 1.4, 1.2],
},
'story_4': { # All runs have high noise.
'averages': [],
'ci_095': [16, 17, 18],
},
'story_5': { # No recorded values.
'averages': [],
'ci_095': [],
}
}
sample_perf_results = create_sample_perf_results(
['story_1', 'story_3', 'story_4', 'story_5'], ['story_2'],
'rendering.desktop')
result_recorder = perf_tests.ResultRecorder()
result_recorder.set_tests(sample_perf_results)
self.assertEquals(result_recorder.fails, 1)
result_recorder = perf_tests.compare_values(
values_per_story, UPPER_LIMIT_DATA_SAMPLE, 'rendering.desktop',
result_recorder)
self.assertEquals(result_recorder.tests, 5)
self.assertEquals(result_recorder.failed_stories,
set(['story_3', 'story_4', 'story_5']))
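The listing above stops at the last assertion; to run the class directly, the file would also need the standard unittest entry point, which is not visible in this view and is only assumed here:

    # Assumed boilerplate, not shown in the diff above.
    if __name__ == '__main__':
      unittest.main()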