Commit 0bd20fee authored by behdad, committed by Commit Bot

Low cpu_wall_time_ratio can invalidate failures

Use cpu_wall_time_ratio in the representative perf test logic to invalidate failures that had a lower-than-usual cpu_wall_time_ratio, in order to reduce flaky failures.
As a result, only stories whose frame_times exceed the limit and whose cpu_wall_time_ratio is acceptable will fail.

Bug: chromium:1052361
Change-Id: I294361375f91096a32e226f57dc61450926fab0b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2292811
Commit-Queue: Behdad Bakhshinategh <behdadb@chromium.org>
Reviewed-by: John Chen <johnchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#789189}
parent 2a39758f
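The decision this CL introduces can be sketched as a small standalone function. This is an illustration only: evaluate_story, its arguments, and the margin value are hypothetical, not the script's API; the real AVG_ERROR_MARGIN is defined in the script and is not shown in these hunks.

import numpy as np

AVG_ERROR_MARGIN = 1.1  # illustrative placeholder; the script defines the real margin

def evaluate_story(measured, limits):
  # measured: the per-story lists built by parse_csv_results
  #           ('averages', 'ci_095', 'cpu_wall_time_ratio')
  # limits:   one entry of the upper-limit data for that story
  measured_avg = np.mean(np.array(measured['averages']))
  measured_cpu_ratio = np.mean(np.array(measured['cpu_wall_time_ratio']))
  if measured_avg <= limits['avg'] * AVG_ERROR_MARGIN:
    return 'OK'  # frame_times within the allowed upper limit
  if measured_cpu_ratio >= limits['cpu_wall_time_ratio']:
    return 'FAIL'  # genuinely slow frames while the CPU was busy enough
  return 'OK (invalidated: low cpu_wall_time_ratio)'  # likely a starved or noisy run

In other words, a high frame_times average is now reported as a failure only when the run was also busy enough; otherwise the result is treated as too unreliable to fail on.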
@@ -180,8 +180,8 @@ class RenderingRepresentativePerfTest(object):
values_per_story = {}
for row in csv_obj:
# For now only frame_times is used for testing representatives'
-# performance.
-if row['name'] != METRIC_NAME:
+# performance and cpu_wall_time_ratio is used for validation.
+if row['name'] != METRIC_NAME and row['name'] != 'cpu_wall_time_ratio':
continue
story_name = row['stories']
if (story_name not in self.upper_limit_data):
@@ -189,13 +189,16 @@ class RenderingRepresentativePerfTest(object):
if story_name not in values_per_story:
values_per_story[story_name] = {
'averages': [],
-'ci_095': []
+'ci_095': [],
+'cpu_wall_time_ratio': []
}
-if (row['avg'] == '' or row['count'] == 0):
-continue
-values_per_story[story_name]['ci_095'].append(float(row['ci_095']))
-values_per_story[story_name]['averages'].append(float(row['avg']))
+if row['name'] == METRIC_NAME and row['avg'] != '' and row['count'] != 0:
+values_per_story[story_name]['ci_095'].append(float(row['ci_095']))
+values_per_story[story_name]['averages'].append(float(row['avg']))
+elif row['name'] == 'cpu_wall_time_ratio' and row['avg'] != '':
+values_per_story[story_name]['cpu_wall_time_ratio'].append(
+float(row['avg']))
return values_per_story
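To make the new bucketing concrete, here is a self-contained illustration of what the loop above accumulates for one story. The dict rows mirror the unit-test input later in this change; they are not the script's real CSV objects, and the variable names are hypothetical.

sample_rows = [
    {'stories': 'story_1', 'name': 'frame_times', 'avg': 16, 'count': 10, 'ci_095': 1.5},
    {'stories': 'story_1', 'name': 'cpu_wall_time_ratio', 'avg': 0.5, 'count': 1, 'ci_095': 1},
]
values = {'averages': [], 'ci_095': [], 'cpu_wall_time_ratio': []}
for row in sample_rows:
  if row['name'] == 'frame_times' and row['avg'] != '' and row['count'] != 0:
    # frame_times rows feed both the average and the confidence interval lists
    values['ci_095'].append(float(row['ci_095']))
    values['averages'].append(float(row['avg']))
  elif row['name'] == 'cpu_wall_time_ratio' and row['avg'] != '':
    # cpu_wall_time_ratio rows are collected separately, for validation only
    values['cpu_wall_time_ratio'].append(float(row['avg']))
print(values)  # {'averages': [16.0], 'ci_095': [1.5], 'cpu_wall_time_ratio': [0.5]}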
@@ -219,10 +222,14 @@ class RenderingRepresentativePerfTest(object):
self.result_recorder[rerun].add_failure(story_name, self.benchmark)
continue
-upper_limit_avg = self.upper_limit_data[story_name]['avg']
-upper_limit_ci = self.upper_limit_data[story_name]['ci_095']
+upper_limits = self.upper_limit_data
+upper_limit_avg = upper_limits[story_name]['avg']
+upper_limit_ci = upper_limits[story_name]['ci_095']
+lower_limit_cpu_ratio = upper_limits[story_name]['cpu_wall_time_ratio']
measured_avg = np.mean(np.array(values_per_story[story_name]['averages']))
measured_ci = np.mean(np.array(values_per_story[story_name]['ci_095']))
+measured_cpu_ratio = np.mean(np.array(
+values_per_story[story_name]['cpu_wall_time_ratio']))
if (measured_ci > upper_limit_ci * CI_ERROR_MARGIN and
self.is_control_story(story_name)):
@@ -233,10 +240,16 @@ class RenderingRepresentativePerfTest(object):
self.result_recorder[rerun].add_failure(
story_name, self.benchmark, True)
elif (measured_avg > upper_limit_avg * AVG_ERROR_MARGIN):
-print(('[ FAILED ] {}/{} higher average {}({:.3f}) compared' +
-' to upper limit ({:.3f})').format(self.benchmark, story_name,
-METRIC_NAME, measured_avg, upper_limit_avg))
-self.result_recorder[rerun].add_failure(story_name, self.benchmark)
+if (measured_cpu_ratio >= lower_limit_cpu_ratio):
+print(('[ FAILED ] {}/{} higher average {}({:.3f}) compared' +
+' to upper limit ({:.3f})').format(self.benchmark, story_name,
+METRIC_NAME, measured_avg, upper_limit_avg))
+self.result_recorder[rerun].add_failure(story_name, self.benchmark)
+else:
+print(('[ OK ] {}/{} higher average {}({:.3f}) compared ' +
+'to upper limit({:.3f}). Invalidated for low cpu_wall_time_ratio'
+).format(self.benchmark, story_name, METRIC_NAME, measured_avg,
+upper_limit_avg))
else:
print(('[ OK ] {}/{} lower average {}({:.3f}) compared ' +
'to upper limit({:.3f}).').format(self.benchmark, story_name,
@@ -18,32 +18,39 @@ BENCHMARK = 'rendering.desktop'
UPPER_LIMIT_DATA_SAMPLE = {
'story_1': {
'ci_095': 10,
-'avg': 20
+'avg': 20,
+'cpu_wall_time_ratio': 0.4,
},
'story_2': {
'ci_095': 10,
-'avg': 16
+'avg': 16,
+'cpu_wall_time_ratio': 0.3,
},
'story_3': {
'ci_095': 10,
-'avg': 10
+'avg': 10,
+'cpu_wall_time_ratio': 0.5,
},
'story_4': {
'ci_095': 10,
'avg': 10,
+'cpu_wall_time_ratio': 0.5,
'control': True,
},
'story_5': {
'ci_095': 20,
-'avg': 10
+'avg': 10,
+'cpu_wall_time_ratio': 0.5,
},
'story_6': {
'ci_095': 20,
-'avg': 10
+'avg': 10,
+'cpu_wall_time_ratio': 0.5,
},
'story_7': {
'ci_095': 20,
'avg': 10,
+'cpu_wall_time_ratio': 0.5,
'experimental': True,
},
}
@@ -96,14 +103,18 @@ class TestRepresentativePerfScript(unittest.TestCase):
def test_parse_csv_results(self):
csv_obj = create_sample_input([
['story_1', 'frame_times', 16, 10, 1.5],
+['story_1', 'cpu_wall_time_ratio', 0.5, 1, 1],
['story_2', 'latency', 10, 8, 4], # Record for a different metric.
['story_3', 'frame_times', 8, 20, 2],
+['story_3', 'frame_times', 7, 20, 15],
+['story_3', 'frame_times', 12, 20, 16],
+['story_3', 'cpu_wall_time_ratio', 0.3, 1, 1],
+['story_3', 'cpu_wall_time_ratio', 0.7, 1, 1],
+['story_3', 'cpu_wall_time_ratio', '', 0, 1],
['story_4', 'frame_times', '', 10, 1], # Record with no avg.
['story_5', 'frame_times', 12, 0, 3], # Record with count of 0.
['story_6', 'frame_times', 12, 40, 40], # High noise record.
['story_8', 'frame_times', 12, 40, 4],
-['story_3', 'frame_times', 7, 20, 15],
-['story_3', 'frame_times', 12, 20, 16]
])
perf_test = perf_test_initializer()
@@ -114,10 +125,13 @@ class TestRepresentativePerfScript(unittest.TestCase):
self.assertEquals(len(values_per_story), 5)
self.assertEquals(values_per_story['story_1']['averages'], [16.0])
self.assertEquals(values_per_story['story_1']['ci_095'], [1.5])
+self.assertEquals(values_per_story['story_1']['cpu_wall_time_ratio'], [0.5])
# Record with avg 12 has high noise.
self.assertEquals(values_per_story['story_3']['averages'], [8.0, 7.0, 12.0])
self.assertEquals(values_per_story['story_3']['ci_095'], [2.0, 15.0, 16.0])
+self.assertEquals(values_per_story['story_3']['cpu_wall_time_ratio'],
+[0.3, 0.7])
self.assertEquals(len(values_per_story['story_4']['averages']), 0)
self.assertEquals(len(values_per_story['story_4']['ci_095']), 0)
@@ -131,46 +145,59 @@ class TestRepresentativePerfScript(unittest.TestCase):
'story_1': {
'averages': [16.0, 17.0, 21.0],
'ci_095': [2.0, 15.0, 16.0],
+'cpu_wall_time_ratio': [0.5, 0.52, 0.57]
},
'story_2': {
'averages': [16.0, 17.0, 22.0],
'ci_095': [1.0, 1.4, 1.2],
+'cpu_wall_time_ratio': [0.3, 0.3, 0.3]
},
'story_3': {
'averages': [20.0, 15.0, 22.0],
'ci_095': [1.0, 0.8, 1.2],
+'cpu_wall_time_ratio': [0.5, 0.5, 0.49]
}
}
-sample_perf_results = create_sample_perf_results(['story_1', 'story_2'], [],
-BENCHMARK)
+sample_perf_results = create_sample_perf_results(
+['story_1', 'story_2', 'story_3'], [], BENCHMARK)
rerun = False
perf_test = perf_test_initializer()
perf_test.result_recorder[rerun].set_tests(sample_perf_results)
perf_test.compare_values(values_per_story, rerun)
result_recorder = perf_test.result_recorder[rerun]
-self.assertEquals(result_recorder.tests, 2)
+self.assertEquals(result_recorder.tests, 3)
+# The failure for story_3 is invalidated (low cpu_wall_time_ratio)
self.assertEquals(result_recorder.failed_stories, set(['story_2']))
(output, overall_return_code) = result_recorder.get_output(0)
self.assertEquals(overall_return_code, 1)
self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 1)
self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
self.assertEquals(output['tests'][BENCHMARK]['story_2']['actual'], 'FAIL')
+self.assertEquals(output['tests'][BENCHMARK]['story_3']['actual'], 'PASS')
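# Worked numbers behind the PASS above (a sketch; the exact AVG_ERROR_MARGIN
# constant lives in the script and is only assumed here to be slightly above 1):
#   story_3: mean(averages) = mean([20.0, 15.0, 22.0]) = 19.0, far above its
#            upper-limit avg of 10, so it would normally fail; but
#            mean(cpu_wall_time_ratio) = mean([0.5, 0.5, 0.49]) ~ 0.497 is
#            below the 0.5 lower limit, so the failure is invalidated -> PASS.
#   story_2: mean(averages) ~ 18.33 is above its limit of 16, and
#            mean(cpu_wall_time_ratio) = 0.3 >= the 0.3 lower limit -> FAIL.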
def test_compare_values_2(self):
values_per_story = {
'story_1': {
'averages': [16.0, 17.0, 21.0],
'ci_095': [2.0, 15.0, 16.0],
+'cpu_wall_time_ratio': [0.45, 0.42],
},
'story_3': { # Two of the runs have acceptable CI but high averages.
'averages': [10, 13],
-'ci_095': [14, 16, 12]
+'ci_095': [14, 16, 12],
+'cpu_wall_time_ratio': [0.5, 0.52],
},
'story_4': { # All runs have high noise.
'averages': [],
'ci_095': [16, 17, 18],
+'cpu_wall_time_ratio': [],
},
'story_5': { # No recorded values.
'averages': [],
'ci_095': [],
+'cpu_wall_time_ratio': [],
}
}
@@ -204,11 +231,13 @@ class TestRepresentativePerfScript(unittest.TestCase):
'story_1': {
'averages': [16.0, 17.0, 21.0],
'ci_095': [2.0, 15.0, 16.0],
+'cpu_wall_time_ratio': [0.45, 0.42, 0.44],
},
'story_7':
{ # Experimental story with higher value than the upper limit.
'averages': [20, 26],
-'ci_095': [14, 16]
+'ci_095': [14, 16],
+'cpu_wall_time_ratio': [0.45, 0.42, 0.44],
}
}