Commit 29130140 authored by behdad, committed by Commit Bot

Failure invalidation of rep_perf tests should show green status

This change reports a test as passing when its failure is invalidated, and adds an invalidation_reason field to output.json recording why the failure was invalidated.
The two invalidation reasons used are:
- Noisy control test
- Low cpu_wall_time_ratio


Bug: chromium:1106934
Change-Id: Ib9217352fc8e779588a67093acaba2538b97a263
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2315539
Reviewed-by: John Chen <johnchen@chromium.org>
Commit-Queue: Behdad Bakhshinategh <behdadb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#791317}
parent 1ff3c501
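
As a rough illustration of the net effect on output.json (not part of the diff below; the benchmark and story names here are placeholders, and only the 'actual', 'is_unexpected', and 'invalidation_reason' keys are taken from the change itself), an invalidated failure ends up recorded like this:

# Sketch of an invalidated failure in output.json after this change.
# remove_failure() flips 'actual' back to 'PASS' and clears 'is_unexpected';
# add_invalidation_reason() records why the failure was discarded.
invalidated_entry = {
    'tests': {
        'rendering.desktop': {            # benchmark name: illustrative only
            'story_1': {                  # story name: illustrative only
                'actual': 'PASS',
                'is_unexpected': False,
                # Either 'Noisy control test' or 'Low cpu_wall_time_ratio'.
                'invalidation_reason': 'Noisy control test',
            },
        },
    },
    'num_failures_by_type': {'PASS': 1, 'FAIL': 0},
}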
@@ -64,20 +64,26 @@ class ResultRecorder(object):
     if is_control:
       self._noisy_control_stories.add(name)
 
-  def remove_failure(self, name, benchmark, is_control=False):
+  def remove_failure(self, name, benchmark, is_control=False,
+                     invalidation_reason=None):
     self.output['tests'][benchmark][name]['actual'] = 'PASS'
     self.output['tests'][benchmark][name]['is_unexpected'] = False
     self._failed_stories.remove(name)
     self.fails -= 1
     if is_control:
       self._noisy_control_stories.remove(name)
+    if invalidation_reason:
+      self.add_invalidation_reason(name, benchmark, invalidation_reason)
 
   def invalidate_failures(self, benchmark):
     # The method is for invalidating the failures in case of noisy control test
-    for story in self._failed_stories:
+    for story in self._failed_stories.copy():
       print(story + ' [Invalidated Failure]: The story failed but was ' +
             'invalidated as a result of noisy control test.')
-      self.output['tests'][benchmark][story]['is_unexpected'] = False
+      self.remove_failure(story, benchmark, False, 'Noisy control test')
+
+  def add_invalidation_reason(self, name, benchmark, reason):
+    self.output['tests'][benchmark][name]['invalidation_reason'] = reason
 
   @property
   def failed_stories(self):
@@ -90,10 +96,7 @@ class ResultRecorder(object):
 
   def get_output(self, return_code):
     self.output['seconds_since_epoch'] = time.time() - self.start_time
     self.output['num_failures_by_type']['PASS'] = self.tests - self.fails
-    if self.fails > 0 and not self.is_control_stories_noisy:
-      self.output['num_failures_by_type']['FAIL'] = self.fails
-    else:
-      self.output['num_failures_by_type']['FAIL'] = 0
+    self.output['num_failures_by_type']['FAIL'] = self.fails
     if return_code == 1:
       self.output['interrupted'] = True
@@ -101,7 +104,7 @@ class ResultRecorder(object):
     tests = lambda n: plural(n, 'test', 'tests')
 
     print('[ PASSED ] ' + tests(self.tests - self.fails) + '.')
-    if self.fails > 0 and not self.is_control_stories_noisy:
+    if self.fails > 0:
       print('[ FAILED ] ' + tests(self.fails) + '.')
       self.return_code = 1
@@ -250,6 +253,8 @@ class RenderingRepresentativePerfTest(object):
               'to upper limit({:.3f}). Invalidated for low cpu_wall_time_ratio'
             ).format(self.benchmark, story_name, METRIC_NAME, measured_avg,
                      upper_limit_avg))
+          self.result_recorder[rerun].add_invalidation_reason(
+              story_name, self.benchmark, 'Low cpu_wall_time_ratio')
         else:
           print(('[ OK ] {}/{} lower average {}({:.3f}) compared ' +
                  'to upper limit({:.3f}).').format(self.benchmark, story_name,
@@ -388,4 +393,4 @@ if __name__ == '__main__':
     'compile_targets': main_compile_targets,
   }
   sys.exit(common.run_script(sys.argv[1:], funcs))
   sys.exit(main())
\ No newline at end of file
@@ -219,14 +219,74 @@ class TestRepresentativePerfScript(unittest.TestCase):
     result_recorder.invalidate_failures(BENCHMARK)
     (output, overall_return_code) = result_recorder.get_output(0)
 
-    self.assertEquals(overall_return_code, 0)
-    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 0)
-    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
-    self.assertEquals(output['tests'][BENCHMARK]['story_3']['actual'], 'FAIL')
-    self.assertEquals(output['tests'][BENCHMARK]['story_4']['actual'], 'FAIL')
+    self.assertEquals(overall_return_code, 1)
+    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 1)
+    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
+    self.assertEquals(output['tests'][BENCHMARK]['story_2']['actual'], 'FAIL')
+    self.assertEquals(output['tests'][BENCHMARK]['story_3']['actual'], 'PASS')
+    self.assertEquals(output['tests'][BENCHMARK]['story_4']['actual'], 'PASS')
+
+  # Invalidating failure as a result of noisy control test
+  def test_compare_values_3(self):
+    values_per_story = {
+      'story_1': {
+        'averages': [16.0, 17.0, 21.0],
+        'ci_095': [2.0, 15.0, 16.0],
+        'cpu_wall_time_ratio': [0.45, 0.42],
+      },
+      'story_3': { # Two of the runs have acceptable CI but high averages.
+        'averages': [10, 13],
+        'ci_095': [14, 16, 12],
+        'cpu_wall_time_ratio': [0.5, 0.52],
+      },
+      'story_4': { # All runs have high noise.
+        'averages': [],
+        'ci_095': [16, 17, 18],
+        'cpu_wall_time_ratio': [],
+      },
+      'story_5': { # No recorded values.
+        'averages': [],
+        'ci_095': [],
+        'cpu_wall_time_ratio': [],
+      }
+    }
+    sample_perf_results = create_sample_perf_results(
+      ['story_1', 'story_3', 'story_4', 'story_5'], [], BENCHMARK)
+
+    rerun = True
+    perf_test = perf_test_initializer()
+    perf_test.result_recorder[rerun].set_tests(sample_perf_results)
+    self.assertEquals(perf_test.result_recorder[rerun].fails, 0)
+
+    perf_test.compare_values(values_per_story, rerun)
+    result_recorder = perf_test.result_recorder[rerun]
+    self.assertEquals(result_recorder.tests, 4)
+    self.assertEquals(result_recorder.failed_stories,
+                      set(['story_3', 'story_4', 'story_5']))
+    self.assertTrue(result_recorder.is_control_stories_noisy)
+
+    result_recorder.invalidate_failures(BENCHMARK)
+    (output, overall_return_code) = result_recorder.get_output(0)
+
+    self.assertEquals(overall_return_code, 0)
+    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 0)
+    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
+    self.assertEquals(output['tests'][BENCHMARK]['story_3']['actual'], 'PASS')
+    self.assertEquals(output['tests'][BENCHMARK]['story_4']['actual'], 'PASS')
+    self.assertEquals(output['tests'][BENCHMARK]['story_5']['actual'], 'PASS')
+    self.assertEquals(
+        output['tests'][BENCHMARK]['story_3']['invalidation_reason'],
+        'Noisy control test')
+    self.assertEquals(
+        output['tests'][BENCHMARK]['story_4']['invalidation_reason'],
+        'Noisy control test')
+    self.assertEquals(
+        output['tests'][BENCHMARK]['story_5']['invalidation_reason'],
+        'Noisy control test')
 
   # Experimental stories should not fail the test
-  def test_compare_values_3(self):
+  def test_compare_values_4(self):
     values_per_story = {
       'story_1': {
         'averages': [16.0, 17.0, 21.0],
@@ -254,10 +314,42 @@ class TestRepresentativePerfScript(unittest.TestCase):
     self.assertEquals(result_recorder.tests, 2)
     self.assertEquals(result_recorder.failed_stories, set([]))
 
+    result_recorder.invalidate_failures(BENCHMARK)
     (output, overall_return_code) = result_recorder.get_output(0)
 
     self.assertEquals(overall_return_code, 0)
     self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 0)
     self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
     self.assertEquals(output['tests'][BENCHMARK]['story_7']['actual'], 'PASS')
 
+  # Low cpu_wall_time_ratio invalidates the failure
+  def test_compare_values_5(self):
+    values_per_story = {
+      'story_1': {
+        'averages': [26.0, 27.0, 21.0],
+        'ci_095': [2.0, 15.0, 16.0],
+        'cpu_wall_time_ratio': [0.35, 0.42, 0.34],
+        # Higher avg than upper limit with low cpu_wall_time_ratio
+      }
+    }
+    sample_perf_results = create_sample_perf_results(['story_1'], [], BENCHMARK)
+
+    rerun = False
+    perf_test = perf_test_initializer()
+    perf_test.result_recorder[rerun].set_tests(sample_perf_results)
+    self.assertEquals(perf_test.result_recorder[rerun].fails, 0)
+
+    perf_test.compare_values(values_per_story, rerun)
+    result_recorder = perf_test.result_recorder[rerun]
+    self.assertEquals(result_recorder.tests, 1)
+    self.assertEquals(result_recorder.failed_stories, set([]))
+
+    result_recorder.invalidate_failures(BENCHMARK)
+    (output, overall_return_code) = result_recorder.get_output(0)
+
+    self.assertEquals(overall_return_code, 0)
+    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 0)
+    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
+    self.assertEquals(
+        output['tests'][BENCHMARK]['story_1']['invalidation_reason'],
+        'Low cpu_wall_time_ratio')
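
Because invalidated failures are now reported as green, a downstream consumer can still tell an invalidated pass apart from a genuine one by checking the new field. A minimal sketch, assuming the output.json layout written by ResultRecorder above (the file path here is only an example):

import json

# Hypothetical path; use wherever the recorder's output.json is written on the bot.
with open('output.json') as f:
    results = json.load(f)

for benchmark, stories in results['tests'].items():
    for story, result in stories.items():
        reason = result.get('invalidation_reason')
        if result['actual'] == 'PASS' and reason:
            # Story passed only because its failure was invalidated
            # ('Noisy control test' or 'Low cpu_wall_time_ratio').
            print('{}/{} invalidated: {}'.format(benchmark, story, reason))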