Commit 5068d536 authored by behdad, committed by Commit Bot

Stop failures caused by noise by using a control test

The main_30fps_impl_60fps story of the representative perf tests is
marked as a control test. If this story fails because of high noise,
the whole run is treated as unreliable: failures of the other stories
do not fail the test, resulting in fewer false positives.

Bug: chromium:1029846
Change-Id: I50c8e41108d584f2f5cbcf095be50dc09228406b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2025510
Commit-Queue: Behdad Bakhshinategh <behdadb@chromium.org>
Reviewed-by: Sadrul Chowdhury <sadrul@chromium.org>
Reviewed-by: Caleb Rouleau <crouleau@chromium.org>
Cr-Commit-Position: refs/heads/master@{#738735}
parent 2d7e2dd9
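In short, the gating now works as follows: a story whose upper-limit entry
carries "control": true is expected to be stable, so when it fails the ci_095
(noise) check the whole run is considered too noisy to trust, and none of the
failures in that run are allowed to set a failing return code. The sketch
below illustrates only this decision; it is a simplified stand-in for the
ResultRecorder logic in the diff, not the harness code itself, and the
function name and sample story names are illustrative.

# Minimal sketch of the gating decision described above (illustrative only;
# the real logic lives in ResultRecorder.get_output(), shown in the diff).
def overall_return_code(failed_stories, noisy_control_stories):
  # failed_stories: stories whose frame_times exceeded their upper limits.
  # noisy_control_stories: control stories that failed the ci_095 check.
  if noisy_control_stories:
    # The run itself is too noisy to trust, so no failure is reported.
    print('Control story has high noise. These runs are not reliable!')
    return 0
  return 1 if failed_stories else 0

# Only the control story is noisy: the run still passes.
assert overall_return_code({'main_30fps_impl_60fps'},
                           {'main_30fps_impl_60fps'}) == 0
# A regular story regressed and the control story is quiet: the run fails.
assert overall_return_code({'new_tilings'}, set()) == 1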
@@ -50,7 +50,8 @@
   },
   "main_30fps_impl_60fps": {
     "ci_095": 0.121,
-    "avg": 16.689
+    "avg": 16.689,
+    "control": true
   },
   "css_value_type_shadow": {
     "ci_095": 1.306,
@@ -96,7 +97,8 @@
   },
   "main_30fps_impl_60fps": {
     "ci_095": 0.185,
-    "avg": 16.695
+    "avg": 16.695,
+    "control": true
   },
   "ie_chalkboard": {
     "ci_095": 43.55,
@@ -142,7 +144,8 @@
   },
   "main_30fps_impl_60fps": {
     "ci_095": 0.603,
-    "avg": 18.009
+    "avg": 18.009,
+    "control": true
  },
   "new_tilings": {
     "ci_095": 1.008,
@@ -44,30 +44,39 @@ class ResultRecorder(object):
     self.output = {}
     self.return_code = 0
     self._failed_stories = set()
+    self._noisy_control_stories = set()
+    # Set of _noisy_control_stories keeps track of control tests which failed
+    # because of high noise values.

   def set_tests(self, output):
     self.output = output
     self.fails = 0
-    if 'FAIL' in output['num_failures_by_type']:
-      self.fails = output['num_failures_by_type']['FAIL']
-    self.tests = self.fails + output['num_failures_by_type']['PASS']
+    self.fails = output['num_failures_by_type'].get('FAIL', 0)
+    self.tests = self.fails + output['num_failures_by_type'].get('PASS', 0)

-  def add_failure(self, name, benchmark):
+  def add_failure(self, name, benchmark, is_control=False):
     self.output['tests'][benchmark][name]['actual'] = 'FAIL'
     self.output['tests'][benchmark][name]['is_unexpected'] = True
     self._failed_stories.add(name)
     self.fails += 1
+    if is_control:
+      self._noisy_control_stories.add(name)

-  def remove_failure(self, name, benchmark):
+  def remove_failure(self, name, benchmark, is_control=False):
     self.output['tests'][benchmark][name]['actual'] = 'PASS'
     self.output['tests'][benchmark][name]['is_unexpected'] = False
     self._failed_stories.remove(name)
     self.fails -= 1
+    if is_control:
+      self._noisy_control_stories.remove(name)

   @property
   def failed_stories(self):
     return self._failed_stories

+  @property
+  def is_control_stories_noisy(self):
+    return len(self._noisy_control_stories) > 0
+
   def get_output(self, return_code):
     self.output['seconds_since_epoch'] = time.time() - self.start_time
     self.output['num_failures_by_type']['PASS'] = self.tests - self.fails
@@ -82,6 +91,7 @@ class ResultRecorder(object):
     print('[ PASSED ] ' + tests(self.tests - self.fails) + '.')
     if self.fails > 0:
       print('[ FAILED ] ' + tests(self.fails) + '.')
-      self.return_code = 1
+      if not self.is_control_stories_noisy:
+        self.return_code = 1

     return (self.output, self.return_code)
@@ -159,7 +169,9 @@ def compare_values(values_per_story, upper_limit_data, benchmark,
       print(('[ FAILED ] {}/{} frame_times has higher noise ({:.3f}) ' +
             'compared to upper limit ({:.3f})').format(
               benchmark, story_name, measured_ci,upper_limit_ci))
-      result_recorder.add_failure(story_name, benchmark)
+      result_recorder.add_failure(story_name, benchmark,
+          is_control_story(upper_limit_data[story_name]))
     elif (measured_avg > upper_limit_avg * AVG_ERROR_MARGIN):
       print(('[ FAILED ] {}/{} higher average frame_times({:.3f}) compared' +
             ' to upper limit ({:.3f})').format(
@@ -204,6 +216,9 @@ def replace_arg_values(args, key_value_pairs):
       args[index+1] = value
   return args

+def is_control_story(story_data):
+  return ('control' in story_data and story_data['control'] == True)
+
 def main():
   overall_return_code = 0
   options = parse_arguments()
@@ -282,7 +297,8 @@ def main():
       for story_name in result_recorder.failed_stories.copy():
         if story_name not in re_run_result_recorder.failed_stories:
-          result_recorder.remove_failure(story_name, benchmark)
+          result_recorder.remove_failure(story_name, benchmark,
+              is_control_story(upper_limit_data[platform][story_name]))

   (
     finalOut,
@@ -294,6 +310,10 @@ def main():
   with open(options.isolated_script_test_output, 'w') as outputFile:
     json.dump(finalOut, outputFile, indent=4)

+  if result_recorder.is_control_stories_noisy:
+    assert overall_return_code == 0
+    print('Control story has high noise. These runs are not reliable!')
+
   return overall_return_code

 def parse_arguments():
@@ -27,7 +27,8 @@ UPPER_LIMIT_DATA_SAMPLE = {
   },
   'story_4': {
     'ci_095': 10,
-    'avg': 10
+    'avg': 10,
+    'control': True,
   },
   'story_5': {
     'ci_095': 20,
@@ -132,6 +133,8 @@ class TestRepresentativePerfScript(unittest.TestCase):
                                     result_recorder)
     self.assertEquals(result_recorder.tests, 2)
     self.assertEquals(result_recorder.failed_stories, set(['story_2']))
+    (_, overall_return_code) = result_recorder.get_output(0)
+    self.assertEquals(overall_return_code, 1)

   def test_compare_values_2(self):
     values_per_story = {
@@ -141,7 +144,7 @@ class TestRepresentativePerfScript(unittest.TestCase):
       },
       'story_3': { # Two of the runs have acceptable CI but high averages.
         'averages': [10, 13],
-        'ci_095': [1.0, 1.4, 1.2],
+        'ci_095': [14, 16, 12]
       },
       'story_4': { # All runs have high noise.
         'averages': [],
@@ -167,3 +170,6 @@ class TestRepresentativePerfScript(unittest.TestCase):
     self.assertEquals(result_recorder.tests, 5)
     self.assertEquals(result_recorder.failed_stories,
                       set(['story_3', 'story_4', 'story_5']))
+    self.assertTrue(result_recorder.is_control_stories_noisy)
+    (_, overall_return_code) = result_recorder.get_output(0)
+    self.assertEquals(overall_return_code, 0)