Commit 356557dd authored by Robert Ma's avatar Robert Ma Committed by Commit Bot

[blinkpy] Fix expectations updater after JSON output tweaks

Following https://crrev.com/c/1154107, the test results JSON no longer
collapses multiple retries if they are all same, so some of the
assumptions in the expectations updater no longer hold. This CL changes
the updater to properly support multiple retries in the "actual"
results.

Besides, add a "--patchset" argument to the tool for easier debugging.

Bug: 870528
Change-Id: Ia6045eac7781708046a7ab8f07eb40c23a9f5897
Reviewed-on: https://chromium-review.googlesource.com/1164027
Commit-Queue: Robert Ma <robertma@chromium.org>
Reviewed-by: default avatarQuinten Yearsley <qyearsley@chromium.org>
Cr-Commit-Position: refs/heads/master@{#581293}
parent 24678e58
...@@ -68,12 +68,14 @@ class LayoutTestResult(object): ...@@ -68,12 +68,14 @@ class LayoutTestResult(object):
def expected_results(self): def expected_results(self):
return self._result_dict['expected'] return self._result_dict['expected']
def last_retry_result(self):
return self.actual_results().split()[-1]
def has_mismatch_result(self): def has_mismatch_result(self):
last_retry_result = self.actual_results().split()[-1] return self.last_retry_result() in ('TEXT', 'IMAGE', 'IMAGE+TEXT', 'AUDIO')
return last_retry_result in ('TEXT', 'IMAGE', 'IMAGE+TEXT', 'AUDIO')
def is_missing_baseline(self): def is_missing_baseline(self):
return self._result_dict['actual'] == 'MISSING' return self.last_retry_result() == 'MISSING'
# FIXME: This should be unified with ResultsSummary or other NRWT layout tests code # FIXME: This should be unified with ResultsSummary or other NRWT layout tests code
......
...@@ -41,15 +41,19 @@ class WPTExpectationsUpdater(object): ...@@ -41,15 +41,19 @@ class WPTExpectationsUpdater(object):
self.finder = PathFinder(self.host.filesystem) self.finder = PathFinder(self.host.filesystem)
self.ports_with_no_results = set() self.ports_with_no_results = set()
self.ports_with_all_pass = set() self.ports_with_all_pass = set()
self.patchset = None
def run(self, args=None): def run(self, args=None):
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--patchset', default=None,
help='Patchset number to fetch new baselines from.')
parser.add_argument('-v', '--verbose', action='store_true', help='More verbose logging.') parser.add_argument('-v', '--verbose', action='store_true', help='More verbose logging.')
args = parser.parse_args(args) args = parser.parse_args(args)
log_level = logging.DEBUG if args.verbose else logging.INFO log_level = logging.DEBUG if args.verbose else logging.INFO
configure_logging(logging_level=log_level, include_time=True) configure_logging(logging_level=log_level, include_time=True)
self.patchset = args.patchset
self.update_expectations() self.update_expectations()
return 0 return 0
...@@ -105,7 +109,7 @@ class WPTExpectationsUpdater(object): ...@@ -105,7 +109,7 @@ class WPTExpectationsUpdater(object):
def get_latest_try_jobs(self): def get_latest_try_jobs(self):
"""Returns the latest finished try jobs as Build objects.""" """Returns the latest finished try jobs as Build objects."""
return self.git_cl.latest_try_jobs(self._get_try_bots()) return self.git_cl.latest_try_jobs(self._get_try_bots(), patchset=self.patchset)
def get_failing_results_dict(self, build): def get_failing_results_dict(self, build):
"""Returns a nested dict of failing test results. """Returns a nested dict of failing test results.
...@@ -255,21 +259,22 @@ class WPTExpectationsUpdater(object): ...@@ -255,21 +259,22 @@ class WPTExpectationsUpdater(object):
Returns: Returns:
A set of one or more test expectation strings with the first letter A set of one or more test expectation strings with the first letter
capitalized. Example: set(['Failure', 'Timeout']). capitalized. Example: {'Failure', 'Timeout'}.
""" """
actual_results = set(result.actual.split())
# If the result is MISSING, this implies that the test was not # If the result is MISSING, this implies that the test was not
# rebaselined and has an actual result but no baseline. We can't # rebaselined and has an actual result but no baseline. We can't
# add a Missing expectation (this is not allowed), but no other # add a Missing expectation (this is not allowed), but no other
# expectation is correct. # expectation is correct.
# We also want to skip any new manual tests that are not automated; # We also want to skip any new manual tests that are not automated;
# see crbug.com/708241 for context. # see crbug.com/708241 for context.
if (result.actual == 'MISSING' or if ('MISSING' in actual_results or
'-manual.' in test_name and result.actual == 'TIMEOUT'): '-manual.' in test_name and 'TIMEOUT' in actual_results):
return {'Skip'} return {'Skip'}
expectations = set() expectations = set()
failure_types = ('TEXT', 'IMAGE+TEXT', 'IMAGE', 'AUDIO') failure_types = {'TEXT', 'IMAGE+TEXT', 'IMAGE', 'AUDIO'}
other_types = ('TIMEOUT', 'CRASH', 'PASS') other_types = {'TIMEOUT', 'CRASH', 'PASS'}
for actual in result.actual.split(): for actual in actual_results:
if actual in failure_types: if actual in failure_types:
expectations.add('Failure') expectations.add('Failure')
if actual in other_types: if actual in other_types:
...@@ -496,14 +501,18 @@ class WPTExpectationsUpdater(object): ...@@ -496,14 +501,18 @@ class WPTExpectationsUpdater(object):
_log.info(' %s', test) _log.info(' %s', test)
blink_tool = self.finder.path_from_blink_tools('blink_tool.py') blink_tool = self.finder.path_from_blink_tools('blink_tool.py')
self.host.executive.run_command([ command = [
'python', 'python',
blink_tool, blink_tool,
'rebaseline-cl', 'rebaseline-cl',
'--verbose', '--verbose',
'--no-trigger-jobs', '--no-trigger-jobs',
'--fill-missing', '--fill-missing',
] + tests_to_rebaseline) ]
if self.patchset:
command.append('--patchset=' + str(self.patchset))
command += tests_to_rebaseline
self.host.executive.run_command(command)
return tests_to_rebaseline, test_results return tests_to_rebaseline, test_results
def get_tests_to_rebaseline(self, test_results): def get_tests_to_rebaseline(self, test_results):
...@@ -540,7 +549,7 @@ class WPTExpectationsUpdater(object): ...@@ -540,7 +549,7 @@ class WPTExpectationsUpdater(object):
""" """
if self.is_reference_test(test_name): if self.is_reference_test(test_name):
return False return False
if result.actual in ('CRASH', 'TIMEOUT', 'MISSING'): if any(x in result.actual for x in ('CRASH', 'TIMEOUT', 'MISSING')):
return False return False
return True return True
......
...@@ -256,12 +256,21 @@ class WPTExpectationsUpdaterTest(LoggingTestCase): ...@@ -256,12 +256,21 @@ class WPTExpectationsUpdaterTest(LoggingTestCase):
self.assertEqual( self.assertEqual(
updater.get_expectations(SimpleTestResult('FAIL', 'PASS', 'bug')), updater.get_expectations(SimpleTestResult('FAIL', 'PASS', 'bug')),
{'Pass'}) {'Pass'})
self.assertEqual(
updater.get_expectations(SimpleTestResult('FAIL', 'PASS PASS', 'bug')),
{'Pass'})
self.assertEqual( self.assertEqual(
updater.get_expectations(SimpleTestResult('FAIL', 'TIMEOUT', 'bug')), updater.get_expectations(SimpleTestResult('FAIL', 'TIMEOUT', 'bug')),
{'Timeout'}) {'Timeout'})
self.assertEqual(
updater.get_expectations(SimpleTestResult('FAIL', 'TIMEOUT TIMEOUT', 'bug')),
{'Timeout'})
self.assertEqual( self.assertEqual(
updater.get_expectations(SimpleTestResult('TIMEOUT', 'PASS', 'bug')), updater.get_expectations(SimpleTestResult('TIMEOUT', 'PASS', 'bug')),
{'Pass'}) {'Pass'})
self.assertEqual(
updater.get_expectations(SimpleTestResult('TIMEOUT', 'PASS PASS', 'bug')),
{'Pass'})
self.assertEqual( self.assertEqual(
updater.get_expectations(SimpleTestResult('PASS', 'TEXT PASS', 'bug')), updater.get_expectations(SimpleTestResult('PASS', 'TEXT PASS', 'bug')),
{'Pass', 'Failure'}) {'Pass', 'Failure'})
...@@ -277,6 +286,9 @@ class WPTExpectationsUpdaterTest(LoggingTestCase): ...@@ -277,6 +286,9 @@ class WPTExpectationsUpdaterTest(LoggingTestCase):
self.assertEqual( self.assertEqual(
updater.get_expectations(SimpleTestResult('Pass', 'MISSING', 'bug')), updater.get_expectations(SimpleTestResult('Pass', 'MISSING', 'bug')),
{'Skip'}) {'Skip'})
self.assertEqual(
updater.get_expectations(SimpleTestResult('Pass', 'MISSING MISSING', 'bug')),
{'Skip'})
self.assertEqual( self.assertEqual(
updater.get_expectations(SimpleTestResult('Pass', 'TIMEOUT', 'bug'), test_name='foo/bar-manual.html'), updater.get_expectations(SimpleTestResult('Pass', 'TIMEOUT', 'bug'), test_name='foo/bar-manual.html'),
{'Skip'}) {'Skip'})
...@@ -581,6 +593,26 @@ class WPTExpectationsUpdaterTest(LoggingTestCase): ...@@ -581,6 +593,26 @@ class WPTExpectationsUpdaterTest(LoggingTestCase):
# The original dict isn't modified. # The original dict isn't modified.
self.assertEqual(results, results_copy) self.assertEqual(results, results_copy)
def test_get_tests_to_rebaseline_handles_retries(self):
host = self.mock_host()
results = {
'external/wpt/test/foo.html': {
'bot': SimpleTestResult(expected='PASS', actual='TEXT TEXT', bug='bug'),
},
'external/wpt/test/bar.html': {
'bot': SimpleTestResult(expected='PASS', actual='TIMEOUT TIMEOUT', bug='bug'),
},
}
updater = WPTExpectationsUpdater(host)
tests_to_rebaseline, modified_test_results = updater.get_tests_to_rebaseline(results)
self.assertEqual(tests_to_rebaseline, ['external/wpt/test/foo.html'])
self.assertEqual(modified_test_results, {
'external/wpt/test/foo.html': {},
'external/wpt/test/bar.html': {
'bot': SimpleTestResult(expected='PASS', actual='TIMEOUT TIMEOUT', bug='bug'),
},
})
def test_run_no_issue_number(self): def test_run_no_issue_number(self):
updater = WPTExpectationsUpdater(self.mock_host()) updater = WPTExpectationsUpdater(self.mock_host())
updater.git_cl = MockGitCL(updater.host, issue_number='None') updater.git_cl = MockGitCL(updater.host, issue_number='None')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment