[blinkpy] Fix expectations updater after JSON output tweaks

Following https://crrev.com/c/1154107, the test results JSON no longer collapses multiple retries if they are all same, so some of the assumptions in the expectations updater no longer hold. This CL changes the updater to properly support multiple retries in the "actual" results. Besides, add a "--patchset" argument to the tool for easier debugging. Bug: 870528 Change-Id: Ia6045eac7781708046a7ab8f07eb40c23a9f5897 Reviewed-on: https://chromium-review.googlesource.com/1164027 Commit-Queue: Robert Ma <robertma@chromium.org> Reviewed-by: Quinten Yearsley <qyearsley@chromium.org> Cr-Commit-Position: refs/heads/master@{#581293}

[blinkpy] Fix expectations updater after JSON output tweaks
Following https://crrev.com/c/1154107, the test results JSON no longer collapses multiple retries if they are all same, so some of the assumptions in the expectations updater no longer hold. This CL changes the updater to properly support multiple retries in the "actual" results. Besides, add a "--patchset" argument to the tool for easier debugging. Bug: 870528 Change-Id: Ia6045eac7781708046a7ab8f07eb40c23a9f5897 Reviewed-on: https://chromium-review.googlesource.com/1164027 Commit-Queue: Robert Ma <robertma@chromium.org> Reviewed-by: Quinten Yearsley <qyearsley@chromium.org> Cr-Commit-Position: refs/heads/master@{#581293}
356557dd · Robert Ma · Commit Bot · 24678e58 · 356557dd · 356557dd
Commit 356557dd authored Aug 07, 2018 by Robert Ma Committed by Commit Bot Aug 07, 2018
3 changed files
--- a/third_party/blink/tools/blinkpy/common/net/layout_test_results.py
+++ b/third_party/blink/tools/blinkpy/common/net/layout_test_results.py
@@ -68,12 +68,14 @@ class LayoutTestResult(object):
    def expected_results(self):
        return self._result_dict['expected']
+    def last_retry_result(self):
+        return self.actual_results().split()[-1]
    def has_mismatch_result(self):
-        last_retry_result = self.actual_results().split()[-1]
+        return self.last_retry_result() in ('TEXT', 'IMAGE', 'IMAGE+TEXT', 'AUDIO')
-        return last_retry_result in ('TEXT', 'IMAGE', 'IMAGE+TEXT', 'AUDIO')
    def is_missing_baseline(self):
-        return self._result_dict['actual'] == 'MISSING'
+        return self.last_retry_result() == 'MISSING'
 # FIXME: This should be unified with ResultsSummary or other NRWT layout tests code

--- a/third_party/blink/tools/blinkpy/w3c/wpt_expectations_updater.py
+++ b/third_party/blink/tools/blinkpy/w3c/wpt_expectations_updater.py
@@ -41,15 +41,19 @@ class WPTExpectationsUpdater(object):
        self.finder = PathFinder(self.host.filesystem)
        self.ports_with_no_results = set()
        self.ports_with_all_pass = set()
+        self.patchset = None
    def run(self, args=None):
        parser = argparse.ArgumentParser(description=__doc__)
+        parser.add_argument('--patchset', default=None,
+                            help='Patchset number to fetch new baselines from.')
        parser.add_argument('-v', '--verbose', action='store_true', help='More verbose logging.')
        args = parser.parse_args(args)
        log_level = logging.DEBUG if args.verbose else logging.INFO
        configure_logging(logging_level=log_level, include_time=True)
+        self.patchset = args.patchset
        self.update_expectations()
        return 0
@@ -105,7 +109,7 @@ class WPTExpectationsUpdater(object):
    def get_latest_try_jobs(self):
        """Returns the latest finished try jobs as Build objects."""
-        return self.git_cl.latest_try_jobs(self._get_try_bots())
+        return self.git_cl.latest_try_jobs(self._get_try_bots(), patchset=self.patchset)
    def get_failing_results_dict(self, build):
        """Returns a nested dict of failing test results.
@@ -255,21 +259,22 @@ class WPTExpectationsUpdater(object):
        Returns:
            A set of one or more test expectation strings with the first letter
-            capitalized. Example: set(['Failure', 'Timeout']).
+            capitalized. Example: {'Failure', 'Timeout'}.
        """
+        actual_results = set(result.actual.split())
        # If the result is MISSING, this implies that the test was not
        # rebaselined and has an actual result but no baseline. We can't
        # add a Missing expectation (this is not allowed), but no other
        # expectation is correct.
        # We also want to skip any new manual tests that are not automated;
        # see crbug.com/708241 for context.
-        if (result.actual == 'MISSING' or
+        if ('MISSING' in actual_results or
-                '-manual.' in test_name and result.actual == 'TIMEOUT'):
+                '-manual.' in test_name and 'TIMEOUT' in actual_results):
            return {'Skip'}
        expectations = set()
-        failure_types = ('TEXT', 'IMAGE+TEXT', 'IMAGE', 'AUDIO')
+        failure_types = {'TEXT', 'IMAGE+TEXT', 'IMAGE', 'AUDIO'}
-        other_types = ('TIMEOUT', 'CRASH', 'PASS')
+        other_types = {'TIMEOUT', 'CRASH', 'PASS'}
-        for actual in result.actual.split():
+        for actual in actual_results:
            if actual in failure_types:
                expectations.add('Failure')
            if actual in other_types:
@@ -496,14 +501,18 @@ class WPTExpectationsUpdater(object):
            _log.info('  %s', test)
        blink_tool = self.finder.path_from_blink_tools('blink_tool.py')
-        self.host.executive.run_command([
+        command = [
            'python',
            blink_tool,
            'rebaseline-cl',
            '--verbose',
            '--no-trigger-jobs',
            '--fill-missing',
-        ] + tests_to_rebaseline)
+        ]
+        if self.patchset:
+            command.append('--patchset=' + str(self.patchset))
+        command += tests_to_rebaseline
+        self.host.executive.run_command(command)
        return tests_to_rebaseline, test_results
    def get_tests_to_rebaseline(self, test_results):
@@ -540,7 +549,7 @@ class WPTExpectationsUpdater(object):
        """
        if self.is_reference_test(test_name):
            return False
-        if result.actual in ('CRASH', 'TIMEOUT', 'MISSING'):
+        if any(x in result.actual for x in ('CRASH', 'TIMEOUT', 'MISSING')):
            return False
        return True

--- a/third_party/blink/tools/blinkpy/w3c/wpt_expectations_updater_unittest.py
+++ b/third_party/blink/tools/blinkpy/w3c/wpt_expectations_updater_unittest.py
@@ -256,12 +256,21 @@ class WPTExpectationsUpdaterTest(LoggingTestCase):
        self.assertEqual(
            updater.get_expectations(SimpleTestResult('FAIL', 'PASS', 'bug')),
            {'Pass'})
+        self.assertEqual(
+            updater.get_expectations(SimpleTestResult('FAIL', 'PASS PASS', 'bug')),
+            {'Pass'})
        self.assertEqual(
            updater.get_expectations(SimpleTestResult('FAIL', 'TIMEOUT', 'bug')),
            {'Timeout'})
+        self.assertEqual(
+            updater.get_expectations(SimpleTestResult('FAIL', 'TIMEOUT TIMEOUT', 'bug')),
+            {'Timeout'})
        self.assertEqual(
            updater.get_expectations(SimpleTestResult('TIMEOUT', 'PASS', 'bug')),
            {'Pass'})
+        self.assertEqual(
+            updater.get_expectations(SimpleTestResult('TIMEOUT', 'PASS PASS', 'bug')),
+            {'Pass'})
        self.assertEqual(
            updater.get_expectations(SimpleTestResult('PASS', 'TEXT PASS', 'bug')),
            {'Pass', 'Failure'})
@@ -277,6 +286,9 @@ class WPTExpectationsUpdaterTest(LoggingTestCase):
        self.assertEqual(
            updater.get_expectations(SimpleTestResult('Pass', 'MISSING', 'bug')),
            {'Skip'})
+        self.assertEqual(
+            updater.get_expectations(SimpleTestResult('Pass', 'MISSING MISSING', 'bug')),
+            {'Skip'})
        self.assertEqual(
            updater.get_expectations(SimpleTestResult('Pass', 'TIMEOUT', 'bug'), test_name='foo/bar-manual.html'),
            {'Skip'})
@@ -581,6 +593,26 @@ class WPTExpectationsUpdaterTest(LoggingTestCase):
        # The original dict isn't modified.
        self.assertEqual(results, results_copy)
+    def test_get_tests_to_rebaseline_handles_retries(self):
+        host = self.mock_host()
+        results = {
+            'external/wpt/test/foo.html': {
+                'bot': SimpleTestResult(expected='PASS', actual='TEXT TEXT', bug='bug'),
+            },
+            'external/wpt/test/bar.html': {
+                'bot': SimpleTestResult(expected='PASS', actual='TIMEOUT TIMEOUT', bug='bug'),
+            },
+        }
+        updater = WPTExpectationsUpdater(host)
+        tests_to_rebaseline, modified_test_results = updater.get_tests_to_rebaseline(results)
+        self.assertEqual(tests_to_rebaseline, ['external/wpt/test/foo.html'])
+        self.assertEqual(modified_test_results, {
+            'external/wpt/test/foo.html': {},
+            'external/wpt/test/bar.html': {
+                'bot': SimpleTestResult(expected='PASS', actual='TIMEOUT TIMEOUT', bug='bug'),
+            },
+        })
    def test_run_no_issue_number(self):
        updater = WPTExpectationsUpdater(self.mock_host())
        updater.git_cl = MockGitCL(updater.host, issue_number='None')