Commit 6d938afc authored by Robert Ma, committed by Commit Bot

[blinkpy] List all repeats & iterations in actual results

Previously, if a test was retried (because of unexpected failures), the
results from all retry attempts would show up in the "actual" field in
the summary; but if a test ran multiple times because of --iterations or
--repeat-each, only the last result would be included in the summary.

This CL makes iterations and repeats behave the same as retries in terms
of the result summary. All results from all iterations and repeats from
all retries are now included in the "actual" result field in summary.
This will make it easier for other tools (e.g. FindIt) and users to
debug flaky tests, and won't affect existing bots as they don't use
--iterations or --repeat-each.
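
To illustrate the change, here is a hedged sketch of the slice of the summary that the new unit test below asserts (only these keys are shown; the real full-results JSON contains more fields): a single test run three times up front and twice on retry now lists all five results in "actual".

```python
# Sketch only: mirrors the assertions in the new
# test_summarized_results_with_iterations unit test further down; every
# other field of the real summary is omitted here.
summary = {
    'tests': {
        'passes': {
            'text.html': {
                'expected': 'PASS',
                # 3 initial runs + 2 retry runs, in order, space-separated.
                'actual': 'CRASH IMAGE TIMEOUT TEXT LEAK',
            },
        },
    },
    'num_flaky': 0,
    'num_passes': 0,
    'num_regressions': 1,
}
```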

Some code-health improvements are done along the way (e.g.
expectation_to_string).

There are some questions regarding the existing behaviours. In order to
minimize the risk, this CL strives to ensure backward compatibility. We
will revisit some of the behaviours later.

Bug: 828605
Change-Id: Ibb5a329eb8dc60fd0ae857083ebddc3b1c86f5f0
Reviewed-on: https://chromium-review.googlesource.com/1029430
Commit-Queue: Robert Ma <robertma@chromium.org>
Reviewed-by: Dirk Pranke <dpranke@chromium.org>
Reviewed-by: Quinten Yearsley <qyearsley@chromium.org>
Cr-Commit-Position: refs/heads/master@{#555848}
parent a1b71cc6
@@ -395,6 +395,10 @@ class Manager(object):
        test_inputs = []
        for _ in xrange(iterations):
+           # TODO(crbug.com/650747): We may want to switch the two loops below
+           # to make the behavior consistent with gtest runner (--gtest_repeat
+           # is an alias for --repeat-each now), which looks like "ABCABCABC".
+           # And remember to update the help text when we do so.
            for test in tests_to_run:
                for _ in xrange(repeat_each):
                    test_inputs.append(self._test_input_for_file(test))
...
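
As a standalone sketch of the ordering question raised in the TODO above (this is not the Manager code itself, which builds TestInput objects rather than appending names), the current nesting gives --repeat-each the "AAABBBCCC" grouping and --iterations the "ABCABCABC" interleaving mentioned in the help text:

```python
# Simplified scheduling loop, assuming plain test names instead of TestInput
# objects; mirrors the nesting order shown in the hunk above.
def schedule(tests, iterations, repeat_each):
    ordered = []
    for _ in range(iterations):            # outer: repeat the whole set
        for test in tests:
            for _ in range(repeat_each):   # inner: repeat one test back to back
                ordered.append(test)
    return ordered

print(schedule(['A', 'B', 'C'], iterations=3, repeat_each=1))
# ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C']  -- --iterations: ABCABCABC

print(schedule(['A', 'B', 'C'], iterations=1, repeat_each=3))
# ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C']  -- --repeat-each: AAABBBCCC
```

Since --gtest_repeat is now an alias for --repeat-each, it produces the AAABBB grouping rather than gtest's ABCABC order, which is what the TODO proposes to revisit by swapping the two inner loops.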
@@ -188,7 +188,7 @@ class BotTestExpectations(object):
    def unexpected_results_by_path(self):
        """For tests with unexpected results, returns original expectations + results."""
        def exp_to_string(exp):
-           return TestExpectations.EXPECTATIONS_TO_STRING.get(exp, None).upper()
+           return TestExpectations.EXPECTATIONS_TO_STRING.get(exp, None)

        def string_to_exp(string):
            # Needs a bit more logic than the method above,
...
@@ -720,17 +720,10 @@ class TestExpectationsModel(object):
            expectations.remove(SKIP)

        for expectation in expectations:
-           retval.append(self.expectation_to_string(expectation))
+           retval.append(TestExpectations.expectation_to_string(expectation))

        return ' '.join(retval)

-   def expectation_to_string(self, expectation):
-       """Return the uppercased string equivalent of a given expectation."""
-       for item in TestExpectations.EXPECTATIONS.items():
-           if item[1] == expectation:
-               return item[0].upper()
-       raise ValueError(expectation)
-
    def remove_expectation_line(self, test):
        if not self.has_test(test.name):
            return
@@ -926,7 +919,7 @@ class TestExpectations(object):
        TestExpectationParser.REBASELINE_MODIFIER: REBASELINE,
    }

-   EXPECTATIONS_TO_STRING = dict((k, v) for (v, k) in EXPECTATIONS.iteritems())
+   EXPECTATIONS_TO_STRING = {k: v.upper() for (v, k) in EXPECTATIONS.iteritems()}

    # (aggregated by category, pass/fail/skip, type)
    EXPECTATION_DESCRIPTIONS = {
@@ -965,6 +958,14 @@ class TestExpectations(object):
        assert ' ' not in string  # This only handles one expectation at a time.
        return cls.EXPECTATIONS.get(string.lower())

+   @classmethod
+   def expectation_to_string(cls, expectation):
+       """Return the uppercased string equivalent of a given expectation."""
+       try:
+           return cls.EXPECTATIONS_TO_STRING[expectation]
+       except KeyError:
+           raise ValueError(expectation)
+
    @staticmethod
    def result_was_expected(result, expected_results, test_needs_rebaselining):
        """Returns whether we got a result we were expecting.
@@ -1118,9 +1119,6 @@ class TestExpectations(object):
    def get_expectations_string(self, test):
        return self._model.get_expectations_string(test)

-   def expectation_to_string(self, expectation):
-       return self._model.expectation_to_string(expectation)
-
    def matches_an_expected_result(self, test, result, pixel_tests_are_enabled, sanitizer_is_enabled):
        expected_results = self._model.get_expectations(test)
        if sanitizer_is_enabled:
...
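
For context on the refactoring above, a minimal sketch of the inverted-and-uppercased mapping pattern, using made-up placeholder constants rather than the real webkitpy expectation values:

```python
# Toy stand-in for the real table; PASS/FAIL values are placeholders.
PASS, FAIL = 0, 1
EXPECTATIONS = {'pass': PASS, 'fail': FAIL}

# Invert once and uppercase eagerly, so callers such as
# BotTestExpectations.exp_to_string no longer need .upper() (Python 2
# iteritems(), as in the codebase; use items() on Python 3).
EXPECTATIONS_TO_STRING = {k: v.upper() for (v, k) in EXPECTATIONS.iteritems()}
assert EXPECTATIONS_TO_STRING == {PASS: 'PASS', FAIL: 'FAIL'}

def expectation_to_string(expectation):
    """Same contract as the new classmethod: unknown values raise ValueError."""
    try:
        return EXPECTATIONS_TO_STRING[expectation]
    except KeyError:
        raise ValueError(expectation)
```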
@@ -398,6 +398,27 @@ class SummarizedResultsTest(unittest.TestCase):
        self.assertEquals(summary['num_passes'], 0)
        self.assertEquals(summary['num_regressions'], 0)

+   def test_summarized_results_with_iterations(self):
+       test_name = 'passes/text.html'
+       expectations = test_expectations.TestExpectations(self.port, [test_name])
+       initial_results = test_run_results.TestRunResults(expectations, 3)
+       initial_results.add(get_result(test_name, test_expectations.CRASH), False, False)
+       initial_results.add(get_result(test_name, test_expectations.IMAGE), False, False)
+       initial_results.add(get_result(test_name, test_expectations.TIMEOUT), False, False)
+       all_retry_results = [test_run_results.TestRunResults(expectations, 2)]
+       all_retry_results[0].add(get_result(test_name, test_expectations.TEXT), False, False)
+       all_retry_results[0].add(get_result(test_name, test_expectations.LEAK), False, False)
+       summary = test_run_results.summarize_results(
+           self.port, expectations, initial_results, all_retry_results,
+           enabled_pixel_tests_in_retry=True)
+       print summary
+       self.assertEquals(summary['tests']['passes']['text.html']['expected'], 'PASS')
+       self.assertEquals(summary['tests']['passes']['text.html']['actual'], 'CRASH IMAGE TIMEOUT TEXT LEAK')
+       self.assertEquals(summary['num_flaky'], 0)
+       self.assertEquals(summary['num_passes'], 0)
+       self.assertEquals(summary['num_regressions'], 1)
+
    def test_summarized_results_regression(self):
        summary = summarized_results(self.port, expected=False, passing=False, flaky=False)
...
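
The motivation for listing every run is that downstream consumers can now tell a consistent regression from a flake by scanning "actual". A rough consumer-side sketch (not FindIt's actual logic, and ignoring the finer expectation-matching rules such as FAIL covering TEXT):

```python
def classify(expected, actual):
    """Rough classification from the summary strings alone (illustrative)."""
    expected_set = set(expected.split())
    runs = actual.split()
    if all(run in expected_set for run in runs):
        return 'expected'
    if any(run in expected_set for run in runs):
        return 'flaky'
    return 'regression'

print(classify('PASS', 'CRASH IMAGE TIMEOUT TEXT LEAK'))  # 'regression'
print(classify('PASS', 'PASS TEXT PASS'))                 # 'flaky'
```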
@@ -342,7 +342,6 @@ def parse_args(args):
                "'unexpected' == Ignore any tests that had unexpected results on the bot.")),
        optparse.make_option(
            '--iterations',
-           '--gtest_repeat',
            type='int',
            default=1,
            help='Number of times to run the set of tests (e.g. ABCABCABC)'),
@@ -379,6 +378,7 @@ def parse_args(args):
            help='Output per-test profile information, using the specified profiler.'),
        optparse.make_option(
            '--repeat-each',
+           '--gtest_repeat',
            type='int',
            default=1,
            help='Number of times to run each test (e.g. AAABBBCCC)'),
...
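
The two hunks above simply move the --gtest_repeat alias from --iterations to --repeat-each. A standalone optparse sketch (not the real parse_args) of how such an alias behaves:

```python
# Both flag strings feed the same destination, so --gtest_repeat behaves like
# --repeat-each; with optparse, the last occurrence on the command line wins,
# which is what test_gtest_repeat_overrides_repeat_each below relies on.
import optparse

parser = optparse.OptionParser()
parser.add_option('--repeat-each', '--gtest_repeat', type='int', default=1,
                  dest='repeat_each',
                  help='Number of times to run each test (e.g. AAABBBCCC)')

options, _ = parser.parse_args(['--repeat-each', '4', '--gtest_repeat', '2'])
print(options.repeat_each)  # 2
```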
@@ -401,12 +401,12 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
    def test_gtest_repeat(self):
        tests_to_run = ['passes/image.html', 'passes/text.html']
        tests_run = get_tests_run(['--gtest_repeat', '2', '--order', 'natural'] + tests_to_run)
-       self.assertEqual(tests_run, ['passes/image.html', 'passes/text.html', 'passes/image.html', 'passes/text.html'])
+       self.assertEqual(tests_run, ['passes/image.html', 'passes/image.html', 'passes/text.html', 'passes/text.html'])

-   def test_gtest_repeat_overrides_iterations(self):
+   def test_gtest_repeat_overrides_repeat_each(self):
        tests_to_run = ['passes/image.html', 'passes/text.html']
-       tests_run = get_tests_run(['--iterations', '4', '--gtest_repeat', '2', '--order', 'natural'] + tests_to_run)
-       self.assertEqual(tests_run, ['passes/image.html', 'passes/text.html', 'passes/image.html', 'passes/text.html'])
+       tests_run = get_tests_run(['--repeat-each', '4', '--gtest_repeat', '2', '--order', 'natural'] + tests_to_run)
+       self.assertEqual(tests_run, ['passes/image.html', 'passes/image.html', 'passes/text.html', 'passes/text.html'])

    def test_ignore_flag(self):
        # Note that passes/image.html is expected to be run since we specified it directly.
...