Commit 687768e9 authored by Dirk Pranke, committed by Commit Bot

Revert "Update json test results for webkit_layout_tests."

This reverts commit 53235652.

Reason for revert: Build is no longer reporting failures correctly.

Original change's description:
> Update json test results for webkit_layout_tests.
> 
> This brings the test results more into compliance with the
> current test result standard in bit.ly/chromium-json-test-results-format.
> 
> Notable changes:
> - If a test is run multiple times, we explicitly return every
>   `actual` result. Previously, if the test produced the same result every
>   time, we'd only return a single value for `actual`
> - If a test is skipped unexpectedly, that will be considered a regression
>   and an unexpected result.
> - The test results will contain `is_unexpected`, `is_flaky`, and
>   `is_regression` fields for the matching conditions.
> 
> Bug: 837047, 822078
> Change-Id: I4896e61469d3b576ea9e7dbbe16fac709f74b6b9
> Reviewed-on: https://chromium-review.googlesource.com/1103611
> Commit-Queue: Dirk Pranke <dpranke@chromium.org>
> Reviewed-by: Robert Ma <robertma@chromium.org>
> Reviewed-by: Quinten Yearsley <qyearsley@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#569466}
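
For reference, here is a minimal sketch (not code from the tree) of the per-test entry shape the reverted change emitted, using literal values taken from the test expectations this revert removes further down in the diff:

  # Sketch only: a per-test result entry as produced under the reverted change.
  # Optional boolean fields are written only when they are true.
  entry = {
      'expected': 'PASS',                                # expected result(s) for the test
      'actual': 'TEXT IMAGE+TEXT IMAGE+TEXT IMAGE+TEXT', # one token per run, retries included
      'is_unexpected': True,                             # the last run did not match 'expected'
      'is_regression': True,                             # the last run was an unexpected non-PASS
      'text_mismatch': 'general text mismatch',
  }
  # 'actual' carries one result per run:
  assert entry['actual'].split() == ['TEXT', 'IMAGE+TEXT', 'IMAGE+TEXT', 'IMAGE+TEXT']

After the revert, repeated identical results collapse back to a single token (for example 'actual': 'MISSING' rather than 'MISSING MISSING MISSING MISSING') and the 'is_regression' field is no longer written, as the updated test expectations below show.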

TBR=qyearsley@chromium.org,dpranke@chromium.org,seanmccullough@chromium.org,robertma@chromium.org

Change-Id: Icf1882e8eea328b115a458afa6378b35bb11a638
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 837047, 822078
Reviewed-on: https://chromium-review.googlesource.com/1112178
Reviewed-by: Dirk Pranke <dpranke@chromium.org>
Commit-Queue: Dirk Pranke <dpranke@chromium.org>
Cr-Commit-Position: refs/heads/master@{#569703}
parent b069c93a
@@ -936,7 +936,7 @@ class TestExpectations(object):
MISSING: 'missing results',
}
NON_TEST_OUTCOME_EXPECTATIONS = (REBASELINE, SLOW, WONTFIX)
NON_TEST_OUTCOME_EXPECTATIONS = (REBASELINE, SKIP, SLOW, WONTFIX)
BUILD_TYPES = ('debug', 'release')
@@ -983,10 +983,10 @@ class TestExpectations(object):
return True
if result in (TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO) and FAIL in expected_results:
return True
if result == SKIP and WONTFIX in expected_results:
return True
if result == MISSING and test_needs_rebaselining:
return True
if result == SKIP:
return True
return False
@staticmethod
@@ -129,8 +129,8 @@ class MiscTests(Base):
self.assertEqual(TestExpectations.result_was_expected(FAIL, set([PASS]), test_needs_rebaselining=False), False)
# test handling of SKIPped tests and results
self.assertEqual(TestExpectations.result_was_expected(SKIP, set([CRASH]), test_needs_rebaselining=False), False)
self.assertEqual(TestExpectations.result_was_expected(SKIP, set([LEAK]), test_needs_rebaselining=False), False)
self.assertEqual(TestExpectations.result_was_expected(SKIP, set([CRASH]), test_needs_rebaselining=False), True)
self.assertEqual(TestExpectations.result_was_expected(SKIP, set([LEAK]), test_needs_rebaselining=False), True)
# test handling of MISSING results and the REBASELINE specifier
self.assertEqual(TestExpectations.result_was_expected(MISSING, set([PASS]), test_needs_rebaselining=True), True)
@@ -245,28 +245,16 @@ def summarize_results(port_obj, expectations, initial_results,
has_unexpected_pass = True
else:
has_expected = True
# TODO(crbug.com/855255): This code calls a test flaky if it has both
# expected and unexpected runs (NOT pass and failure); this is generally
# wrong (really it should just be if there are multiple kinds of results),
# but this works in the normal case because a test will only be retried
# if a result is unexpected, and if you get an expected result on the
# retry, then you did get multiple results. This fails if you get
# one kind of unexpected failure initially and another kind of
# unexpected failure on the retry (e.g., TIMEOUT CRASH), or if you
# explicitly run a test multiple times and get multiple expected results.
# A test is flaky if it has both expected and unexpected runs (NOT pass
# and failure).
is_flaky = has_expected and has_unexpected
test_dict = {}
test_dict['expected'] = expected
test_dict['actual'] = ' '.join(actual)
# Fields below are optional. To avoid bloating the output results json
# too much, only add them when they are True or non-empty.
if len(set(actual)) == 1:
actual = [actual[0]]
actual_types = [actual_types[0]]
if is_flaky:
num_flaky += 1
test_dict['is_flaky'] = True
elif all_pass or has_unexpected_pass:
# We count two situations as a "pass":
# 1. All test runs pass (which is obviously non-flaky, but does not
@@ -280,10 +268,19 @@ def summarize_results(port_obj, expectations, initial_results,
num_passes += 1
if not has_stderr and only_include_failing:
continue
elif has_unexpected:
elif has_unexpected and result.type != test_expectations.SKIP:
# Either no retries or all retries failed unexpectedly.
# TODO(robertma): When will there be unexpected skip? Do we really
# want to ignore them when counting regressions?
num_regressions += 1
test_dict = {}
test_dict['expected'] = expected
test_dict['actual'] = ' '.join(actual)
# Fields below are optional. To avoid bloating the output results json
# too much, only add them when they are True or non-empty.
rounded_run_time = round(initial_result.test_run_time, 1)
if rounded_run_time:
@@ -321,15 +318,11 @@ def summarize_results(port_obj, expectations, initial_results,
port_obj.get_option('pixel_tests') or initial_result.reftest_type,
port_obj.get_option('enable_sanitizer'))
# Note: is_unexpected and is_regression are intended to reflect the
# *last* result. In the normal use case (stop retrying failures
# once they pass), this is equivalent to saying that all of the
# results were unexpected failures.
last_result = actual_types[-1]
if not is_expected(last_result):
# Note: is_unexpected is intended to capture the *last* result. In the
# normal use case (stop retrying failures once they pass), this is
# equivalent to checking if none of the results is expected.
if not any(is_expected(actual_result) for actual_result in actual_types):
test_dict['is_unexpected'] = True
if last_result != test_expectations.PASS:
test_dict['is_regression'] = True
if initial_result.has_repaint_overlay:
test_dict['has_repaint_overlay'] = True
@@ -83,7 +83,7 @@ def summarized_results(port, expected, passing, flaky, only_include_failing=Fals
elif passing:
skipped_result = get_result('passes/skipped/skip.html')
skipped_result.type = test_expectations.SKIP
initial_results.add(skipped_result, True, test_is_slow)
initial_results.add(skipped_result, expected, test_is_slow)
initial_results.add(get_result('passes/text.html', run_time=1), expected, test_is_slow)
initial_results.add(get_result('failures/expected/audio.html'), expected, test_is_slow)
@@ -288,6 +288,7 @@ class SummarizedResultsTest(unittest.TestCase):
self.port._options.builder_name = 'dummy builder'
summary = summarized_results(self.port, expected=False, passing=True, flaky=False)
self.assertTrue(summary['tests']['passes']['text.html'])
self.assertTrue('is_unexpected' not in summary['tests']['passes']['text.html'])
self.assertEqual(summary['num_passes'], 5)
self.assertEqual(summary['num_regressions'], 0)
self.assertEqual(summary['num_flaky'], 0)
@@ -349,6 +350,7 @@ class SummarizedResultsTest(unittest.TestCase):
def test_summarized_results_flaky(self):
summary = summarized_results(self.port, expected=False, passing=False, flaky=True)
self.assertTrue('is_unexpected' not in summary['tests']['failures']['expected']['crash.html'])
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['expected'], 'CRASH')
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT AUDIO CRASH LEAK')
@@ -426,15 +428,15 @@ class SummarizedResultsTest(unittest.TestCase):
self.assertTrue(summary['tests']['passes']['text.html']['is_unexpected'])
self.assertEquals(summary['tests']['passes']['text.html']['expected'], 'PASS')
self.assertEquals(summary['tests']['passes']['text.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT')
self.assertEquals(summary['tests']['passes']['text.html']['actual'], 'TIMEOUT')
self.assertTrue(summary['tests']['failures']['expected']['crash.html']['is_unexpected'])
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['expected'], 'CRASH')
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT')
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT')
self.assertTrue(summary['tests']['failures']['expected']['leak.html']['is_unexpected'])
self.assertEquals(summary['tests']['failures']['expected']['leak.html']['expected'], 'LEAK')
self.assertEquals(summary['tests']['failures']['expected']['leak.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT')
self.assertEquals(summary['tests']['failures']['expected']['leak.html']['actual'], 'TIMEOUT')
self.assertTrue(summary['tests']['failures']['expected']['audio.html']['is_unexpected'])
self.assertEquals(summary['tests']['failures']['expected']['audio.html']['expected'], 'FAIL')
@@ -617,27 +617,16 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
'failures/unexpected/text-image-checksum.html'],
tests_included=True, host=host)
self.assertEqual(details.exit_code, 2)
results = json.loads(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json'))
self.assertEqual(
results['tests']['failures']['unexpected']['text-image-checksum.html'],
{
'expected': 'PASS',
'actual': 'IMAGE+TEXT',
'is_unexpected': True,
'is_regression': True,
'text_mismatch': 'general text mismatch',
})
self.assertEqual(
results['tests']['failures']['unexpected']['missing_text.html'],
{
'expected': 'PASS',
'actual': 'MISSING',
'is_unexpected': True,
'is_regression': True,
'is_missing_text': True,
})
self.assertEqual(results['num_regressions'], 2)
self.assertEqual(results['num_flaky'], 0)
json_string = host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json')
self.assertTrue(json_string.find(
'"text-image-checksum.html":{'
'"expected":"PASS",'
'"text_mismatch":"general text mismatch",'
'"actual":"IMAGE+TEXT","is_unexpected":true') != -1)
self.assertTrue(json_string.find(
'"missing_text.html":{"expected":"PASS","is_missing_text":true,"actual":"MISSING","is_unexpected":true') != -1)
self.assertTrue(json_string.find('"num_regressions":2') != -1)
self.assertTrue(json_string.find('"num_flaky":0') != -1)
def test_different_failure_on_retry(self):
# This tests that if a test fails two different ways -- both unexpected
@@ -670,8 +659,8 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
def test_crash_with_stderr(self):
host = MockHost()
logging_run(['failures/unexpected/crash-with-stderr.html'], tests_included=True, host=host)
full_results = json.loads(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json'))
self.assertEqual(full_results['tests']['failures']['unexpected']['crash-with-stderr.html']['has_stderr'], True)
self.assertTrue(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json').find(
'{"crash-with-stderr.html":{"expected":"PASS","actual":"CRASH","has_stderr":true,"is_unexpected":true') != -1)
def test_no_image_failure_with_image_diff(self):
host = MockHost()
@@ -839,15 +828,11 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
host.filesystem.exists('/tmp/layout-test-results/retry_3/failures/unexpected/text-image-checksum-actual.png'))
json_string = host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json')
results = parse_full_results(json_string)
self.assertEqual(
results['tests']['failures']['unexpected']['text-image-checksum.html'],
{
'expected': 'PASS',
'actual': 'TEXT IMAGE+TEXT IMAGE+TEXT IMAGE+TEXT',
'is_regression': True,
'is_unexpected': True,
'text_mismatch': 'general text mismatch',
})
self.assertEqual(results['tests']['failures']['unexpected']['text-image-checksum.html'],
{'expected': 'PASS',
'actual': 'TEXT IMAGE+TEXT IMAGE+TEXT IMAGE+TEXT',
'is_unexpected': True,
'text_mismatch': 'general text mismatch'})
self.assertFalse(results['pixel_tests_enabled'])
self.assertTrue(details.enabled_pixel_tests_in_retry)
@@ -940,7 +925,7 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
host = MockHost()
logging_run(['--no-show-results', 'reftests/foo/'], tests_included=True, host=host)
results = parse_full_results(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json'))
self.assertEqual(results['tests']['reftests']['foo']['unlistedtest.html']['actual'], 'MISSING MISSING MISSING MISSING')
self.assertEqual(results['tests']['reftests']['foo']['unlistedtest.html']['actual'], 'MISSING')
self.assertEqual(results['num_regressions'], 5)
self.assertEqual(results['num_flaky'], 0)
@@ -1139,33 +1124,12 @@ class EndToEndTest(unittest.TestCase):
self.assertTrue('multiple-mismatch-success.html' not in results['tests']['reftests']['foo'])
self.assertTrue('multiple-both-success.html' not in results['tests']['reftests']['foo'])
self.assertEqual(
results['tests']['reftests']['foo']['multiple-match-failure.html'],
{
'expected': 'PASS',
'actual': 'IMAGE IMAGE IMAGE IMAGE',
'reftest_type': ['=='],
'is_regression': True,
'is_unexpected': True,
})
self.assertEqual(
results['tests']['reftests']['foo']['multiple-mismatch-failure.html'],
{
'expected': 'PASS',
'actual': 'IMAGE IMAGE IMAGE IMAGE',
'reftest_type': ['!='],
'is_regression': True,
'is_unexpected': True,
})
self.assertEqual(
results['tests']['reftests']['foo']['multiple-both-failure.html'],
{
'expected': 'PASS',
'actual': 'IMAGE IMAGE IMAGE IMAGE',
'reftest_type': ['==', '!='],
'is_regression': True,
'is_unexpected': True,
})
self.assertEqual(results['tests']['reftests']['foo']['multiple-match-failure.html'],
{'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['=='], 'is_unexpected': True})
self.assertEqual(results['tests']['reftests']['foo']['multiple-mismatch-failure.html'],
{'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['!='], 'is_unexpected': True})
self.assertEqual(results['tests']['reftests']['foo']['multiple-both-failure.html'],
{'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['==', '!='], 'is_unexpected': True})
class RebaselineTest(unittest.TestCase, StreamTestingMixin):