Commit 687768e9 authored by Dirk Pranke, committed by Commit Bot

Revert "Update json test results for webkit_layout_tests."

This reverts commit 53235652.

Reason for revert: Build is no longer reporting failures correctly.

Original change's description:
> Update json test results for webkit_layout_tests.
> 
> This brings the test results more into compliance with the
> current test result standard in bit.ly/chromium-json-test-results-format.
> 
> Notable changes:
> - If a test is run multiple times, we explicitly return every
>   `actual` result. Previously, if the test produced the same result every
>   time, we'd only return a single value for `actual`
> - If a test is skipped unexpectedly, that will be considered a regression
>   and an unexpected result.
> - The test results will contain `is_unexpected`, `is_flaky`, and
>   `is_regression` fields for the matching conditions.
> 
> Bug: 837047, 822078
> Change-Id: I4896e61469d3b576ea9e7dbbe16fac709f74b6b9
> Reviewed-on: https://chromium-review.googlesource.com/1103611
> Commit-Queue: Dirk Pranke <dpranke@chromium.org>
> Reviewed-by: Robert Ma <robertma@chromium.org>
> Reviewed-by: Quinten Yearsley <qyearsley@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#569466}
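
For reference, here is a minimal sketch (not code from the tree) of the per-test entry shape the reverted change emitted, using literal values taken from the test expectations this revert removes further down in the diff:

  # Sketch only: a per-test result entry as produced under the reverted change.
  # Optional boolean fields are written only when they are true.
  entry = {
      'expected': 'PASS',                                # expected result(s) for the test
      'actual': 'TEXT IMAGE+TEXT IMAGE+TEXT IMAGE+TEXT', # one token per run, retries included
      'is_unexpected': True,                             # the last run did not match 'expected'
      'is_regression': True,                             # the last run was an unexpected non-PASS
      'text_mismatch': 'general text mismatch',
  }
  # 'actual' carries one result per run:
  assert entry['actual'].split() == ['TEXT', 'IMAGE+TEXT', 'IMAGE+TEXT', 'IMAGE+TEXT']

After the revert, repeated identical results collapse back to a single token (for example 'actual': 'MISSING' rather than 'MISSING MISSING MISSING MISSING') and the 'is_regression' field is no longer written, as the updated test expectations below show.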

TBR=qyearsley@chromium.org,dpranke@chromium.org,seanmccullough@chromium.org,robertma@chromium.org

Change-Id: Icf1882e8eea328b115a458afa6378b35bb11a638
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 837047, 822078
Reviewed-on: https://chromium-review.googlesource.com/1112178
Reviewed-by: Dirk Pranke <dpranke@chromium.org>
Commit-Queue: Dirk Pranke <dpranke@chromium.org>
Cr-Commit-Position: refs/heads/master@{#569703}
parent b069c93a
@@ -936,7 +936,7 @@ class TestExpectations(object):
MISSING: 'missing results',
}
NON_TEST_OUTCOME_EXPECTATIONS = (REBASELINE, SLOW, WONTFIX)
NON_TEST_OUTCOME_EXPECTATIONS = (REBASELINE, SKIP, SLOW, WONTFIX)
BUILD_TYPES = ('debug', 'release')
@@ -983,10 +983,10 @@ class TestExpectations(object):
return True
if result in (TEXT, IMAGE, IMAGE_PLUS_TEXT, AUDIO) and FAIL in expected_results:
return True
if result == SKIP and WONTFIX in expected_results:
return True
if result == MISSING and test_needs_rebaselining:
return True
if result == SKIP:
return True
return False
@staticmethod
@@ -129,8 +129,8 @@ class MiscTests(Base):
self.assertEqual(TestExpectations.result_was_expected(FAIL, set([PASS]), test_needs_rebaselining=False), False)
# test handling of SKIPped tests and results
self.assertEqual(TestExpectations.result_was_expected(SKIP, set([CRASH]), test_needs_rebaselining=False), False)
self.assertEqual(TestExpectations.result_was_expected(SKIP, set([LEAK]), test_needs_rebaselining=False), False)
self.assertEqual(TestExpectations.result_was_expected(SKIP, set([CRASH]), test_needs_rebaselining=False), True)
self.assertEqual(TestExpectations.result_was_expected(SKIP, set([LEAK]), test_needs_rebaselining=False), True)
# test handling of MISSING results and the REBASELINE specifier
self.assertEqual(TestExpectations.result_was_expected(MISSING, set([PASS]), test_needs_rebaselining=True), True)
@@ -245,28 +245,16 @@ def summarize_results(port_obj, expectations, initial_results,
has_unexpected_pass = True
else:
has_expected = True
# TODO(crbug.com/855255): This code calls a test flaky if it has both
# expected and unexpected runs (NOT pass and failure); this is generally
# wrong (really it should just be if there are multiple kinds of results),
# but this works in the normal case because a test will only be retried
# if a result is unexpected, and if you get an expected result on the
# retry, then you did get multiple results. This fails if you get
# one kind of unexpected failure initially and another kind of
# unexpected failure on the retry (e.g., TIMEOUT CRASH), or if you
# explicitly run a test multiple times and get multiple expected results.
# A test is flaky if it has both expected and unexpected runs (NOT pass
# and failure).
is_flaky = has_expected and has_unexpected
test_dict = {}
test_dict['expected'] = expected
test_dict['actual'] = ' '.join(actual)
# Fields below are optional. To avoid bloating the output results json
# too much, only add them when they are True or non-empty.
if len(set(actual)) == 1:
actual = [actual[0]]
actual_types = [actual_types[0]]
if is_flaky:
num_flaky += 1
test_dict['is_flaky'] = True
elif all_pass or has_unexpected_pass:
# We count two situations as a "pass":
# 1. All test runs pass (which is obviously non-flaky, but does not
@@ -280,10 +268,19 @@ def summarize_results(port_obj, expectations, initial_results,
num_passes += 1
if not has_stderr and only_include_failing:
continue
elif has_unexpected:
elif has_unexpected and result.type != test_expectations.SKIP:
# Either no retries or all retries failed unexpectedly.
# TODO(robertma): When will there be unexpected skip? Do we really
# want to ignore them when counting regressions?
num_regressions += 1
test_dict = {}
test_dict['expected'] = expected
test_dict['actual'] = ' '.join(actual)
# Fields below are optional. To avoid bloating the output results json
# too much, only add them when they are True or non-empty.
rounded_run_time = round(initial_result.test_run_time, 1)
if rounded_run_time:
@@ -321,15 +318,11 @@ def summarize_results(port_obj, expectations, initial_results,
port_obj.get_option('pixel_tests') or initial_result.reftest_type,
port_obj.get_option('enable_sanitizer'))
# Note: is_unexpected and is_regression are intended to reflect the
# *last* result. In the normal use case (stop retrying failures
# once they pass), this is equivalent to saying that all of the
# results were unexpected failures.
last_result = actual_types[-1]
if not is_expected(last_result):
# Note: is_unexpected is intended to capture the *last* result. In the
# normal use case (stop retrying failures once they pass), this is
# equivalent to checking if none of the results is expected.
if not any(is_expected(actual_result) for actual_result in actual_types):
test_dict['is_unexpected'] = True
if last_result != test_expectations.PASS:
test_dict['is_regression'] = True
if initial_result.has_repaint_overlay:
test_dict['has_repaint_overlay'] = True
@@ -83,7 +83,7 @@ def summarized_results(port, expected, passing, flaky, only_include_failing=Fals
elif passing:
skipped_result = get_result('passes/skipped/skip.html')
skipped_result.type = test_expectations.SKIP
initial_results.add(skipped_result, True, test_is_slow)
initial_results.add(skipped_result, expected, test_is_slow)
initial_results.add(get_result('passes/text.html', run_time=1), expected, test_is_slow)
initial_results.add(get_result('failures/expected/audio.html'), expected, test_is_slow)
@@ -288,6 +288,7 @@ class SummarizedResultsTest(unittest.TestCase):
self.port._options.builder_name = 'dummy builder'
summary = summarized_results(self.port, expected=False, passing=True, flaky=False)
self.assertTrue(summary['tests']['passes']['text.html'])
self.assertTrue('is_unexpected' not in summary['tests']['passes']['text.html'])
self.assertEqual(summary['num_passes'], 5)
self.assertEqual(summary['num_regressions'], 0)
self.assertEqual(summary['num_flaky'], 0)
@@ -349,6 +350,7 @@ class SummarizedResultsTest(unittest.TestCase):
def test_summarized_results_flaky(self):
summary = summarized_results(self.port, expected=False, passing=False, flaky=True)
self.assertTrue('is_unexpected' not in summary['tests']['failures']['expected']['crash.html'])
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['expected'], 'CRASH')
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT AUDIO CRASH LEAK')
@@ -426,15 +428,15 @@ class SummarizedResultsTest(unittest.TestCase):
self.assertTrue(summary['tests']['passes']['text.html']['is_unexpected'])
self.assertEquals(summary['tests']['passes']['text.html']['expected'], 'PASS')
self.assertEquals(summary['tests']['passes']['text.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT')
self.assertEquals(summary['tests']['passes']['text.html']['actual'], 'TIMEOUT')
self.assertTrue(summary['tests']['failures']['expected']['crash.html']['is_unexpected'])
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['expected'], 'CRASH')
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT')
self.assertEquals(summary['tests']['failures']['expected']['crash.html']['actual'], 'TIMEOUT')
self.assertTrue(summary['tests']['failures']['expected']['leak.html']['is_unexpected'])
self.assertEquals(summary['tests']['failures']['expected']['leak.html']['expected'], 'LEAK')
self.assertEquals(summary['tests']['failures']['expected']['leak.html']['actual'], 'TIMEOUT TIMEOUT TIMEOUT TIMEOUT')
self.assertEquals(summary['tests']['failures']['expected']['leak.html']['actual'], 'TIMEOUT')
self.assertTrue(summary['tests']['failures']['expected']['audio.html']['is_unexpected'])
self.assertEquals(summary['tests']['failures']['expected']['audio.html']['expected'], 'FAIL')
@@ -617,27 +617,16 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
'failures/unexpected/text-image-checksum.html'],
tests_included=True, host=host)
self.assertEqual(details.exit_code, 2)
results = json.loads(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json'))
self.assertEqual(
results['tests']['failures']['unexpected']['text-image-checksum.html'],
{
'expected': 'PASS',
'actual': 'IMAGE+TEXT',
'is_unexpected': True,
'is_regression': True,
'text_mismatch': 'general text mismatch',
})
self.assertEqual(
results['tests']['failures']['unexpected']['missing_text.html'],
{
'expected': 'PASS',
'actual': 'MISSING',
'is_unexpected': True,
'is_regression': True,
'is_missing_text': True,
})
self.assertEqual(results['num_regressions'], 2)
self.assertEqual(results['num_flaky'], 0)
json_string = host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json')
self.assertTrue(json_string.find(
'"text-image-checksum.html":{'
'"expected":"PASS",'
'"text_mismatch":"general text mismatch",'
'"actual":"IMAGE+TEXT","is_unexpected":true') != -1)
self.assertTrue(json_string.find(
'"missing_text.html":{"expected":"PASS","is_missing_text":true,"actual":"MISSING","is_unexpected":true') != -1)
self.assertTrue(json_string.find('"num_regressions":2') != -1)
self.assertTrue(json_string.find('"num_flaky":0') != -1)
def test_different_failure_on_retry(self):
# This tests that if a test fails two different ways -- both unexpected
@@ -670,8 +659,8 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
def test_crash_with_stderr(self):
host = MockHost()
logging_run(['failures/unexpected/crash-with-stderr.html'], tests_included=True, host=host)
full_results = json.loads(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json'))
self.assertEqual(full_results['tests']['failures']['unexpected']['crash-with-stderr.html']['has_stderr'], True)
self.assertTrue(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json').find(
'{"crash-with-stderr.html":{"expected":"PASS","actual":"CRASH","has_stderr":true,"is_unexpected":true') != -1)
def test_no_image_failure_with_image_diff(self):
host = MockHost()
@@ -839,15 +828,11 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
host.filesystem.exists('/tmp/layout-test-results/retry_3/failures/unexpected/text-image-checksum-actual.png'))
json_string = host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json')
results = parse_full_results(json_string)
self.assertEqual(
results['tests']['failures']['unexpected']['text-image-checksum.html'],
{
'expected': 'PASS',
'actual': 'TEXT IMAGE+TEXT IMAGE+TEXT IMAGE+TEXT',
'is_regression': True,
'is_unexpected': True,
'text_mismatch': 'general text mismatch',
})
self.assertEqual(results['tests']['failures']['unexpected']['text-image-checksum.html'],
{'expected': 'PASS',
'actual': 'TEXT IMAGE+TEXT IMAGE+TEXT IMAGE+TEXT',
'is_unexpected': True,
'text_mismatch': 'general text mismatch'})
self.assertFalse(results['pixel_tests_enabled'])
self.assertTrue(details.enabled_pixel_tests_in_retry)
@@ -940,7 +925,7 @@ class RunTest(unittest.TestCase, StreamTestingMixin):
host = MockHost()
logging_run(['--no-show-results', 'reftests/foo/'], tests_included=True, host=host)
results = parse_full_results(host.filesystem.read_text_file('/tmp/layout-test-results/full_results.json'))
self.assertEqual(results['tests']['reftests']['foo']['unlistedtest.html']['actual'], 'MISSING MISSING MISSING MISSING')
self.assertEqual(results['tests']['reftests']['foo']['unlistedtest.html']['actual'], 'MISSING')
self.assertEqual(results['num_regressions'], 5)
self.assertEqual(results['num_flaky'], 0)
@@ -1139,33 +1124,12 @@ class EndToEndTest(unittest.TestCase):
self.assertTrue('multiple-mismatch-success.html' not in results['tests']['reftests']['foo'])
self.assertTrue('multiple-both-success.html' not in results['tests']['reftests']['foo'])
self.assertEqual(
results['tests']['reftests']['foo']['multiple-match-failure.html'],
{
'expected': 'PASS',
'actual': 'IMAGE IMAGE IMAGE IMAGE',
'reftest_type': ['=='],
'is_regression': True,
'is_unexpected': True,
})
self.assertEqual(
results['tests']['reftests']['foo']['multiple-mismatch-failure.html'],
{
'expected': 'PASS',
'actual': 'IMAGE IMAGE IMAGE IMAGE',
'reftest_type': ['!='],
'is_regression': True,
'is_unexpected': True,
})
self.assertEqual(
results['tests']['reftests']['foo']['multiple-both-failure.html'],
{
'expected': 'PASS',
'actual': 'IMAGE IMAGE IMAGE IMAGE',
'reftest_type': ['==', '!='],
'is_regression': True,
'is_unexpected': True,
})
self.assertEqual(results['tests']['reftests']['foo']['multiple-match-failure.html'],
{'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['=='], 'is_unexpected': True})
self.assertEqual(results['tests']['reftests']['foo']['multiple-mismatch-failure.html'],
{'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['!='], 'is_unexpected': True})
self.assertEqual(results['tests']['reftests']['foo']['multiple-both-failure.html'],
{'expected': 'PASS', 'actual': 'IMAGE', 'reftest_type': ['==', '!='], 'is_unexpected': True})
class RebaselineTest(unittest.TestCase, StreamTestingMixin):