Commit 3c2a7b80 authored by ojan@chromium.org

Improve flakiness logic for print-flaky-tests.

We only compare the 2nd attempt of a run to the test's expected results to
see if it was very flaky. Instead, we should see if any of the runs had
an unexpected result. That way, any test that gets marked flaky will
stop getting printed out by this script as soon as the bots have cycled once.
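
In rough terms, the new check looks like this (an illustrative sketch, not
the patch itself; had_unexpected_result is a hypothetical helper built on the
result_was_expected call used in the patch):

    from webkitpy.layout_tests.models.test_expectations import TestExpectations

    def had_unexpected_result(result_enums, expected_enums):
        # Flag the entry if *any* attempt ran unexpectedly, not just the 2nd one.
        return any(
            not TestExpectations.result_was_expected(
                result, expected_enums, test_needs_rebaselining=False)
            for result in result_enums)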

Also, use TestExpectations.result_was_expected to determine whether a result
was actually expected. The previous "in" check didn't work for cases like Text,
which is a subset of Failure rather than being listed verbatim in
TestExpectations as Text.
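
For example (a hedged sketch; it assumes the module-level PASS, FAIL and TEXT
enum values exported by test_expectations):

    from webkitpy.layout_tests.models.test_expectations import (
        TestExpectations, PASS, FAIL, TEXT)

    expected = [PASS, FAIL]  # e.g. a "[ Pass Failure ]" expectation line
    actual = TEXT            # a text mismatch on one attempt

    # A plain membership test misses the subtyping: TEXT is not literally
    # listed, so the result is wrongly treated as unexpected.
    print(actual in expected)  # False

    # result_was_expected() knows a Text failure satisfies a Failure expectation.
    print(TestExpectations.result_was_expected(
        actual, expected, test_needs_rebaselining=False))  # True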

Finally, just use the length of the results entry to decide whether something
is very flaky vs. regular flaky. If there are <= 2 entries, then it's not
very flaky. That catches both the case of a test that reliably fails and the
case of a test that fails the first attempt and runs as expected on the
second (not necessarily passing on the second attempt, just matching its
listing in TestExpectations for that run).
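
Roughly, the length heuristic reads like this (a standalone sketch with a
hypothetical is_very_flaky helper, not the code in the patch; result_types is
the list of result codes recorded for one entry):

    def is_very_flaky(result_types):
        # No retry happened, so the test did not flake at all.
        if len(result_types) <= 1:
            return False
        # Exactly two entries: it failed the first attempt and then ran as
        # expected on the first retry (a pass stops further retries), so it
        # is only regular flaky.
        if len(result_types) == 2:
            return False
        # Three or more entries: the first retry was also unexpected.
        return True

    print(is_very_flaky(['F', 'P']))       # False: recovered on the first retry
    print(is_very_flaky(['F', 'F', 'P']))  # True: needed more than one retry

The patch additionally requires that at least one of the attempts was
unexpected before reporting the entry (see _flaky_types_in_results below).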

R=joelo@chromium.org

Review URL: https://codereview.chromium.org/1289163002 .

git-svn-id: svn://svn.chromium.org/blink/trunk@200768 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent 2f5747ed
@@ -36,7 +36,7 @@ import urllib
import urllib2
from webkitpy.layout_tests.port import builders
from webkitpy.layout_tests.models.test_expectations import TestExpectations
from webkitpy.layout_tests.models.test_expectations import TestExpectations, PASS
from webkitpy.layout_tests.models.test_expectations import TestExpectationLine
@@ -152,8 +152,6 @@ class BotTestExpectations(object):
# The JSON can contain results for expectations, not just actual result types.
NON_RESULT_TYPES = ['S', 'X'] # SLOW, SKIP
PASS_EXPECTATION = 'PASS'
# specifiers arg is used in unittests to avoid the static dependency on builders.
def __init__(self, results_json, specifiers=None):
self.results_json = results_json
@@ -167,7 +165,7 @@ class BotTestExpectations(object):
line.path = test_path # FIXME: Should this be normpath?
line.matching_tests = [test_path]
line.bugs = ["crbug.com/FILE_A_BUG_BEFORE_COMMITTING_THIS"]
line.expectations = sorted(map(self.results_json.expectation_for_type, flaky_types))
line.expectations = sorted(flaky_types)
line.specifiers = self.specifiers
return line
@@ -178,7 +176,7 @@ class BotTestExpectations(object):
flaky_types = self._flaky_types_in_results(entry, only_ignore_very_flaky)
if len(flaky_types) <= 1:
continue
flakes_by_path[test_path] = sorted(map(self.results_json.expectation_for_type, flaky_types))
flakes_by_path[test_path] = sorted(flaky_types)
return flakes_by_path
def unexpected_results_by_path(self):
@@ -247,12 +245,18 @@ class BotTestExpectations(object):
return results
def _result_to_enum(self, result):
return TestExpectations.EXPECTATIONS[result.lower()]
def _flaky_types_in_results(self, results_entry, only_ignore_very_flaky):
flaky_results = set()
latest_expectations = [self.PASS_EXPECTATION]
# Always include pass as an expected result. Passes will never turn the bot red.
# This fixes cases where the expectations have an implicit Pass, e.g. [ Slow ].
latest_expectations = [PASS]
if self.results_json.EXPECTATIONS_KEY in results_entry:
latest_expectations = results_entry[self.results_json.EXPECTATIONS_KEY].split(' ')
expectations_list = results_entry[self.results_json.EXPECTATIONS_KEY].split(' ')
latest_expectations += [self._result_to_enum(expectation) for expectation in expectations_list]
for result_item in results_entry[self.results_json.RESULTS_KEY]:
_, result_types_str = self.results_json.occurances_and_type_from_result_item(result_item)
@@ -261,7 +265,7 @@ class BotTestExpectations(object):
for result_type in result_types_str:
# TODO(ojan): Remove this if-statement once crbug.com/514378 is fixed.
if result_type not in self.NON_RESULT_TYPES:
result_types.append(result_type)
result_types.append(self.results_json.expectation_for_type(result_type))
# It didn't flake if it didn't retry.
if len(result_types) <= 1:
@@ -270,13 +274,29 @@ class BotTestExpectations(object):
# If the test ran as expected after only one retry, it's not very flaky.
# It's only very flaky if it failed the first run and the first retry
# and then ran as expected in one of the subsequent retries.
if only_ignore_very_flaky and self.results_json.expectation_for_type(result_types[1]) in latest_expectations:
# Once we get a pass, we don't retry again. So if the first retry passed,
# then there should only be 2 entries because we don't do another retry.
# TODO(ojan): Reenable this assert once crbug.com/514378 is fixed.
# assert(len(result_types) == 2)
# If there are only two entries, then that means it failed on the first
# try and ran as expected on the second because otherwise we'd have
# a third entry from the next try.
second_result_type_enum_value = self._result_to_enum(result_types[1])
if only_ignore_very_flaky and len(result_types) == 2:
continue
flaky_results = flaky_results.union(set(result_types))
has_unexpected_results = False
for result_type in result_types:
result_enum = self._result_to_enum(result_type)
# TODO(ojan): We really should be grabbing the expected results from the time
# of the run instead of looking at the latest expected results. That's a lot
# more complicated though. So far we've been looking at the aggregated
# results_small.json off test_results.appspot, which has all the information
# for the last 100 runs. In order to do this, we'd need to look at the
# individual runs' full_results.json, which would be slow and more complicated.
# The only thing we lose by not fixing this is that a test that was flaky
# and got fixed will still get printed out until 100 runs have passed.
if not TestExpectations.result_was_expected(result_enum, latest_expectations, test_needs_rebaselining=False):
has_unexpected_results = True
break
if has_unexpected_results:
flaky_results = flaky_results.union(set(result_types))
return flaky_results
@@ -135,6 +135,12 @@ class BotTestExpectationsTest(unittest.TestCase):
'maybeflaky.html': self._results_from_string('FP'),
'notflakypass.html': self._results_from_string('P'),
'notflakyfail.html': self._results_from_string('F'),
# Even if there are no expected results, it's not very flaky if it didn't do multiple retries.
# This accounts for the latest expectations not necessarily matching the expectations
# at the time of the given run.
'notverflakynoexpected.html': self._results_from_string('FT'),
# If the test is flaky, but marked as such, it shouldn't get printed out.
'notflakyexpected.html': {'results': [[2, 'FFFP']], 'expected': 'PASS FAIL'},
}
}
}
@@ -144,6 +150,7 @@ class BotTestExpectationsTest(unittest.TestCase):
self._assert_expectations(test_data, {
'foo/veryflaky.html': sorted(["TEXT", "PASS"]),
'foo/notverflakynoexpected.html': ['TEXT', 'TIMEOUT'],
'foo/maybeflaky.html': sorted(["TEXT", "PASS"]),
}, only_ignore_very_flaky=False)
@@ -26,6 +26,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import logging
import os
import optparse
from webkitpy.tool.multicommandtool import AbstractDeclarativeCommand
@@ -35,15 +36,30 @@ from webkitpy.layout_tests.port import builders
from webkitpy.common.net import sheriff_calendar
_log = logging.getLogger(__name__)
class FlakyTests(AbstractDeclarativeCommand):
name = "print-flaky-tests"
help_text = "Print out flaky lines from the flakiness dashboard"
show_in_main_help = True
FLAKY_TEST_CONTENTS = '''%s
FLAKINESS_DASHBOARD_URL = 'https://test-results.appspot.com/dashboards/flakiness_dashboard.html#tests=%s'
BUG_TEMPLATE = 'https://code.google.com/p/chromium/issues/entry?owner=FILL_ME_IN&status=Assigned&labels=Pri-1,Cr-Blink,FlakyLayoutTest&summary=XXXXXXX%20is%20flaky&comment=XXXXXXX%20is%20flaky.%0A%0AIt%20failed%20twice%20and%20then%20passed%20on%20the%203rd%20or%204th%20retry.%20This%20is%20too%20flaky.%20The%20test%20will%20be%20skipped%20until%20it%27s%20fixed.%20If%20not%20fixed%20in%203%20months,%20it%20will%20be%20deleted%20or%20perma-skipped.%0A%0AIn%20the%20flakiness%20dashboard,%20the%20turquoise%20boxes%20are%20runs%20where%20the%20test%20failed%20and%20then%20passed%20on%20retry.%0A%0Ahttp://test-results.appspot.com/dashboards/flakiness_dashboard.html%23tests=XXXXXXX'
HEADER = '''Manually add bug numbers for these and then put the lines in LayoutTests/TestExpectations.
Look up the test in the flakiness dashboard first to see if the platform
specifiers should be made more general.
Bug template:
%s
''' % BUG_TEMPLATE
Manually add bug numbers for these and then put the lines in LayoutTests/TestExpectations.
TODO(ojan): Write a script to file/assign the bugs then create a bot to do this automatically.
OUTPUT = '''%s
%s
Flakiness dashboard: %s
'''
def __init__(self):
@@ -67,6 +83,13 @@ TODO(ojan): Write a script to file/assign the bugs then create a bot to do this
models.append(model)
expectations = factory.expectations_for_builder(builder_name)
# TODO(ojan): We should also skip bots that haven't uploaded recently,
# e.g. if they're >24h stale.
if not expectations:
_log.error("Can't load flakiness data for builder: %s" % builder_name)
continue
for line in expectations.expectation_lines(only_ignore_very_flaky=True):
# TODO(ojan): Find a way to merge specifiers instead of removing build types.
# We can't just union because some specifiers will change the meaning of others.
@@ -93,5 +116,10 @@ TODO(ojan): Write a script to file/assign the bugs then create a bot to do this
fs = tool.filesystem
lines = filter(lambda line: fs.exists(fs.join(port.layout_tests_dir(), line.path)), lines)
print self.FLAKY_TEST_CONTENTS % TestExpectations.list_to_string(lines) # pylint: disable=E1601
test_names = [line.name for line in lines]
flakiness_dashboard_url = self.FLAKINESS_DASHBOARD_URL % ','.join(test_names)
expectations_string = TestExpectations.list_to_string(lines)
# pylint: disable=E1601
print self.OUTPUT % (self.HEADER, expectations_string, flakiness_dashboard_url)
@@ -71,11 +71,9 @@ class FlakyTestsTest(CommandsTest):
tool = MockTool()
command.expectations_factory = FakeBotTestExpectationsFactory
options = MockOptions(upload=True)
expected_stdout = '''
Manually add bug numbers for these and then put the lines in LayoutTests/TestExpectations.
TODO(ojan): Write a script to file/assign the bugs then create a bot to do this automatically.
'''
expected_stdout = flakytests.FlakyTests.OUTPUT % (
flakytests.FlakyTests.HEADER,
'',
flakytests.FlakyTests.FLAKINESS_DASHBOARD_URL % '') + '\n'
self.assert_execute_outputs(command, options=options, tool=tool, expected_stdout=expected_stdout)