Commit bdfab875 authored by robertocn's avatar robertocn Committed by Commit bot

Obtain the confidence score based on the test results of the last known good and first known bad revisions.

BUG=448817

Review URL: https://codereview.chromium.org/850013004

Cr-Commit-Position: refs/heads/master@{#314257}
parent 36dcef3d
......@@ -86,6 +86,10 @@ MAX_LINUX_BUILD_TIME = 14400
# The confidence percentage we require to consider the initial range a
# regression based on the test results of the initial good and bad revisions.
REGRESSION_CONFIDENCE = 80
# How many times to repeat the test on the last known good and first known bad
# revisions in order to assess a more accurate confidence score in the
# regression culprit.
BORDER_REVISIONS_EXTRA_RUNS = 2
# Patch template to add a new file, DEPS.sha under src folder.
# This file contains SHA1 value of the DEPS changes made while bisecting
......@@ -1272,7 +1276,7 @@ class BisectPerformanceMetrics(object):
def RunPerformanceTestAndParseResults(
self, command_to_run, metric, reset_on_first_run=False,
upload_on_last_run=False, results_label=None):
upload_on_last_run=False, results_label=None, test_run_multiplier=1):
"""Runs a performance test on the current revision and parses the results.
Args:
......@@ -1285,6 +1289,8 @@ class BisectPerformanceMetrics(object):
results_label: A value for the option flag --results-label.
The arguments reset_on_first_run, upload_on_last_run and results_label
are all ignored if the test is not a Telemetry test.
test_run_multiplier: Factor by which to multiply the number of test runs
and the timeout period specified in self.opts.
Returns:
(values dict, 0) if --debug_ignore_perf_test was passed.
......@@ -1326,7 +1332,8 @@ class BisectPerformanceMetrics(object):
metric_values = []
output_of_all_runs = ''
for i in xrange(self.opts.repeat_test_count):
repeat_count = self.opts.repeat_test_count * test_run_multiplier
for i in xrange(repeat_count):
# Can ignore the return code since if the tests fail, it won't return 0.
current_args = copy.copy(args)
if is_telemetry:
......@@ -1368,7 +1375,8 @@ class BisectPerformanceMetrics(object):
metric_values.append(return_code)
elapsed_minutes = (time.time() - start_time) / 60.0
if elapsed_minutes >= self.opts.max_time_minutes:
time_limit = self.opts.max_time_minutes * test_run_multiplier
if elapsed_minutes >= time_limit:
break
if metric and len(metric_values) == 0:
......@@ -1473,7 +1481,8 @@ class BisectPerformanceMetrics(object):
return False
def RunTest(self, revision, depot, command, metric, skippable=False,
skip_sync=False, create_patch=False, force_build=False):
skip_sync=False, create_patch=False, force_build=False,
test_run_multiplier=1):
"""Performs a full sync/build/run of the specified revision.
Args:
......@@ -1484,6 +1493,8 @@ class BisectPerformanceMetrics(object):
skip_sync: Skip the sync step.
create_patch: Create a patch with any locally modified files.
force_build: Force a local build.
test_run_multiplier: Factor by which to multiply the given number of runs
and the set timeout period.
Returns:
On success, a tuple containing the results of the performance test.
......@@ -1525,7 +1536,8 @@ class BisectPerformanceMetrics(object):
command = self.GetCompatibleCommand(command, revision, depot)
# Run the command and get the results.
results = self.RunPerformanceTestAndParseResults(command, metric)
results = self.RunPerformanceTestAndParseResults(
command, metric, test_run_multiplier=test_run_multiplier)
# Restore build output directory once the tests are done, to avoid
# any discrepancies.
......@@ -2439,6 +2451,9 @@ class BisectPerformanceMetrics(object):
self.printer.PrintPartialResults(bisect_state)
bisect_utils.OutputAnnotationStepClosed()
self._ConfidenceExtraTestRuns(min_revision_state, max_revision_state,
command_to_run, metric)
results = BisectResults(bisect_state, self.depot_registry, self.opts,
self.warnings)
......@@ -2452,6 +2467,21 @@ class BisectPerformanceMetrics(object):
'[%s..%s]' % (good_revision, bad_revision))
return BisectResults(error=error)
def _ConfidenceExtraTestRuns(self, good_state, bad_state, command_to_run,
                             metric):
  """Reruns the tests on the revisions bordering the regression range.

  Runs the performance test again on the last known good and first known
  bad revisions (with BORDER_REVISIONS_EXTRA_RUNS times the usual repeat
  count) and merges the new sample values into each revision state, so
  that the confidence score computed from these two revisions is based on
  more data points.

  Only runs when the two states actually disagree (one passed, one did
  not) and neither of them was skipped or failed to build.

  Args:
    good_state: RevisionState for the last known good revision.
    bad_state: RevisionState for the first known bad revision.
    command_to_run: The performance test command to execute.
    metric: The metric to parse out of the test results.
  """
  if (bool(good_state.passed) != bool(bad_state.passed)
      and good_state.passed not in ('Skipped', 'Build Failed')
      and bad_state.passed not in ('Skipped', 'Build Failed')):
    for state in (good_state, bad_state):
      run_results = self.RunTest(
          state.revision,
          state.depot,
          command_to_run,
          metric,
          test_run_multiplier=BORDER_REVISIONS_EXTRA_RUNS)
      # NOTE(review): the extra samples are appended to the existing ones
      # rather than replacing them — confirm that merging (extend) is the
      # intended aggregation here.
      state.value['values'].extend(run_results[0]['values'])
def _IsPlatformSupported():
"""Checks that this platform and build system are supported.
......
......@@ -230,6 +230,20 @@ class BisectResults(object):
@staticmethod
def FindBreakingRevRange(revision_states):
"""Finds the last known good and first known bad revisions.
Note that since revision_states is expected to be in reverse chronological
order, the last known good revision is the first revision in the list that
has the passed property set to 1, therefore the name
`first_working_revision`. The inverse applies to `last_broken_revision`.
Args:
revision_states: A list of RevisionState instances.
Returns:
A tuple containing the two revision states at the border. (Last
known good and first known bad.)
"""
first_working_revision = None
last_broken_revision = None
......@@ -287,10 +301,13 @@ class BisectResults(object):
[working_mean, broken_mean]) /
max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0
# Give a "confidence" in the bisect. At the moment we use how distinct the
# values are before and after the last broken revision, and how noisy the
# overall graph is.
confidence_params = (sum(working_means, []), sum(broken_means, []))
# Give a "confidence" in the bisect. Currently, we consider the values of
# only the revisions at the breaking range (last known good and first known
# bad); see the note in the docstring for FindBreakingRevRange.
confidence_params = (
sum([first_working_rev.value['values']], []),
sum([last_broken_rev.value['values']], [])
)
confidence = cls.ConfidenceScore(*confidence_params)
bad_greater_than_good = mean_of_bad_runs > mean_of_good_runs
......
......@@ -158,6 +158,58 @@ class BisectResultsTest(unittest.TestCase):
self.assertEqual(revision_states[2], results.first_working_revision)
self.assertEqual(revision_states[1], results.last_broken_revision)
def testCorrectlyFindsBreakingRangeNotInOrder(self):
  """Breaking range is found even when pass/fail results are interleaved."""
  states = self.mock_bisect_state.mock_revision_states
  for index, outcome in enumerate([0, 1, 0, 1, 1]):
    states[index].passed = outcome
  results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
                          self.mock_opts, self.mock_warnings)
  self.assertEqual(states[1], results.first_working_revision)
  self.assertEqual(states[2], results.last_broken_revision)
def testCorrectlyFindsBreakingRangeIncompleteBisect(self):
  """Untested ('?') revisions are skipped when locating the breaking range."""
  states = self.mock_bisect_state.mock_revision_states
  for index, outcome in enumerate([0, 0, '?', 1, 1]):
    states[index].passed = outcome
  results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
                          self.mock_opts, self.mock_warnings)
  self.assertEqual(states[3], results.first_working_revision)
  self.assertEqual(states[1], results.last_broken_revision)
def testFindBreakingRangeAllPassed(self):
  """With every revision passing there is no broken revision to report."""
  states = self.mock_bisect_state.mock_revision_states
  for state in states[:5]:
    state.passed = 1
  results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
                          self.mock_opts, self.mock_warnings)
  self.assertEqual(states[0], results.first_working_revision)
  self.assertIsNone(results.last_broken_revision)
def testFindBreakingRangeNonePassed(self):
  """With every revision failing there is no working revision to report."""
  states = self.mock_bisect_state.mock_revision_states
  for state in states[:5]:
    state.passed = 0
  results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
                          self.mock_opts, self.mock_warnings)
  self.assertIsNone(results.first_working_revision)
  self.assertEqual(states[4], results.last_broken_revision)
def testCorrectlyComputesRegressionStatistics(self):
revision_states = self.mock_bisect_state.mock_revision_states
revision_states[0].passed = 0
......@@ -227,9 +279,9 @@ class BisectResultsTest(unittest.TestCase):
def testWarningForTooLowConfidence(self):
revision_states = self.mock_bisect_state.mock_revision_states
revision_states[2].value = {'values': [95, 100, 90]}
revision_states[3].value = {'values': [95, 100, 90]}
revision_states[4].value = {'values': [95, 100, 90]}
revision_states[2].value = {'values': [95, 90, 90]}
revision_states[3].value = {'values': [95, 90, 90]}
revision_states[4].value = {'values': [95, 90, 90]}
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertGreater(results.confidence, 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment