Commit cdb492db authored by sergiyb's avatar sergiyb Committed by Commit bot

Refactored bisect results dicts into a separate class

R=qyearsley@chromium.org

Review URL: https://codereview.chromium.org/554283003

Cr-Commit-Position: refs/heads/master@{#297006}
parent 893b8d6d
......@@ -36,7 +36,6 @@ import copy
import datetime
import errno
import hashlib
import math
import optparse
import os
import re
......@@ -50,12 +49,12 @@ import zipfile
sys.path.append(os.path.join(
os.path.dirname(__file__), os.path.pardir, 'telemetry'))
from bisect_results import BisectResults
import bisect_utils
import builder
import math_utils
import request_build
import source_control as source_control_module
import ttest
from telemetry.util import cloud_storage
# Below is the map of "depot" names to information about each depot. Each depot
......@@ -273,42 +272,6 @@ def _AddAdditionalDepotInfo(depot_info):
DEPOT_NAMES = DEPOT_DEPS_NAME.keys()
def ConfidenceScore(good_results_lists, bad_results_lists):
"""Calculates a confidence score.
This score is a percentage which represents our degree of confidence in the
proposition that the good results and bad results are distinct groups, and
their differences aren't due to chance alone.
Args:
good_results_lists: A list of lists of "good" result numbers.
bad_results_lists: A list of lists of "bad" result numbers.
Returns:
A number in the range [0, 100].
"""
# If there's only one item in either list, this means only one revision was
# classified good or bad; this isn't good enough evidence to make a decision.
# If an empty list was passed, that also implies zero confidence.
if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1:
return 0.0
# Flatten the lists of results lists.
sample1 = sum(good_results_lists, [])
sample2 = sum(bad_results_lists, [])
# If there were only empty lists in either of the lists (this is unexpected
# and normally shouldn't happen), then we also want to return 0.
if not sample1 or not sample2:
return 0.0
# The p-value is approximately the probability of obtaining the given set
# of good and bad values just by chance.
_, _, p_value = ttest.WelchsTTest(sample1, sample2)
return 100.0 * (1.0 - p_value)
def GetSHA1HexDigest(contents):
"""Returns SHA1 hex digest of the given string."""
return hashlib.sha1(contents).hexdigest()
......@@ -865,44 +828,36 @@ def _PrintStepTime(revision_data_sorted):
seconds=int(step_perf_time_avg))
def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good):
"""Compiles a list of other possible regressions from the revision data.
class DepotDirectoryRegistry(object):
Args:
revision_data_sorted: Sorted list of (revision, revision data) pairs.
bad_greater_than_good: Whether the result value at the "bad" revision is
numerically greater than the result value at the "good" revision.
def __init__(self, src_cwd):
self.depot_cwd = {}
for depot in DEPOT_NAMES:
# The working directory of each depot is just the path to the depot, but
# since we're already in 'src', we can skip that part.
path_in_src = DEPOT_DEPS_NAME[depot]['src'][4:]
self.AddDepot(depot, os.path.join(src_cwd, path_in_src))
Returns:
A list of [current_rev, previous_rev, confidence] for other places where
there may have been a regression.
self.AddDepot('chromium', src_cwd)
self.AddDepot('cros', os.path.join(src_cwd, 'tools', 'cros'))
def AddDepot(self, depot_name, depot_dir):
self.depot_cwd[depot_name] = depot_dir
def GetDepotDir(self, depot_name):
if depot_name in self.depot_cwd:
return self.depot_cwd[depot_name]
else:
assert False, ('Unknown depot [ %s ] encountered. Possibly a new one '
'was added without proper support?' % depot_name)
def ChangeToDepotDir(self, depot_name):
"""Given a depot, changes to the appropriate working directory.
Args:
depot_name: The name of the depot (see DEPOT_NAMES).
"""
other_regressions = []
previous_values = []
previous_id = None
for current_id, current_data in revision_data_sorted:
current_values = current_data['value']
if current_values:
current_values = current_values['values']
if previous_values:
confidence = ConfidenceScore(previous_values, [current_values])
mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
mean_of_current_runs = math_utils.Mean(current_values)
# Check that the potential regression is in the same direction as
# the overall regression. If the mean of the previous runs < the
# mean of the current runs, this local regression is in same
# direction.
prev_less_than_current = mean_of_prev_runs < mean_of_current_runs
is_same_direction = (prev_less_than_current if
bad_greater_than_good else not prev_less_than_current)
# Only report potential regressions with high confidence.
if is_same_direction and confidence > 50:
other_regressions.append([current_id, previous_id, confidence])
previous_values.append(current_values)
previous_id = current_id
return other_regressions
os.chdir(self.GetDepotDir(depot_name))
class BisectPerformanceMetrics(object):
......@@ -922,18 +877,12 @@ class BisectPerformanceMetrics(object):
# where the bisect script is running from. Instead, it's the src/ directory
# inside the bisect/ directory which is created before running.
self.src_cwd = os.getcwd()
self.cros_cwd = os.path.join(os.getcwd(), '..', 'cros')
self.depot_cwd = {}
self.depot_registry = DepotDirectoryRegistry(self.src_cwd)
self.cleanup_commands = []
self.warnings = []
self.builder = builder.Builder.FromOpts(opts)
for depot in DEPOT_NAMES:
# The working directory of each depot is just the path to the depot, but
# since we're already in 'src', we can skip that part.
self.depot_cwd[depot] = os.path.join(
self.src_cwd, DEPOT_DEPS_NAME[depot]['src'][4:])
def PerformCleanup(self):
"""Performs cleanup when script is finished."""
os.chdir(self.src_cwd)
......@@ -954,7 +903,7 @@ class BisectPerformanceMetrics(object):
revision_range_end = bad_revision
cwd = os.getcwd()
self.ChangeToDepotWorkingDirectory('cros')
self.depot_registry.ChangeToDepotDir('cros')
# Print the commit timestamps for every commit in the revision time
# range. We'll sort them and bisect by that. There is a remote chance that
......@@ -974,7 +923,7 @@ class BisectPerformanceMetrics(object):
[int(o) for o in output.split('\n') if bisect_utils.IsStringInt(o)]))
revision_work_list = sorted(revision_work_list, reverse=True)
else:
cwd = self._GetDepotDirectory(depot)
cwd = self.depot_registry.GetDepotDir(depot)
revision_work_list = self.source_control.GetRevisionList(bad_revision,
good_revision, cwd=cwd)
......@@ -994,8 +943,8 @@ class BisectPerformanceMetrics(object):
# As of 01/24/2014, V8 trunk descriptions are formatted:
# "Version 3.X.Y (based on bleeding_edge revision rZ)"
# So we can just try parsing that out first and fall back to the old way.
v8_dir = self._GetDepotDirectory('v8')
v8_bleeding_edge_dir = self._GetDepotDirectory('v8_bleeding_edge')
v8_dir = self.depot_registry.GetDepotDir('v8')
v8_bleeding_edge_dir = self.depot_registry.GetDepotDir('v8_bleeding_edge')
revision_info = self.source_control.QueryRevisionInfo(revision,
cwd=v8_dir)
......@@ -1035,7 +984,7 @@ class BisectPerformanceMetrics(object):
return None
def _GetNearestV8BleedingEdgeFromTrunk(self, revision, search_forward=True):
cwd = self._GetDepotDirectory('v8')
cwd = self.depot_registry.GetDepotDir('v8')
cmd = ['log', '--format=%ct', '-1', revision]
output = bisect_utils.CheckRunGit(cmd, cwd=cwd)
commit_time = int(output)
......@@ -1097,8 +1046,8 @@ class BisectPerformanceMetrics(object):
depot_data_src = depot_data.get('src') or depot_data.get('src_old')
src_dir = deps_data.get(depot_data_src)
if src_dir:
self.depot_cwd[depot_name] = os.path.join(
self.src_cwd, depot_data_src[4:])
self.depot_registry.AddDepot(depot_name, os.path.join(
self.src_cwd, depot_data_src[4:]))
re_results = rxp.search(src_dir)
if re_results:
results[depot_name] = re_results.group('revision')
......@@ -1135,7 +1084,7 @@ class BisectPerformanceMetrics(object):
A dict in the format {depot: revision} if successful, otherwise None.
"""
cwd = os.getcwd()
self.ChangeToDepotWorkingDirectory(depot)
self.depot_registry.ChangeToDepotDir(depot)
results = {}
......@@ -1176,7 +1125,7 @@ class BisectPerformanceMetrics(object):
self.warnings.append(warningText)
cwd = os.getcwd()
self.ChangeToDepotWorkingDirectory('chromium')
self.depot_registry.ChangeToDepotDir('chromium')
cmd = ['log', '-1', '--format=%H',
'--author=chrome-release@google.com',
'--grep=to %s' % version, 'origin/master']
......@@ -1414,7 +1363,7 @@ class BisectPerformanceMetrics(object):
new_data = None
if re.search(deps_revision, deps_contents):
commit_position = self.source_control.GetCommitPosition(
git_revision, self._GetDepotDirectory(depot))
git_revision, self.depot_registry.GetDepotDir(depot))
if not commit_position:
print 'Could not determine commit position for %s' % git_revision
return None
......@@ -1778,7 +1727,7 @@ class BisectPerformanceMetrics(object):
commit_position = self.source_control.GetCommitPosition(revision)
for d in DEPOT_DEPS_NAME[depot]['depends']:
self.ChangeToDepotWorkingDirectory(d)
self.depot_registry.ChangeToDepotDir(d)
dependant_rev = self.source_control.ResolveToRevision(
commit_position, d, DEPOT_DEPS_NAME, -1000)
......@@ -1789,7 +1738,7 @@ class BisectPerformanceMetrics(object):
num_resolved = len(revisions_to_sync)
num_needed = len(DEPOT_DEPS_NAME[depot]['depends'])
self.ChangeToDepotWorkingDirectory(depot)
self.depot_registry.ChangeToDepotDir(depot)
if not ((num_resolved - 1) == num_needed):
return None
......@@ -1815,7 +1764,7 @@ class BisectPerformanceMetrics(object):
True if successful.
"""
cwd = os.getcwd()
self.ChangeToDepotWorkingDirectory('cros')
self.depot_registry.ChangeToDepotDir('cros')
cmd = [bisect_utils.CROS_SDK_PATH, '--delete']
return_code = bisect_utils.RunProcess(cmd)
os.chdir(cwd)
......@@ -1828,7 +1777,7 @@ class BisectPerformanceMetrics(object):
True if successful.
"""
cwd = os.getcwd()
self.ChangeToDepotWorkingDirectory('cros')
self.depot_registry.ChangeToDepotDir('cros')
cmd = [bisect_utils.CROS_SDK_PATH, '--create']
return_code = bisect_utils.RunProcess(cmd)
os.chdir(cwd)
......@@ -1990,7 +1939,7 @@ class BisectPerformanceMetrics(object):
True if successful, False otherwise.
"""
for depot, revision in revisions_to_sync:
self.ChangeToDepotWorkingDirectory(depot)
self.depot_registry.ChangeToDepotDir(depot)
if sync_client:
self.PerformPreBuildCleanup()
......@@ -2031,25 +1980,6 @@ class BisectPerformanceMetrics(object):
return dist_to_good_value < dist_to_bad_value
def _GetDepotDirectory(self, depot_name):
if depot_name == 'chromium':
return self.src_cwd
elif depot_name == 'cros':
return self.cros_cwd
elif depot_name in DEPOT_NAMES:
return self.depot_cwd[depot_name]
else:
assert False, ('Unknown depot [ %s ] encountered. Possibly a new one '
'was added without proper support?' % depot_name)
def ChangeToDepotWorkingDirectory(self, depot_name):
"""Given a depot, changes to the appropriate working directory.
Args:
depot_name: The name of the depot (see DEPOT_NAMES).
"""
os.chdir(self._GetDepotDirectory(depot_name))
def _FillInV8BleedingEdgeInfo(self, min_revision_data, max_revision_data):
r1 = self._GetNearestV8BleedingEdgeFromTrunk(min_revision_data['revision'],
search_forward=True)
......@@ -2125,7 +2055,7 @@ class BisectPerformanceMetrics(object):
"""
# Change into working directory of external library to run
# subsequent commands.
self.ChangeToDepotWorkingDirectory(current_depot)
self.depot_registry.ChangeToDepotDir(current_depot)
# V8 (and possibly others) is merged in periodically. Bisecting
# this directory directly won't give much good info.
......@@ -2139,7 +2069,7 @@ class BisectPerformanceMetrics(object):
return []
if current_depot == 'v8_bleeding_edge':
self.ChangeToDepotWorkingDirectory('chromium')
self.depot_registry.ChangeToDepotDir('chromium')
shutil.move('v8', 'v8.bak')
shutil.move('v8_bleeding_edge', 'v8')
......@@ -2147,16 +2077,17 @@ class BisectPerformanceMetrics(object):
self.cleanup_commands.append(['mv', 'v8', 'v8_bleeding_edge'])
self.cleanup_commands.append(['mv', 'v8.bak', 'v8'])
self.depot_cwd['v8_bleeding_edge'] = os.path.join(self.src_cwd, 'v8')
self.depot_cwd['v8'] = os.path.join(self.src_cwd, 'v8.bak')
self.depot_registry.AddDepot('v8_bleeding_edge',
os.path.join(self.src_cwd, 'v8'))
self.depot_registry.AddDepot('v8', os.path.join(self.src_cwd, 'v8.bak'))
self.ChangeToDepotWorkingDirectory(current_depot)
self.depot_registry.ChangeToDepotDir(current_depot)
depot_revision_list = self.GetRevisionList(current_depot,
end_revision,
start_revision)
self.ChangeToDepotWorkingDirectory('chromium')
self.depot_registry.ChangeToDepotDir('chromium')
return depot_revision_list
......@@ -2262,7 +2193,7 @@ class BisectPerformanceMetrics(object):
True if the revisions are in the proper order (good earlier than bad).
"""
if self.source_control.IsGit() and target_depot != 'cros':
cwd = self._GetDepotDirectory(target_depot)
cwd = self.depot_registry.GetDepotDir(target_depot)
cmd = ['log', '--format=%ct', '-1', good_revision]
output = bisect_utils.CheckRunGit(cmd, cwd=cwd)
......@@ -2329,41 +2260,9 @@ class BisectPerformanceMetrics(object):
metric: The performance metric to monitor.
Returns:
A dict with 2 members, 'revision_data' and 'error'. On success,
'revision_data' will contain a dict mapping revision ids to
data about that revision. Each piece of revision data consists of a
dict with the following keys:
'passed': Represents whether the performance test was successful at
that revision. Possible values include: 1 (passed), 0 (failed),
'?' (skipped), 'F' (build failed).
'depot': The depot that this revision is from (i.e. WebKit)
'external': If the revision is a 'src' revision, 'external' contains
the revisions of each of the external libraries.
'sort': A sort value for sorting the dict in order of commits.
For example:
{
'error':None,
'revision_data':
{
'CL #1':
{
'passed': False,
'depot': 'chromium',
'external': None,
'sort': 0
}
}
}
If an error occurred, the 'error' field will contain the message and
'revision_data' will be empty.
A BisectResults object.
"""
results = {
'revision_data' : {},
'error' : None,
}
results = BisectResults(self.depot_registry, self.source_control)
# Choose depot to bisect first
target_depot = 'chromium'
......@@ -2373,7 +2272,7 @@ class BisectPerformanceMetrics(object):
target_depot = 'android-chrome'
cwd = os.getcwd()
self.ChangeToDepotWorkingDirectory(target_depot)
self.depot_registry.ChangeToDepotDir(target_depot)
# If they passed SVN revisions, we can try match them to git SHA1 hashes.
bad_revision = self.source_control.ResolveToRevision(
......@@ -2383,17 +2282,17 @@ class BisectPerformanceMetrics(object):
os.chdir(cwd)
if bad_revision is None:
results['error'] = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in
results.error = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in
return results
if good_revision is None:
results['error'] = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in
results.error = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in
return results
# Check that they didn't accidentally swap good and bad revisions.
if not self.CheckIfRevisionsInProperOrder(
target_depot, good_revision, bad_revision):
results['error'] = ('bad_revision < good_revision, did you swap these '
results.error = ('bad_revision < good_revision, did you swap these '
'by mistake?')
return results
bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange(
......@@ -2403,7 +2302,7 @@ class BisectPerformanceMetrics(object):
cannot_bisect = self.CanPerformBisect(good_revision, bad_revision)
if cannot_bisect:
results['error'] = cannot_bisect.get('error')
results.error = cannot_bisect.get('error')
return results
print 'Gathering revision range for bisection.'
......@@ -2418,7 +2317,7 @@ class BisectPerformanceMetrics(object):
# revision_data will store information about a revision such as the
# depot it came from, the webkit/V8 revision at that time,
# performance timing, build state, etc...
revision_data = results['revision_data']
revision_data = results.revision_data
# revision_list is the list we're binary searching through at the moment.
revision_list = []
......@@ -2461,14 +2360,14 @@ class BisectPerformanceMetrics(object):
bisect_utils.OutputAnnotationStepClosed()
if bad_results[1]:
results['error'] = ('An error occurred while building and running '
results.error = ('An error occurred while building and running '
'the \'bad\' reference value. The bisect cannot continue without '
'a working \'bad\' revision to start from.\n\nError: %s' %
bad_results[0])
return results
if good_results[1]:
results['error'] = ('An error occurred while building and running '
results.error = ('An error occurred while building and running '
'the \'good\' reference value. The bisect cannot continue without '
'a working \'good\' revision to start from.\n\nError: %s' %
good_results[0])
......@@ -2536,7 +2435,7 @@ class BisectPerformanceMetrics(object):
previous_revision)
if not new_revision_list:
results['error'] = ('An error occurred attempting to retrieve '
results.error = ('An error occurred attempting to retrieve '
'revision range: [%s..%s]' %
(earliest_revision, latest_revision))
return results
......@@ -2568,7 +2467,7 @@ class BisectPerformanceMetrics(object):
next_revision_data = revision_data[next_revision_id]
next_revision_depot = next_revision_data['depot']
self.ChangeToDepotWorkingDirectory(next_revision_depot)
self.depot_registry.ChangeToDepotDir(next_revision_depot)
if self.opts.output_buildbot_annotations:
step_name = 'Working on [%s]' % next_revision_id
......@@ -2617,18 +2516,14 @@ class BisectPerformanceMetrics(object):
bisect_utils.OutputAnnotationStepClosed()
else:
# Weren't able to sync and retrieve the revision range.
results['error'] = ('An error occurred attempting to retrieve revision '
results.error = ('An error occurred attempting to retrieve revision '
'range: [%s..%s]' % (good_revision, bad_revision))
return results
def _PrintPartialResults(self, results_dict):
revision_data = results_dict['revision_data']
revision_data_sorted = sorted(revision_data.iteritems(),
key = lambda x: x[1]['sort'])
results_dict = self._GetResultsDict(revision_data, revision_data_sorted)
self._PrintTestedCommitsTable(revision_data_sorted,
def _PrintPartialResults(self, results):
results_dict = results.GetResultsDict()
self._PrintTestedCommitsTable(results_dict['revision_data_sorted'],
results_dict['first_working_revision'],
results_dict['last_broken_revision'],
100, final_step=False)
......@@ -2648,7 +2543,7 @@ class BisectPerformanceMetrics(object):
def _GetViewVCLinkFromDepotAndHash(self, cl, depot):
info = self.source_control.QueryRevisionInfo(cl,
self._GetDepotDirectory(depot))
self.depot_registry.GetDepotDir(depot))
if depot and DEPOT_DEPS_NAME[depot].has_key('viewvc'):
try:
# Format is "git-svn-id: svn://....@123456 <other data>"
......@@ -2801,125 +2696,6 @@ class BisectPerformanceMetrics(object):
previous_data['depot'], previous_link)
print
def _GetResultsDict(self, revision_data, revision_data_sorted):
# Find range where it possibly broke.
first_working_revision = None
first_working_revision_index = -1
last_broken_revision = None
last_broken_revision_index = -1
culprit_revisions = []
other_regressions = []
regression_size = 0.0
regression_std_err = 0.0
confidence = 0.0
for i in xrange(len(revision_data_sorted)):
k, v = revision_data_sorted[i]
if v['passed'] == 1:
if not first_working_revision:
first_working_revision = k
first_working_revision_index = i
if not v['passed']:
last_broken_revision = k
last_broken_revision_index = i
if last_broken_revision != None and first_working_revision != None:
broken_means = []
for i in xrange(0, last_broken_revision_index + 1):
if revision_data_sorted[i][1]['value']:
broken_means.append(revision_data_sorted[i][1]['value']['values'])
working_means = []
for i in xrange(first_working_revision_index, len(revision_data_sorted)):
if revision_data_sorted[i][1]['value']:
working_means.append(revision_data_sorted[i][1]['value']['values'])
# Flatten the lists to calculate mean of all values.
working_mean = sum(working_means, [])
broken_mean = sum(broken_means, [])
# Calculate the approximate size of the regression
mean_of_bad_runs = math_utils.Mean(broken_mean)
mean_of_good_runs = math_utils.Mean(working_mean)
regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
mean_of_bad_runs)
if math.isnan(regression_size):
regression_size = 'zero-to-nonzero'
regression_std_err = math.fabs(math_utils.PooledStandardError(
[working_mean, broken_mean]) /
max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0
# Give a "confidence" in the bisect. At the moment we use how distinct the
# values are before and after the last broken revision, and how noisy the
# overall graph is.
confidence = ConfidenceScore(working_means, broken_means)
culprit_revisions = []
cwd = os.getcwd()
self.ChangeToDepotWorkingDirectory(
revision_data[last_broken_revision]['depot'])
if revision_data[last_broken_revision]['depot'] == 'cros':
# Want to get a list of all the commits and what depots they belong
# to so that we can grab info about each.
cmd = ['repo', 'forall', '-c',
'pwd ; git log --pretty=oneline --before=%d --after=%d' % (
last_broken_revision, first_working_revision + 1)]
output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd)
changes = []
assert not return_code, ('An error occurred while running '
'"%s"' % ' '.join(cmd))
last_depot = None
cwd = os.getcwd()
for l in output.split('\n'):
if l:
# Output will be in form:
# /path_to_depot
# /path_to_other_depot
# <SHA1>
# /path_again
# <SHA1>
# etc.
if l[0] == '/':
last_depot = l
else:
contents = l.split(' ')
if len(contents) > 1:
changes.append([last_depot, contents[0]])
for c in changes:
os.chdir(c[0])
info = self.source_control.QueryRevisionInfo(c[1])
culprit_revisions.append((c[1], info, None))
else:
for i in xrange(last_broken_revision_index, len(revision_data_sorted)):
k, v = revision_data_sorted[i]
if k == first_working_revision:
break
self.ChangeToDepotWorkingDirectory(v['depot'])
info = self.source_control.QueryRevisionInfo(k)
culprit_revisions.append((k, info, v['depot']))
os.chdir(cwd)
# Check for any other possible regression ranges.
other_regressions = _FindOtherRegressions(
revision_data_sorted, mean_of_bad_runs > mean_of_good_runs)
return {
'first_working_revision': first_working_revision,
'last_broken_revision': last_broken_revision,
'culprit_revisions': culprit_revisions,
'other_regressions': other_regressions,
'regression_size': regression_size,
'regression_std_err': regression_std_err,
'confidence': confidence,
}
def _CheckForWarnings(self, results_dict):
if len(results_dict['culprit_revisions']) > 1:
self.warnings.append('Due to build errors, regression range could '
......@@ -2941,10 +2717,7 @@ class BisectPerformanceMetrics(object):
Args:
bisect_results: The results from a bisection test run.
"""
revision_data = bisect_results['revision_data']
revision_data_sorted = sorted(revision_data.iteritems(),
key = lambda x: x[1]['sort'])
results_dict = self._GetResultsDict(revision_data, revision_data_sorted)
results_dict = bisect_results.GetResultsDict()
self._CheckForWarnings(results_dict)
......@@ -2953,7 +2726,7 @@ class BisectPerformanceMetrics(object):
print
print 'Full results of bisection:'
for current_id, current_data in revision_data_sorted:
for current_id, current_data in results_dict['revision_data_sorted']:
build_status = current_data['passed']
if type(build_status) is bool:
......@@ -2981,12 +2754,12 @@ class BisectPerformanceMetrics(object):
self._PrintRevisionInfo(cl, info, depot)
if results_dict['other_regressions']:
self._PrintOtherRegressions(results_dict['other_regressions'],
revision_data)
self._PrintTestedCommitsTable(revision_data_sorted,
results_dict['revision_data'])
self._PrintTestedCommitsTable(results_dict['revision_data_sorted'],
results_dict['first_working_revision'],
results_dict['last_broken_revision'],
results_dict['confidence'])
_PrintStepTime(revision_data_sorted)
_PrintStepTime(results_dict['revision_data_sorted'])
self._PrintReproSteps()
_PrintThankYou()
if self.opts.output_buildbot_annotations:
......@@ -3395,8 +3168,8 @@ def main():
opts.bad_revision,
opts.good_revision,
opts.metric)
if bisect_results['error']:
raise RuntimeError(bisect_results['error'])
if bisect_results.error:
raise RuntimeError(bisect_results.error)
bisect_test.FormatAndPrintResults(bisect_results)
return 0
finally:
......
......@@ -8,6 +8,7 @@ import shutil
import unittest
import bisect_perf_regression
import bisect_results
import source_control as source_control_module
def _GetBisectPerformanceMetricsInstance():
......@@ -26,14 +27,20 @@ def _GetBisectPerformanceMetricsInstance():
bisect_options)
bisect_instance = bisect_perf_regression.BisectPerformanceMetrics(
source_control, bisect_options)
bisect_instance.src_cwd = os.path.abspath(
os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
return bisect_instance
class BisectPerfRegressionTest(unittest.TestCase):
"""Test case for other functions and classes in bisect-perf-regression.py."""
def setUp(self):
self.cwd = os.getcwd()
os.chdir(os.path.abspath(os.path.join(os.path.dirname(__file__),
os.path.pardir, os.path.pardir)))
def tearDown(self):
os.chdir(self.cwd)
def _AssertConfidence(self, score, bad_values, good_values):
"""Checks whether the given sets of values have a given confidence score.
......@@ -48,7 +55,7 @@ class BisectPerfRegressionTest(unittest.TestCase):
"""
# ConfidenceScore takes a list of lists but these lists are flattened
# inside the function.
confidence = bisect_perf_regression.ConfidenceScore(
confidence = bisect_results.ConfidenceScore(
[[v] for v in bad_values],
[[v] for v in good_values])
self.assertEqual(score, confidence)
......@@ -306,5 +313,40 @@ class BisectPerfRegressionTest(unittest.TestCase):
self.assertIsNotNone(re.search(ss, updated_content))
class DepotDirectoryRegistryTest(unittest.TestCase):
def setUp(self):
self.old_chdir = os.chdir
os.chdir = self.mockChdir
self.old_depot_names = bisect_perf_regression.DEPOT_NAMES
bisect_perf_regression.DEPOT_NAMES = ['mock_depot']
self.old_depot_deps_name = bisect_perf_regression.DEPOT_DEPS_NAME
bisect_perf_regression.DEPOT_DEPS_NAME = {'mock_depot': {'src': 'src/foo'}}
self.registry = bisect_perf_regression.DepotDirectoryRegistry('/mock/src')
self.cur_dir = None
def tearDown(self):
os.chdir = self.old_chdir
bisect_perf_regression.DEPOT_NAMES = self.old_depot_names
bisect_perf_regression.DEPOT_DEPS_NAME = self.old_depot_deps_name
def mockChdir(self, new_dir):
self.cur_dir = new_dir
def testReturnsCorrectResultForChrome(self):
self.assertEqual(self.registry.GetDepotDir('chromium'), '/mock/src')
def testReturnsCorrectResultForChromeOS(self):
self.assertEqual(self.registry.GetDepotDir('cros'), '/mock/src/tools/cros')
def testUsesDepotSpecToInitializeRegistry(self):
self.assertEqual(self.registry.GetDepotDir('mock_depot'), '/mock/src/foo')
def testChangedTheDirectory(self):
self.registry.ChangeToDepotDir('mock_depot')
self.assertEqual(self.cur_dir, '/mock/src/foo')
if __name__ == '__main__':
unittest.main()
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import math
import os
import bisect_utils
import math_utils
import ttest
def ConfidenceScore(good_results_lists, bad_results_lists):
"""Calculates a confidence score.
This score is a percentage which represents our degree of confidence in the
proposition that the good results and bad results are distinct groups, and
their differences aren't due to chance alone.
Args:
good_results_lists: A list of lists of "good" result numbers.
bad_results_lists: A list of lists of "bad" result numbers.
Returns:
A number in the range [0, 100].
"""
# If there's only one item in either list, this means only one revision was
# classified good or bad; this isn't good enough evidence to make a decision.
# If an empty list was passed, that also implies zero confidence.
if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1:
return 0.0
# Flatten the lists of results lists.
sample1 = sum(good_results_lists, [])
sample2 = sum(bad_results_lists, [])
# If there were only empty lists in either of the lists (this is unexpected
# and normally shouldn't happen), then we also want to return 0.
if not sample1 or not sample2:
return 0.0
# The p-value is approximately the probability of obtaining the given set
# of good and bad values just by chance.
_, _, p_value = ttest.WelchsTTest(sample1, sample2)
return 100.0 * (1.0 - p_value)
class BisectResults(object):
def __init__(self, depot_registry, source_control):
self._depot_registry = depot_registry
self.revision_data = {}
self.error = None
self._source_control = source_control
@staticmethod
def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good):
"""Compiles a list of other possible regressions from the revision data.
Args:
revision_data_sorted: Sorted list of (revision, revision data) pairs.
bad_greater_than_good: Whether the result value at the "bad" revision is
numerically greater than the result value at the "good" revision.
Returns:
A list of [current_rev, previous_rev, confidence] for other places where
there may have been a regression.
"""
other_regressions = []
previous_values = []
previous_id = None
for current_id, current_data in revision_data_sorted:
current_values = current_data['value']
if current_values:
current_values = current_values['values']
if previous_values:
confidence = ConfidenceScore(previous_values, [current_values])
mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
mean_of_current_runs = math_utils.Mean(current_values)
# Check that the potential regression is in the same direction as
# the overall regression. If the mean of the previous runs < the
# mean of the current runs, this local regression is in same
# direction.
prev_less_than_current = mean_of_prev_runs < mean_of_current_runs
is_same_direction = (prev_less_than_current if
bad_greater_than_good else not prev_less_than_current)
# Only report potential regressions with high confidence.
if is_same_direction and confidence > 50:
other_regressions.append([current_id, previous_id, confidence])
previous_values.append(current_values)
previous_id = current_id
return other_regressions
def GetResultsDict(self):
"""Prepares and returns information about the final resulsts as a dict.
Returns:
A dictionary with the following fields
'first_working_revision': First good revision.
'last_broken_revision': Last bad revision.
'culprit_revisions': A list of revisions, which contain the bad change
introducing the failure.
'other_regressions': A list of tuples representing other regressions,
which may have occured.
'regression_size': For performance bisects, this is a relative change of
the mean metric value. For other bisects this field always contains
'zero-to-nonzero'.
'regression_std_err': For performance bisects, it is a pooled standard
error for groups of good and bad runs. Not used for other bisects.
'confidence': For performance bisects, it is a confidence that the good
and bad runs are distinct groups. Not used for non-performance
bisects.
'revision_data_sorted': dict mapping revision ids to data about that
revision. Each piece of revision data consists of a dict with the
following keys:
'passed': Represents whether the performance test was successful at
that revision. Possible values include: 1 (passed), 0 (failed),
'?' (skipped), 'F' (build failed).
'depot': The depot that this revision is from (i.e. WebKit)
'external': If the revision is a 'src' revision, 'external' contains
the revisions of each of the external libraries.
'sort': A sort value for sorting the dict in order of commits.
For example:
{
'CL #1':
{
'passed': False,
'depot': 'chromium',
'external': None,
'sort': 0
}
}
"""
revision_data_sorted = sorted(self.revision_data.iteritems(),
key = lambda x: x[1]['sort'])
# Find range where it possibly broke.
first_working_revision = None
first_working_revision_index = -1
last_broken_revision = None
last_broken_revision_index = -1
culprit_revisions = []
other_regressions = []
regression_size = 0.0
regression_std_err = 0.0
confidence = 0.0
for i in xrange(len(revision_data_sorted)):
k, v = revision_data_sorted[i]
if v['passed'] == 1:
if not first_working_revision:
first_working_revision = k
first_working_revision_index = i
if not v['passed']:
last_broken_revision = k
last_broken_revision_index = i
if last_broken_revision != None and first_working_revision != None:
broken_means = []
for i in xrange(0, last_broken_revision_index + 1):
if revision_data_sorted[i][1]['value']:
broken_means.append(revision_data_sorted[i][1]['value']['values'])
working_means = []
for i in xrange(first_working_revision_index, len(revision_data_sorted)):
if revision_data_sorted[i][1]['value']:
working_means.append(revision_data_sorted[i][1]['value']['values'])
# Flatten the lists to calculate mean of all values.
working_mean = sum(working_means, [])
broken_mean = sum(broken_means, [])
# Calculate the approximate size of the regression
mean_of_bad_runs = math_utils.Mean(broken_mean)
mean_of_good_runs = math_utils.Mean(working_mean)
regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
mean_of_bad_runs)
if math.isnan(regression_size):
regression_size = 'zero-to-nonzero'
regression_std_err = math.fabs(math_utils.PooledStandardError(
[working_mean, broken_mean]) /
max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0
# Give a "confidence" in the bisect. At the moment we use how distinct the
# values are before and after the last broken revision, and how noisy the
# overall graph is.
confidence = ConfidenceScore(working_means, broken_means)
culprit_revisions = []
cwd = os.getcwd()
self._depot_registry.ChangeToDepotDir(
self.revision_data[last_broken_revision]['depot'])
if self.revision_data[last_broken_revision]['depot'] == 'cros':
# Want to get a list of all the commits and what depots they belong
# to so that we can grab info about each.
cmd = ['repo', 'forall', '-c',
'pwd ; git log --pretty=oneline --before=%d --after=%d' % (
last_broken_revision, first_working_revision + 1)]
output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd)
changes = []
assert not return_code, ('An error occurred while running '
'"%s"' % ' '.join(cmd))
last_depot = None
cwd = os.getcwd()
for l in output.split('\n'):
if l:
# Output will be in form:
# /path_to_depot
# /path_to_other_depot
# <SHA1>
# /path_again
# <SHA1>
# etc.
if l[0] == '/':
last_depot = l
else:
contents = l.split(' ')
if len(contents) > 1:
changes.append([last_depot, contents[0]])
for c in changes:
os.chdir(c[0])
info = self._source_control.QueryRevisionInfo(c[1])
culprit_revisions.append((c[1], info, None))
else:
for i in xrange(last_broken_revision_index, len(revision_data_sorted)):
k, v = revision_data_sorted[i]
if k == first_working_revision:
break
self._depot_registry.ChangeToDepotDir(v['depot'])
info = self._source_control.QueryRevisionInfo(k)
culprit_revisions.append((k, info, v['depot']))
os.chdir(cwd)
# Check for any other possible regression ranges.
other_regressions = self._FindOtherRegressions(
revision_data_sorted, mean_of_bad_runs > mean_of_good_runs)
return {
'first_working_revision': first_working_revision,
'last_broken_revision': last_broken_revision,
'culprit_revisions': culprit_revisions,
'other_regressions': other_regressions,
'regression_size': regression_size,
'regression_std_err': regression_std_err,
'confidence': confidence,
'revision_data_sorted': revision_data_sorted
}
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import unittest
import bisect_results
import ttest
class ConfidenceScoreTest(unittest.TestCase):
def testConfidenceScoreIsZeroOnTooFewLists(self):
self.assertEqual(bisect_results.ConfidenceScore([], [[1], [2]]), 0.0)
self.assertEqual(bisect_results.ConfidenceScore([[1], [2]], []), 0.0)
self.assertEqual(bisect_results.ConfidenceScore([[1]], [[1], [2]]), 0.0)
self.assertEqual(bisect_results.ConfidenceScore([[1], [2]], [[1]]), 0.0)
def testConfidenceScoreIsZeroOnEmptyLists(self):
self.assertEqual(bisect_results.ConfidenceScore([[], []], [[1], [2]]), 0.0)
self.assertEqual(bisect_results.ConfidenceScore([[1], [2]], [[], []]), 0.0)
def testConfidenceScoreIsUsingTTestWelchsTTest(self):
original_WelchsTTest = ttest.WelchsTTest
try:
ttest.WelchsTTest = lambda _sample1, _sample2: (0, 0, 0.42)
self.assertAlmostEqual(
bisect_results.ConfidenceScore([[1], [1]], [[2], [2]]), 58.0)
finally:
ttest.WelchsTTest = original_WelchsTTest
class BisectResulstsTest(unittest.TestCase):
# TODO(sergiyb): Write tests for GetResultDicts when it is broken into smaller
# pieces.
pass
if __name__ == '__main__':
unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment