Commit cfc4b1f1 authored by sergiyb's avatar sergiyb Committed by Commit bot

Refactored auto_bisect bot:

 - Extracted BisectPrinter, which contains everything related to printing
 - Extracted BisectState and RevisionState, which represent bisect-in-progress
 - Rewrote BisectResults - in particular split up GetRevisionDict, which is now its constructor
 - Added tests for BisectResults, fixed bugs in _FindOtherRegressions
 - Added tests for BisectState

R=qyearsley@chromium.org

Review URL: https://codereview.chromium.org/664753002

Cr-Commit-Position: refs/heads/master@{#300677}
parent f9f4000b
...@@ -33,7 +33,6 @@ Example usage using git hashes: ...@@ -33,7 +33,6 @@ Example usage using git hashes:
""" """
import copy import copy
import datetime
import errno import errno
import hashlib import hashlib
import optparse import optparse
...@@ -49,7 +48,9 @@ import zipfile ...@@ -49,7 +48,9 @@ import zipfile
sys.path.append(os.path.join( sys.path.append(os.path.join(
os.path.dirname(__file__), os.path.pardir, 'telemetry')) os.path.dirname(__file__), os.path.pardir, 'telemetry'))
from bisect_printer import BisectPrinter
from bisect_results import BisectResults from bisect_results import BisectResults
from bisect_state import BisectState
import bisect_utils import bisect_utils
import builder import builder
import math_utils import math_utils
...@@ -57,99 +58,6 @@ import request_build ...@@ -57,99 +58,6 @@ import request_build
import source_control import source_control
from telemetry.util import cloud_storage from telemetry.util import cloud_storage
# Below is the map of "depot" names to information about each depot. Each depot
# is a repository, and in the process of bisecting, revision ranges in these
# repositories may also be bisected.
#
# Each depot information dictionary may contain:
# src: Path to the working directory.
# recurse: True if this repository will get bisected.
# depends: A list of other repositories that are actually part of the same
# repository in svn. If the repository has any dependent repositories
# (e.g. skia/src needs skia/include and skia/gyp to be updated), then
# they are specified here.
# svn: URL of SVN repository. Needed for git workflow to resolve hashes to
# SVN revisions.
# from: Parent depot that must be bisected before this is bisected.
# deps_var: Key name in vars variable in DEPS file that has revision
# information.
DEPOT_DEPS_NAME = {
'chromium': {
'src': 'src',
'recurse': True,
'depends': None,
'from': ['cros', 'android-chrome'],
'viewvc':
'http://src.chromium.org/viewvc/chrome?view=revision&revision=',
'deps_var': 'chromium_rev'
},
'webkit': {
'src': 'src/third_party/WebKit',
'recurse': True,
'depends': None,
'from': ['chromium'],
'viewvc':
'http://src.chromium.org/viewvc/blink?view=revision&revision=',
'deps_var': 'webkit_revision'
},
'angle': {
'src': 'src/third_party/angle',
'src_old': 'src/third_party/angle_dx11',
'recurse': True,
'depends': None,
'from': ['chromium'],
'platform': 'nt',
'deps_var': 'angle_revision'
},
'v8': {
'src': 'src/v8',
'recurse': True,
'depends': None,
'from': ['chromium'],
'custom_deps': bisect_utils.GCLIENT_CUSTOM_DEPS_V8,
'viewvc': 'https://code.google.com/p/v8/source/detail?r=',
'deps_var': 'v8_revision'
},
'v8_bleeding_edge': {
'src': 'src/v8_bleeding_edge',
'recurse': True,
'depends': None,
'svn': 'https://v8.googlecode.com/svn/branches/bleeding_edge',
'from': ['v8'],
'viewvc': 'https://code.google.com/p/v8/source/detail?r=',
'deps_var': 'v8_revision'
},
'skia/src': {
'src': 'src/third_party/skia/src',
'recurse': True,
'svn': 'http://skia.googlecode.com/svn/trunk/src',
'depends': ['skia/include', 'skia/gyp'],
'from': ['chromium'],
'viewvc': 'https://code.google.com/p/skia/source/detail?r=',
'deps_var': 'skia_revision'
},
'skia/include': {
'src': 'src/third_party/skia/include',
'recurse': False,
'svn': 'http://skia.googlecode.com/svn/trunk/include',
'depends': None,
'from': ['chromium'],
'viewvc': 'https://code.google.com/p/skia/source/detail?r=',
'deps_var': 'None'
},
'skia/gyp': {
'src': 'src/third_party/skia/gyp',
'recurse': False,
'svn': 'http://skia.googlecode.com/svn/trunk/gyp',
'depends': None,
'from': ['chromium'],
'viewvc': 'https://code.google.com/p/skia/source/detail?r=',
'deps_var': 'None'
}
}
DEPOT_NAMES = DEPOT_DEPS_NAME.keys()
# The script is in chromium/src/tools/auto_bisect. Throughout this script, # The script is in chromium/src/tools/auto_bisect. Throughout this script,
# we use paths to other things in the chromium/src repository. # we use paths to other things in the chromium/src repository.
...@@ -167,9 +75,6 @@ MAX_MAC_BUILD_TIME = 14400 ...@@ -167,9 +75,6 @@ MAX_MAC_BUILD_TIME = 14400
MAX_WIN_BUILD_TIME = 14400 MAX_WIN_BUILD_TIME = 14400
MAX_LINUX_BUILD_TIME = 14400 MAX_LINUX_BUILD_TIME = 14400
# The percentage at which confidence is considered high.
HIGH_CONFIDENCE = 95
# Patch template to add a new file, DEPS.sha under src folder. # Patch template to add a new file, DEPS.sha under src folder.
# This file contains SHA1 value of the DEPS changes made while bisecting # This file contains SHA1 value of the DEPS changes made while bisecting
# dependency repositories. This patch send along with DEPS patch to try server. # dependency repositories. This patch send along with DEPS patch to try server.
...@@ -184,72 +89,6 @@ new file mode 100644 ...@@ -184,72 +89,6 @@ new file mode 100644
+%(deps_sha)s +%(deps_sha)s
""" """
# The possible values of the --bisect_mode flag, which determines what to
# use when classifying a revision as "good" or "bad".
BISECT_MODE_MEAN = 'mean'
BISECT_MODE_STD_DEV = 'std_dev'
BISECT_MODE_RETURN_CODE = 'return_code'
# The perf dashboard looks for a string like "Estimated Confidence: 95%"
# to decide whether or not to cc the author(s). If you change this, please
# update the perf dashboard as well.
RESULTS_BANNER = """
===== BISECT JOB RESULTS =====
Status: %(status)s
Test Command: %(command)s
Test Metric: %(metrics)s
Relative Change: %(change)s
Estimated Confidence: %(confidence).02f%%"""
# The perf dashboard specifically looks for the string
# "Author : " to parse out who to cc on a bug. If you change the
# formatting here, please update the perf dashboard as well.
RESULTS_REVISION_INFO = """
===== SUSPECTED CL(s) =====
Subject : %(subject)s
Author : %(author)s%(email_info)s%(commit_info)s
Commit : %(cl)s
Date : %(cl_date)s"""
REPRO_STEPS_LOCAL = """
==== INSTRUCTIONS TO REPRODUCE ====
To run locally:
- Use the test command given under 'BISECT JOB RESULTS' above.
- Consider using a profiler. Pass --profiler=list to list available profilers.
"""
REPRO_STEPS_TRYJOB = """
To reproduce on a performance try bot:
1. Edit run-perf-test.cfg
2. $ git try -b <bot> --svn_repo='svn://svn.chromium.org/chrome-try/try-perf'
Notes:
a) Follow the in-file instructions in run-perf-test.cfg.
b) run-perf-test.cfg is under tools/ or under third_party/WebKit/Tools.
c) Do your edits preferably under a new git branch.
d) --browser=release and --browser=android-chromium-testshell are supported
depending on the platform (desktop|android).
e) Strip any src/ directories from the head of relative path names.
f) Make sure to use the appropriate bot on step 3.
For more details please visit
https://sites.google.com/a/chromium.org/dev/developers/performance-try-bots"""
REPRO_STEPS_TRYJOB_TELEMETRY = """
To reproduce on a performance try bot:
%(command)s
(Where <bot-name> comes from tools/perf/run_benchmark --browser=list)
For more details please visit
https://sites.google.com/a/chromium.org/dev/developers/performance-try-bots
"""
RESULTS_THANKYOU = """
| O O | Visit http://www.chromium.org/developers/core-principles for Chrome's
| X | policy on perf regressions. Contact chrome-perf-dashboard-team with any
| / \ | questions or suggestions about bisecting. THANK YOU."""
# Git branch name used to run bisect try jobs. # Git branch name used to run bisect try jobs.
BISECT_TRYJOB_BRANCH = 'bisect-tryjob' BISECT_TRYJOB_BRANCH = 'bisect-tryjob'
# Git master branch name. # Git master branch name.
...@@ -265,14 +104,6 @@ class RunGitError(Exception): ...@@ -265,14 +104,6 @@ class RunGitError(Exception):
return '%s\nError executing git command.' % self.args[0] return '%s\nError executing git command.' % self.args[0]
def _AddAdditionalDepotInfo(depot_info):
"""Adds additional depot info to the global depot variables."""
global DEPOT_DEPS_NAME
global DEPOT_NAMES
DEPOT_DEPS_NAME = dict(DEPOT_DEPS_NAME.items() + depot_info.items())
DEPOT_NAMES = DEPOT_DEPS_NAME.keys()
def GetSHA1HexDigest(contents): def GetSHA1HexDigest(contents):
"""Returns SHA1 hex digest of the given string.""" """Returns SHA1 hex digest of the given string."""
return hashlib.sha1(contents).hexdigest() return hashlib.sha1(contents).hexdigest()
...@@ -570,7 +401,7 @@ def _UpdateDEPSForAngle(revision, depot, deps_file): ...@@ -570,7 +401,7 @@ def _UpdateDEPSForAngle(revision, depot, deps_file):
in such cases check "deps" dictionary variable that matches in such cases check "deps" dictionary variable that matches
angle.git@[a-fA-F0-9]{40}$ and replace git hash. angle.git@[a-fA-F0-9]{40}$ and replace git hash.
""" """
deps_var = DEPOT_DEPS_NAME[depot]['deps_var'] deps_var = bisect_utils.DEPOT_DEPS_NAME[depot]['deps_var']
try: try:
deps_contents = ReadStringFromFile(deps_file) deps_contents = ReadStringFromFile(deps_file)
# Check whether the depot and revision pattern in DEPS file vars variable # Check whether the depot and revision pattern in DEPS file vars variable
...@@ -758,85 +589,14 @@ def _GenerateProfileIfNecessary(command_args): ...@@ -758,85 +589,14 @@ def _GenerateProfileIfNecessary(command_args):
return True return True
def _AddRevisionsIntoRevisionData(revisions, depot, sort, revision_data):
"""Adds new revisions to the revision_data dictionary and initializes them.
Args:
revisions: List of revisions to add.
depot: Depot that's currently in use (src, webkit, etc...)
sort: Sorting key for displaying revisions.
revision_data: A dictionary to add the new revisions into.
Existing revisions will have their sort keys adjusted.
"""
num_depot_revisions = len(revisions)
for _, v in revision_data.iteritems():
if v['sort'] > sort:
v['sort'] += num_depot_revisions
for i in xrange(num_depot_revisions):
r = revisions[i]
revision_data[r] = {
'revision' : r,
'depot' : depot,
'value' : None,
'perf_time' : 0,
'build_time' : 0,
'passed' : '?',
'sort' : i + sort + 1,
}
def _PrintThankYou():
print RESULTS_THANKYOU
def _PrintTableRow(column_widths, row_data):
"""Prints out a row in a formatted table that has columns aligned.
Args:
column_widths: A list of column width numbers.
row_data: A list of items for each column in this row.
"""
assert len(column_widths) == len(row_data)
text = ''
for i in xrange(len(column_widths)):
current_row_data = row_data[i].center(column_widths[i], ' ')
text += ('%%%ds' % column_widths[i]) % current_row_data
print text
def _PrintStepTime(revision_data_sorted):
"""Prints information about how long various steps took.
Args:
revision_data_sorted: The sorted list of revision data dictionaries."""
step_perf_time_avg = 0.0
step_build_time_avg = 0.0
step_count = 0.0
for _, current_data in revision_data_sorted:
if current_data['value']:
step_perf_time_avg += current_data['perf_time']
step_build_time_avg += current_data['build_time']
step_count += 1
if step_count:
step_perf_time_avg = step_perf_time_avg / step_count
step_build_time_avg = step_build_time_avg / step_count
print
print 'Average build time : %s' % datetime.timedelta(
seconds=int(step_build_time_avg))
print 'Average test time : %s' % datetime.timedelta(
seconds=int(step_perf_time_avg))
class DepotDirectoryRegistry(object): class DepotDirectoryRegistry(object):
def __init__(self, src_cwd): def __init__(self, src_cwd):
self.depot_cwd = {} self.depot_cwd = {}
for depot in DEPOT_NAMES: for depot in bisect_utils.DEPOT_NAMES:
# The working directory of each depot is just the path to the depot, but # The working directory of each depot is just the path to the depot, but
# since we're already in 'src', we can skip that part. # since we're already in 'src', we can skip that part.
path_in_src = DEPOT_DEPS_NAME[depot]['src'][4:] path_in_src = bisect_utils.DEPOT_DEPS_NAME[depot]['src'][4:]
self.AddDepot(depot, os.path.join(src_cwd, path_in_src)) self.AddDepot(depot, os.path.join(src_cwd, path_in_src))
self.AddDepot('chromium', src_cwd) self.AddDepot('chromium', src_cwd)
...@@ -1051,8 +811,8 @@ class BisectPerformanceMetrics(object): ...@@ -1051,8 +811,8 @@ class BisectPerformanceMetrics(object):
'bleeding_edge revision r')[1] 'bleeding_edge revision r')[1]
bleeding_edge_revision = int(bleeding_edge_revision.split(')')[0]) bleeding_edge_revision = int(bleeding_edge_revision.split(')')[0])
git_revision = source_control.ResolveToRevision( git_revision = source_control.ResolveToRevision(
bleeding_edge_revision, 'v8_bleeding_edge', DEPOT_DEPS_NAME, 1, bleeding_edge_revision, 'v8_bleeding_edge',
cwd=v8_bleeding_edge_dir) bisect_utils.DEPOT_DEPS_NAME, 1, cwd=v8_bleeding_edge_dir)
return git_revision return git_revision
except (IndexError, ValueError): except (IndexError, ValueError):
pass pass
...@@ -1060,8 +820,8 @@ class BisectPerformanceMetrics(object): ...@@ -1060,8 +820,8 @@ class BisectPerformanceMetrics(object):
if not git_revision: if not git_revision:
# Wasn't successful, try the old way of looking for "Prepare push to" # Wasn't successful, try the old way of looking for "Prepare push to"
git_revision = source_control.ResolveToRevision( git_revision = source_control.ResolveToRevision(
int(commit_position) - 1, 'v8_bleeding_edge', DEPOT_DEPS_NAME, -1, int(commit_position) - 1, 'v8_bleeding_edge',
cwd=v8_bleeding_edge_dir) bisect_utils.DEPOT_DEPS_NAME, -1, cwd=v8_bleeding_edge_dir)
if git_revision: if git_revision:
revision_info = source_control.QueryRevisionInfo(git_revision, revision_info = source_control.QueryRevisionInfo(git_revision,
...@@ -1125,7 +885,7 @@ class BisectPerformanceMetrics(object): ...@@ -1125,7 +885,7 @@ class BisectPerformanceMetrics(object):
rxp = re.compile(".git@(?P<revision>[a-fA-F0-9]+)") rxp = re.compile(".git@(?P<revision>[a-fA-F0-9]+)")
results = {} results = {}
for depot_name, depot_data in DEPOT_DEPS_NAME.iteritems(): for depot_name, depot_data in bisect_utils.DEPOT_DEPS_NAME.iteritems():
if (depot_data.get('platform') and if (depot_data.get('platform') and
depot_data.get('platform') != os.name): depot_data.get('platform') != os.name):
continue continue
...@@ -1154,10 +914,10 @@ class BisectPerformanceMetrics(object): ...@@ -1154,10 +914,10 @@ class BisectPerformanceMetrics(object):
for depot_name, depot_revision in parse_results.iteritems(): for depot_name, depot_revision in parse_results.iteritems():
depot_revision = depot_revision.strip('@') depot_revision = depot_revision.strip('@')
print depot_name, depot_revision print depot_name, depot_revision
for current_name, current_data in DEPOT_DEPS_NAME.iteritems(): for cur_name, cur_data in bisect_utils.DEPOT_DEPS_NAME.iteritems():
if (current_data.has_key('deps_var') and if (cur_data.has_key('deps_var') and
current_data['deps_var'] == depot_name): cur_data['deps_var'] == depot_name):
src_name = current_name src_name = cur_name
results[src_name] = depot_revision results[src_name] = depot_revision
break break
return results return results
...@@ -1436,8 +1196,8 @@ class BisectPerformanceMetrics(object): ...@@ -1436,8 +1196,8 @@ class BisectPerformanceMetrics(object):
if (self.opts.target_platform in ['chromium', 'android'] and if (self.opts.target_platform in ['chromium', 'android'] and
self.opts.gs_bucket): self.opts.gs_bucket):
return (depot == 'chromium' or return (depot == 'chromium' or
'chromium' in DEPOT_DEPS_NAME[depot]['from'] or 'chromium' in bisect_utils.DEPOT_DEPS_NAME[depot]['from'] or
'v8' in DEPOT_DEPS_NAME[depot]['from']) 'v8' in bisect_utils.DEPOT_DEPS_NAME[depot]['from'])
return False return False
def UpdateDepsContents(self, deps_contents, depot, git_revision, deps_key): def UpdateDepsContents(self, deps_contents, depot, git_revision, deps_key):
...@@ -1500,7 +1260,7 @@ class BisectPerformanceMetrics(object): ...@@ -1500,7 +1260,7 @@ class BisectPerformanceMetrics(object):
if not os.path.exists(deps_file): if not os.path.exists(deps_file):
return False return False
deps_var = DEPOT_DEPS_NAME[depot]['deps_var'] deps_var = bisect_utils.DEPOT_DEPS_NAME[depot]['deps_var']
# Don't update DEPS file if deps_var is not set in DEPOT_DEPS_NAME. # Don't update DEPS file if deps_var is not set in DEPOT_DEPS_NAME.
if not deps_var: if not deps_var:
print 'DEPS update not supported for Depot: %s', depot print 'DEPS update not supported for Depot: %s', depot
...@@ -1545,8 +1305,8 @@ class BisectPerformanceMetrics(object): ...@@ -1545,8 +1305,8 @@ class BisectPerformanceMetrics(object):
if not chromium_sha: if not chromium_sha:
raise RuntimeError('Failed to determine Chromium revision for %s' % raise RuntimeError('Failed to determine Chromium revision for %s' %
revision) revision)
if ('chromium' in DEPOT_DEPS_NAME[depot]['from'] or if ('chromium' in bisect_utils.DEPOT_DEPS_NAME[depot]['from'] or
'v8' in DEPOT_DEPS_NAME[depot]['from']): 'v8' in bisect_utils.DEPOT_DEPS_NAME[depot]['from']):
# Checkout DEPS file for the current chromium revision. # Checkout DEPS file for the current chromium revision.
if source_control.CheckoutFileAtRevision( if source_control.CheckoutFileAtRevision(
bisect_utils.FILE_DEPS, chromium_sha, cwd=self.src_cwd): bisect_utils.FILE_DEPS, chromium_sha, cwd=self.src_cwd):
...@@ -1601,13 +1361,14 @@ class BisectPerformanceMetrics(object): ...@@ -1601,13 +1361,14 @@ class BisectPerformanceMetrics(object):
return not bisect_utils.RunGClient(['runhooks'], cwd=self.src_cwd) return not bisect_utils.RunGClient(['runhooks'], cwd=self.src_cwd)
def _IsBisectModeUsingMetric(self): def _IsBisectModeUsingMetric(self):
return self.opts.bisect_mode in [BISECT_MODE_MEAN, BISECT_MODE_STD_DEV] return self.opts.bisect_mode in [bisect_utils.BISECT_MODE_MEAN,
bisect_utils.BISECT_MODE_STD_DEV]
def _IsBisectModeReturnCode(self): def _IsBisectModeReturnCode(self):
return self.opts.bisect_mode in [BISECT_MODE_RETURN_CODE] return self.opts.bisect_mode in [bisect_utils.BISECT_MODE_RETURN_CODE]
def _IsBisectModeStandardDeviation(self): def _IsBisectModeStandardDeviation(self):
return self.opts.bisect_mode in [BISECT_MODE_STD_DEV] return self.opts.bisect_mode in [bisect_utils.BISECT_MODE_STD_DEV]
def GetCompatibleCommand(self, command_to_run, revision, depot): def GetCompatibleCommand(self, command_to_run, revision, depot):
"""Return a possibly modified test command depending on the revision. """Return a possibly modified test command depending on the revision.
...@@ -1816,20 +1577,20 @@ class BisectPerformanceMetrics(object): ...@@ -1816,20 +1577,20 @@ class BisectPerformanceMetrics(object):
# figure out for each mirror which git revision to grab. There's no # figure out for each mirror which git revision to grab. There's no
# guarantee that the SVN revision will exist for each of the dependent # guarantee that the SVN revision will exist for each of the dependent
# depots, so we have to grep the git logs and grab the next earlier one. # depots, so we have to grep the git logs and grab the next earlier one.
if not is_base and DEPOT_DEPS_NAME[depot]['depends']: if not is_base and bisect_utils.DEPOT_DEPS_NAME[depot]['depends']:
commit_position = source_control.GetCommitPosition(revision) commit_position = source_control.GetCommitPosition(revision)
for d in DEPOT_DEPS_NAME[depot]['depends']: for d in bisect_utils.DEPOT_DEPS_NAME[depot]['depends']:
self.depot_registry.ChangeToDepotDir(d) self.depot_registry.ChangeToDepotDir(d)
dependant_rev = source_control.ResolveToRevision( dependant_rev = source_control.ResolveToRevision(
commit_position, d, DEPOT_DEPS_NAME, -1000) commit_position, d, bisect_utils.DEPOT_DEPS_NAME, -1000)
if dependant_rev: if dependant_rev:
revisions_to_sync.append([d, dependant_rev]) revisions_to_sync.append([d, dependant_rev])
num_resolved = len(revisions_to_sync) num_resolved = len(revisions_to_sync)
num_needed = len(DEPOT_DEPS_NAME[depot]['depends']) num_needed = len(bisect_utils.DEPOT_DEPS_NAME[depot]['depends'])
self.depot_registry.ChangeToDepotDir(depot) self.depot_registry.ChangeToDepotDir(depot)
...@@ -2031,7 +1792,8 @@ class BisectPerformanceMetrics(object): ...@@ -2031,7 +1792,8 @@ class BisectPerformanceMetrics(object):
# want so that all the dependencies sync properly as well. # want so that all the dependencies sync properly as well.
# i.e. gclient sync src@<SHA1> # i.e. gclient sync src@<SHA1>
if sync_client == 'gclient': if sync_client == 'gclient':
revision = '%s@%s' % (DEPOT_DEPS_NAME[depot]['src'], revision) revision = '%s@%s' % (bisect_utils.DEPOT_DEPS_NAME[depot]['src'],
revision)
sync_success = source_control.SyncToRevision(revision, sync_client) sync_success = source_control.SyncToRevision(revision, sync_client)
if not sync_success: if not sync_success:
...@@ -2052,7 +1814,7 @@ class BisectPerformanceMetrics(object): ...@@ -2052,7 +1814,7 @@ class BisectPerformanceMetrics(object):
True if the current_value is closer to the known_good_value than the True if the current_value is closer to the known_good_value than the
known_bad_value. known_bad_value.
""" """
if self.opts.bisect_mode == BISECT_MODE_STD_DEV: if self.opts.bisect_mode == bisect_utils.BISECT_MODE_STD_DEV:
dist_to_good_value = abs(current_value['std_dev'] - dist_to_good_value = abs(current_value['std_dev'] -
known_good_value['std_dev']) known_good_value['std_dev'])
dist_to_bad_value = abs(current_value['std_dev'] - dist_to_bad_value = abs(current_value['std_dev'] -
...@@ -2063,18 +1825,18 @@ class BisectPerformanceMetrics(object): ...@@ -2063,18 +1825,18 @@ class BisectPerformanceMetrics(object):
return dist_to_good_value < dist_to_bad_value return dist_to_good_value < dist_to_bad_value
def _FillInV8BleedingEdgeInfo(self, min_revision_data, max_revision_data): def _FillInV8BleedingEdgeInfo(self, min_revision_state, max_revision_state):
r1 = self._GetNearestV8BleedingEdgeFromTrunk(min_revision_data['revision'], r1 = self._GetNearestV8BleedingEdgeFromTrunk(min_revision_state.revision,
search_forward=True) search_forward=True)
r2 = self._GetNearestV8BleedingEdgeFromTrunk(max_revision_data['revision'], r2 = self._GetNearestV8BleedingEdgeFromTrunk(max_revision_state.revision,
search_forward=False) search_forward=False)
min_revision_data['external']['v8_bleeding_edge'] = r1 min_revision_state.external['v8_bleeding_edge'] = r1
max_revision_data['external']['v8_bleeding_edge'] = r2 max_revision_state.external['v8_bleeding_edge'] = r2
if (not self._GetV8BleedingEdgeFromV8TrunkIfMappable( if (not self._GetV8BleedingEdgeFromV8TrunkIfMappable(
min_revision_data['revision']) min_revision_state.revision)
or not self._GetV8BleedingEdgeFromV8TrunkIfMappable( or not self._GetV8BleedingEdgeFromV8TrunkIfMappable(
max_revision_data['revision'])): max_revision_state.revision)):
self.warnings.append( self.warnings.append(
'Trunk revisions in V8 did not map directly to bleeding_edge. ' 'Trunk revisions in V8 did not map directly to bleeding_edge. '
'Attempted to expand the range to find V8 rolls which did map ' 'Attempted to expand the range to find V8 rolls which did map '
...@@ -2082,54 +1844,54 @@ class BisectPerformanceMetrics(object): ...@@ -2082,54 +1844,54 @@ class BisectPerformanceMetrics(object):
'valid.') 'valid.')
def _FindNextDepotToBisect( def _FindNextDepotToBisect(
self, current_depot, min_revision_data, max_revision_data): self, current_depot, min_revision_state, max_revision_state):
"""Decides which depot the script should dive into next (if any). """Decides which depot the script should dive into next (if any).
Args: Args:
current_depot: Current depot being bisected. current_depot: Current depot being bisected.
min_revision_data: Data about the earliest revision in the bisect range. min_revision_state: State of the earliest revision in the bisect range.
max_revision_data: Data about the latest revision in the bisect range. max_revision_state: State of the latest revision in the bisect range.
Returns: Returns:
Name of the depot to bisect next, or None. Name of the depot to bisect next, or None.
""" """
external_depot = None external_depot = None
for next_depot in DEPOT_NAMES: for next_depot in bisect_utils.DEPOT_NAMES:
if DEPOT_DEPS_NAME[next_depot].has_key('platform'): if bisect_utils.DEPOT_DEPS_NAME[next_depot].has_key('platform'):
if DEPOT_DEPS_NAME[next_depot]['platform'] != os.name: if bisect_utils.DEPOT_DEPS_NAME[next_depot]['platform'] != os.name:
continue continue
if not (DEPOT_DEPS_NAME[next_depot]['recurse'] if not (bisect_utils.DEPOT_DEPS_NAME[next_depot]['recurse']
and min_revision_data['depot'] and min_revision_state.depot
in DEPOT_DEPS_NAME[next_depot]['from']): in bisect_utils.DEPOT_DEPS_NAME[next_depot]['from']):
continue continue
if current_depot == 'v8': if current_depot == 'v8':
# We grab the bleeding_edge info here rather than earlier because we # We grab the bleeding_edge info here rather than earlier because we
# finally have the revision range. From that we can search forwards and # finally have the revision range. From that we can search forwards and
# backwards to try to match trunk revisions to bleeding_edge. # backwards to try to match trunk revisions to bleeding_edge.
self._FillInV8BleedingEdgeInfo(min_revision_data, max_revision_data) self._FillInV8BleedingEdgeInfo(min_revision_state, max_revision_state)
if (min_revision_data['external'].get(next_depot) == if (min_revision_state.external.get(next_depot) ==
max_revision_data['external'].get(next_depot)): max_revision_state.external.get(next_depot)):
continue continue
if (min_revision_data['external'].get(next_depot) and if (min_revision_state.external.get(next_depot) and
max_revision_data['external'].get(next_depot)): max_revision_state.external.get(next_depot)):
external_depot = next_depot external_depot = next_depot
break break
return external_depot return external_depot
def PrepareToBisectOnDepot( def PrepareToBisectOnDepot(
self, current_depot, end_revision, start_revision, previous_revision): self, current_depot, start_revision, end_revision, previous_revision):
"""Changes to the appropriate directory and gathers a list of revisions """Changes to the appropriate directory and gathers a list of revisions
to bisect between |start_revision| and |end_revision|. to bisect between |start_revision| and |end_revision|.
Args: Args:
current_depot: The depot we want to bisect. current_depot: The depot we want to bisect.
end_revision: End of the revision range.
start_revision: Start of the revision range. start_revision: Start of the revision range.
end_revision: End of the revision range.
previous_revision: The last revision we synced to on |previous_depot|. previous_revision: The last revision we synced to on |previous_depot|.
Returns: Returns:
...@@ -2142,10 +1904,11 @@ class BisectPerformanceMetrics(object): ...@@ -2142,10 +1904,11 @@ class BisectPerformanceMetrics(object):
# V8 (and possibly others) is merged in periodically. Bisecting # V8 (and possibly others) is merged in periodically. Bisecting
# this directory directly won't give much good info. # this directory directly won't give much good info.
if DEPOT_DEPS_NAME[current_depot].has_key('custom_deps'): if bisect_utils.DEPOT_DEPS_NAME[current_depot].has_key('custom_deps'):
config_path = os.path.join(self.src_cwd, '..') config_path = os.path.join(self.src_cwd, '..')
if bisect_utils.RunGClientAndCreateConfig(self.opts, if bisect_utils.RunGClientAndCreateConfig(
DEPOT_DEPS_NAME[current_depot]['custom_deps'], cwd=config_path): self.opts, bisect_utils.DEPOT_DEPS_NAME[current_depot]['custom_deps'],
cwd=config_path):
return [] return []
if bisect_utils.RunGClient( if bisect_utils.RunGClient(
['sync', '--revision', previous_revision], cwd=self.src_cwd): ['sync', '--revision', previous_revision], cwd=self.src_cwd):
...@@ -2199,8 +1962,8 @@ class BisectPerformanceMetrics(object): ...@@ -2199,8 +1962,8 @@ class BisectPerformanceMetrics(object):
def PrintRevisionsToBisectMessage(self, revision_list, depot): def PrintRevisionsToBisectMessage(self, revision_list, depot):
if self.opts.output_buildbot_annotations: if self.opts.output_buildbot_annotations:
step_name = 'Bisection Range: [%s - %s]' % ( step_name = 'Bisection Range: [%s:%s - %s]' % (depot, revision_list[-1],
revision_list[len(revision_list)-1], revision_list[0]) revision_list[0])
bisect_utils.OutputAnnotationStepStart(step_name) bisect_utils.OutputAnnotationStepStart(step_name)
print print
...@@ -2340,8 +2103,6 @@ class BisectPerformanceMetrics(object): ...@@ -2340,8 +2103,6 @@ class BisectPerformanceMetrics(object):
Returns: Returns:
A BisectResults object. A BisectResults object.
""" """
results = BisectResults(self.depot_registry)
# Choose depot to bisect first # Choose depot to bisect first
target_depot = 'chromium' target_depot = 'chromium'
if self.opts.target_platform == 'cros': if self.opts.target_platform == 'cros':
...@@ -2354,25 +2115,25 @@ class BisectPerformanceMetrics(object): ...@@ -2354,25 +2115,25 @@ class BisectPerformanceMetrics(object):
# If they passed SVN revisions, we can try match them to git SHA1 hashes. # If they passed SVN revisions, we can try match them to git SHA1 hashes.
bad_revision = source_control.ResolveToRevision( bad_revision = source_control.ResolveToRevision(
bad_revision_in, target_depot, DEPOT_DEPS_NAME, 100) bad_revision_in, target_depot, bisect_utils.DEPOT_DEPS_NAME, 100)
good_revision = source_control.ResolveToRevision( good_revision = source_control.ResolveToRevision(
good_revision_in, target_depot, DEPOT_DEPS_NAME, -100) good_revision_in, target_depot, bisect_utils.DEPOT_DEPS_NAME, -100)
os.chdir(cwd) os.chdir(cwd)
if bad_revision is None: if bad_revision is None:
results.error = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in return BisectResults(
return results error='Couldn\'t resolve [%s] to SHA1.' % bad_revision_in)
if good_revision is None: if good_revision is None:
results.error = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in return BisectResults(
return results error='Couldn\'t resolve [%s] to SHA1.' % good_revision_in)
# Check that they didn't accidentally swap good and bad revisions. # Check that they didn't accidentally swap good and bad revisions.
if not self.CheckIfRevisionsInProperOrder( if not self.CheckIfRevisionsInProperOrder(
target_depot, good_revision, bad_revision): target_depot, good_revision, bad_revision):
results.error = ('bad_revision < good_revision, did you swap these ' return BisectResults(error='bad_revision < good_revision, did you swap '
'by mistake?') 'these by mistake?')
return results
bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange( bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange(
bad_revision, good_revision, good_revision_in) bad_revision, good_revision, good_revision_in)
if self.opts.output_buildbot_annotations: if self.opts.output_buildbot_annotations:
...@@ -2380,45 +2141,17 @@ class BisectPerformanceMetrics(object): ...@@ -2380,45 +2141,17 @@ class BisectPerformanceMetrics(object):
cannot_bisect = self.CanPerformBisect(good_revision, bad_revision) cannot_bisect = self.CanPerformBisect(good_revision, bad_revision)
if cannot_bisect: if cannot_bisect:
results.error = cannot_bisect.get('error') return BisectResults(error=cannot_bisect.get('error'))
return results
print 'Gathering revision range for bisection.' print 'Gathering revision range for bisection.'
# Retrieve a list of revisions to do bisection on. # Retrieve a list of revisions to do bisection on.
src_revision_list = self.GetRevisionList( revision_list = self.GetRevisionList(target_depot, bad_revision,
target_depot, bad_revision, good_revision) good_revision)
if self.opts.output_buildbot_annotations: if self.opts.output_buildbot_annotations:
bisect_utils.OutputAnnotationStepClosed() bisect_utils.OutputAnnotationStepClosed()
if src_revision_list: if revision_list:
# revision_data will store information about a revision such as the
# depot it came from, the webkit/V8 revision at that time,
# performance timing, build state, etc...
revision_data = results.revision_data
# revision_list is the list we're binary searching through at the moment.
revision_list = []
sort_key_ids = 0
for current_revision_id in src_revision_list:
sort_key_ids += 1
revision_data[current_revision_id] = {
'value' : None,
'passed' : '?',
'depot' : target_depot,
'external' : None,
'perf_time' : 0,
'build_time' : 0,
'sort' : sort_key_ids,
}
revision_list.append(current_revision_id)
min_revision = 0
max_revision = len(revision_list) - 1
self.PrintRevisionsToBisectMessage(revision_list, target_depot) self.PrintRevisionsToBisectMessage(revision_list, target_depot)
if self.opts.output_buildbot_annotations: if self.opts.output_buildbot_annotations:
...@@ -2438,18 +2171,18 @@ class BisectPerformanceMetrics(object): ...@@ -2438,18 +2171,18 @@ class BisectPerformanceMetrics(object):
bisect_utils.OutputAnnotationStepClosed() bisect_utils.OutputAnnotationStepClosed()
if bad_results[1]: if bad_results[1]:
results.error = ('An error occurred while building and running ' error = ('An error occurred while building and running the \'bad\' '
'the \'bad\' reference value. The bisect cannot continue without ' 'reference value. The bisect cannot continue without '
'a working \'bad\' revision to start from.\n\nError: %s' % 'a working \'bad\' revision to start from.\n\nError: %s' %
bad_results[0]) bad_results[0])
return results return BisectResults(error=error)
if good_results[1]: if good_results[1]:
results.error = ('An error occurred while building and running ' error = ('An error occurred while building and running the \'good\' '
'the \'good\' reference value. The bisect cannot continue without ' 'reference value. The bisect cannot continue without '
'a working \'good\' revision to start from.\n\nError: %s' % 'a working \'good\' revision to start from.\n\nError: %s' %
good_results[0]) good_results[0])
return results return BisectResults(error=error)
# We need these reference values to determine if later runs should be # We need these reference values to determine if later runs should be
# classified as pass or fail. # classified as pass or fail.
...@@ -2473,49 +2206,55 @@ class BisectPerformanceMetrics(object): ...@@ -2473,49 +2206,55 @@ class BisectPerformanceMetrics(object):
message += "and the metric appears to have decreased. " message += "and the metric appears to have decreased. "
if ((higher_is_better and metric_increased) or if ((higher_is_better and metric_increased) or
(not higher_is_better and not metric_increased)): (not higher_is_better and not metric_increased)):
results.error = (message + 'Then, the test results for the ends of ' error = (message + 'Then, the test results for the ends of the given '
'the given \'good\' - \'bad\' range of revisions ' '\'good\' - \'bad\' range of revisions represent an '
'represent an improvement (and not a regression).') 'improvement (and not a regression).')
return results return BisectResults(error=error)
print message, "Therefore we continue to bisect." print message, "Therefore we continue to bisect."
bisect_state = BisectState(target_depot, revision_list)
revision_states = bisect_state.GetRevisionStates()
min_revision = 0
max_revision = len(revision_states) - 1
# Can just mark the good and bad revisions explicitly here since we # Can just mark the good and bad revisions explicitly here since we
# already know the results. # already know the results.
bad_revision_data = revision_data[revision_list[0]] bad_revision_state = revision_states[min_revision]
bad_revision_data['external'] = bad_results[2] bad_revision_state.external = bad_results[2]
bad_revision_data['perf_time'] = bad_results[3] bad_revision_state.perf_time = bad_results[3]
bad_revision_data['build_time'] = bad_results[4] bad_revision_state.build_time = bad_results[4]
bad_revision_data['passed'] = False bad_revision_state.passed = False
bad_revision_data['value'] = known_bad_value bad_revision_state.value = known_bad_value
good_revision_data = revision_data[revision_list[max_revision]] good_revision_state = revision_states[max_revision]
good_revision_data['external'] = good_results[2] good_revision_state.external = good_results[2]
good_revision_data['perf_time'] = good_results[3] good_revision_state.perf_time = good_results[3]
good_revision_data['build_time'] = good_results[4] good_revision_state.build_time = good_results[4]
good_revision_data['passed'] = True good_revision_state.passed = True
good_revision_data['value'] = known_good_value good_revision_state.value = known_good_value
next_revision_depot = target_depot bisect_printer = BisectPrinter(self.opts, self.depot_registry)
while True: while True:
if not revision_list: if not revision_states:
break break
min_revision_data = revision_data[revision_list[min_revision]]
max_revision_data = revision_data[revision_list[max_revision]]
if max_revision - min_revision <= 1: if max_revision - min_revision <= 1:
current_depot = min_revision_data['depot'] min_revision_state = revision_states[min_revision]
if min_revision_data['passed'] == '?': max_revision_state = revision_states[max_revision]
current_depot = min_revision_state.depot
# TODO(sergiyb): Under which conditions can first two branches be hit?
if min_revision_state.passed == '?':
next_revision_index = min_revision next_revision_index = min_revision
elif max_revision_data['passed'] == '?': elif max_revision_state.passed == '?':
next_revision_index = max_revision next_revision_index = max_revision
elif current_depot in ['android-chrome', 'cros', 'chromium', 'v8']: elif current_depot in ['android-chrome', 'cros', 'chromium', 'v8']:
previous_revision = revision_list[min_revision] previous_revision = revision_states[min_revision].revision
# If there were changes to any of the external libraries we track, # If there were changes to any of the external libraries we track,
# should bisect the changes there as well. # should bisect the changes there as well.
external_depot = self._FindNextDepotToBisect( external_depot = self._FindNextDepotToBisect(
current_depot, min_revision_data, max_revision_data) current_depot, min_revision_state, max_revision_state)
# If there was no change in any of the external depots, the search # If there was no change in any of the external depots, the search
# is over. # is over.
if not external_depot: if not external_depot:
...@@ -2527,33 +2266,30 @@ class BisectPerformanceMetrics(object): ...@@ -2527,33 +2266,30 @@ class BisectPerformanceMetrics(object):
'bleeding_edge.') 'bleeding_edge.')
break break
earliest_revision = max_revision_data['external'][external_depot] earliest_revision = max_revision_state.external[external_depot]
latest_revision = min_revision_data['external'][external_depot] latest_revision = min_revision_state.external[external_depot]
new_revision_list = self.PrepareToBisectOnDepot( new_revision_list = self.PrepareToBisectOnDepot(
external_depot, latest_revision, earliest_revision, external_depot, earliest_revision, latest_revision,
previous_revision) previous_revision)
if not new_revision_list: if not new_revision_list:
results.error = ('An error occurred attempting to retrieve ' error = ('An error occurred attempting to retrieve revision '
'revision range: [%s..%s]' % 'range: [%s..%s]' % (earliest_revision, latest_revision))
(earliest_revision, latest_revision)) return BisectResults(error=error)
return results
_AddRevisionsIntoRevisionData( revision_states = bisect_state.CreateRevisionStatesAfter(
new_revision_list, external_depot, min_revision_data['sort'], external_depot, new_revision_list, current_depot,
revision_data) previous_revision)
# Reset the bisection and perform it on the newly inserted # Reset the bisection and perform it on the newly inserted states.
# changelists.
revision_list = new_revision_list
min_revision = 0 min_revision = 0
max_revision = len(revision_list) - 1 max_revision = len(revision_states) - 1
sort_key_ids += len(revision_list)
print ('Regression in metric %s appears to be the result of ' print ('Regression in metric %s appears to be the result of '
'changes in [%s].' % (metric, external_depot)) 'changes in [%s].' % (metric, external_depot))
revision_list = [state.revision for state in revision_states]
self.PrintRevisionsToBisectMessage(revision_list, external_depot) self.PrintRevisionsToBisectMessage(revision_list, external_depot)
continue continue
...@@ -2563,36 +2299,34 @@ class BisectPerformanceMetrics(object): ...@@ -2563,36 +2299,34 @@ class BisectPerformanceMetrics(object):
next_revision_index = (int((max_revision - min_revision) / 2) + next_revision_index = (int((max_revision - min_revision) / 2) +
min_revision) min_revision)
next_revision_id = revision_list[next_revision_index] next_revision_state = revision_states[next_revision_index]
next_revision_data = revision_data[next_revision_id] next_revision = next_revision_state.revision
next_revision_depot = next_revision_data['depot'] next_depot = next_revision_state.depot
self.depot_registry.ChangeToDepotDir(next_revision_depot) self.depot_registry.ChangeToDepotDir(next_depot)
message = 'Working on [%s:%s]' % (next_depot, next_revision)
print message
if self.opts.output_buildbot_annotations: if self.opts.output_buildbot_annotations:
step_name = 'Working on [%s]' % next_revision_id bisect_utils.OutputAnnotationStepStart(message)
bisect_utils.OutputAnnotationStepStart(step_name)
print 'Working on revision: [%s]' % next_revision_id run_results = self.RunTest(next_revision, next_depot, command_to_run,
metric, skippable=True)
run_results = self.RunTest(
next_revision_id, next_revision_depot, command_to_run, metric,
skippable=True)
# If the build is successful, check whether or not the metric # If the build is successful, check whether or not the metric
# had regressed. # had regressed.
if not run_results[1]: if not run_results[1]:
if len(run_results) > 2: if len(run_results) > 2:
next_revision_data['external'] = run_results[2] next_revision_state.external = run_results[2]
next_revision_data['perf_time'] = run_results[3] next_revision_state.perf_time = run_results[3]
next_revision_data['build_time'] = run_results[4] next_revision_state.build_time = run_results[4]
passed_regression = self._CheckIfRunPassed(run_results[0], passed_regression = self._CheckIfRunPassed(run_results[0],
known_good_value, known_good_value,
known_bad_value) known_bad_value)
next_revision_data['passed'] = passed_regression next_revision_state.passed = passed_regression
next_revision_data['value'] = run_results[0] next_revision_state.value = run_results[0]
if passed_regression: if passed_regression:
max_revision = next_revision_index max_revision = next_revision_index
...@@ -2600,313 +2334,28 @@ class BisectPerformanceMetrics(object): ...@@ -2600,313 +2334,28 @@ class BisectPerformanceMetrics(object):
min_revision = next_revision_index min_revision = next_revision_index
else: else:
if run_results[1] == BUILD_RESULT_SKIPPED: if run_results[1] == BUILD_RESULT_SKIPPED:
next_revision_data['passed'] = 'Skipped' next_revision_state.passed = 'Skipped'
elif run_results[1] == BUILD_RESULT_FAIL: elif run_results[1] == BUILD_RESULT_FAIL:
next_revision_data['passed'] = 'Build Failed' next_revision_state.passed = 'Build Failed'
print run_results[0] print run_results[0]
# If the build is broken, remove it and redo search. # If the build is broken, remove it and redo search.
revision_list.pop(next_revision_index) revision_states.pop(next_revision_index)
max_revision -= 1 max_revision -= 1
if self.opts.output_buildbot_annotations: if self.opts.output_buildbot_annotations:
self._PrintPartialResults(results) bisect_printer.PrintPartialResults(bisect_state)
bisect_utils.OutputAnnotationStepClosed() bisect_utils.OutputAnnotationStepClosed()
else:
# Weren't able to sync and retrieve the revision range.
results.error = ('An error occurred attempting to retrieve revision '
'range: [%s..%s]' % (good_revision, bad_revision))
return results
def _PrintPartialResults(self, results):
results_dict = results.GetResultsDict()
self._PrintTestedCommitsTable(results_dict['revision_data_sorted'],
results_dict['first_working_revision'],
results_dict['last_broken_revision'],
100, final_step=False)
def _ConfidenceLevelStatus(self, results_dict): return BisectResults(bisect_state, self.depot_registry, self.opts,
if not results_dict['confidence']: self.warnings)
return None
confidence_status = 'Successful with %(level)s confidence%(warning)s.'
if results_dict['confidence'] >= HIGH_CONFIDENCE:
level = 'high'
else: else:
level = 'low' # Weren't able to sync and retrieve the revision range.
warning = ' and warnings' error = ('An error occurred attempting to retrieve revision range: '
if not self.warnings: '[%s..%s]' % (good_revision, bad_revision))
warning = '' return BisectResults(error=error)
return confidence_status % {'level': level, 'warning': warning}
def _GetViewVCLinkFromDepotAndHash(self, revision_id, depot):
"""Gets link to the repository browser."""
info = source_control.QueryRevisionInfo(revision_id,
self.depot_registry.GetDepotDir(depot))
if depot and DEPOT_DEPS_NAME[depot].has_key('viewvc'):
try:
# Format is "git-svn-id: svn://....@123456 <other data>"
svn_line = [i for i in info['body'].splitlines() if 'git-svn-id:' in i]
svn_revision = svn_line[0].split('@')
svn_revision = svn_revision[1].split(' ')[0]
return DEPOT_DEPS_NAME[depot]['viewvc'] + svn_revision
except IndexError:
return ''
return ''
def _PrintRevisionInfo(self, cl, info, depot=None):
email_info = ''
if not info['email'].startswith(info['author']):
email_info = '\nEmail : %s' % info['email']
commit_link = self._GetViewVCLinkFromDepotAndHash(cl, depot)
if commit_link:
commit_info = '\nLink : %s' % commit_link
else:
commit_info = ('\nFailed to parse SVN revision from body:\n%s' %
info['body'])
print RESULTS_REVISION_INFO % {
'subject': info['subject'],
'author': info['author'],
'email_info': email_info,
'commit_info': commit_info,
'cl': cl,
'cl_date': info['date']
}
def _PrintTestedCommitsHeader(self):
if self.opts.bisect_mode == BISECT_MODE_MEAN:
_PrintTableRow(
[20, 12, 70, 14, 12, 13],
['Depot', 'Position', 'SHA', 'Mean', 'Std. Error', 'State'])
elif self.opts.bisect_mode == BISECT_MODE_STD_DEV:
_PrintTableRow(
[20, 12, 70, 14, 12, 13],
['Depot', 'Position', 'SHA', 'Std. Error', 'Mean', 'State'])
elif self.opts.bisect_mode == BISECT_MODE_RETURN_CODE:
_PrintTableRow(
[20, 12, 70, 14, 13],
['Depot', 'Position', 'SHA', 'Return Code', 'State'])
else:
assert False, 'Invalid bisect_mode specified.'
def _PrintTestedCommitsEntry(self, current_data, commit_position, cl_link,
state_str):
if self.opts.bisect_mode == BISECT_MODE_MEAN:
std_error = '+-%.02f' % current_data['value']['std_err']
mean = '%.02f' % current_data['value']['mean']
_PrintTableRow(
[20, 12, 70, 12, 14, 13],
[current_data['depot'], commit_position, cl_link, mean, std_error,
state_str])
elif self.opts.bisect_mode == BISECT_MODE_STD_DEV:
std_error = '+-%.02f' % current_data['value']['std_err']
mean = '%.02f' % current_data['value']['mean']
_PrintTableRow(
[20, 12, 70, 12, 14, 13],
[current_data['depot'], commit_position, cl_link, std_error, mean,
state_str])
elif self.opts.bisect_mode == BISECT_MODE_RETURN_CODE:
mean = '%d' % current_data['value']['mean']
_PrintTableRow(
[20, 12, 70, 14, 13],
[current_data['depot'], commit_position, cl_link, mean,
state_str])
def _PrintTestedCommitsTable(
self, revision_data_sorted, first_working_revision, last_broken_revision,
confidence, final_step=True):
print
if final_step:
print '===== TESTED COMMITS ====='
else:
print '===== PARTIAL RESULTS ====='
self._PrintTestedCommitsHeader()
state = 0
for current_id, current_data in revision_data_sorted:
if current_data['value']:
if (current_id == last_broken_revision or
current_id == first_working_revision):
# If confidence is too low, don't add this empty line since it's
# used to put focus on a suspected CL.
if confidence and final_step:
print
state += 1
if state == 2 and not final_step:
# Just want a separation between "bad" and "good" cl's.
print
state_str = 'Bad'
if state == 1 and final_step:
state_str = 'Suspected CL'
elif state == 2:
state_str = 'Good'
# If confidence is too low, don't bother outputting good/bad.
if not confidence:
state_str = ''
state_str = state_str.center(13, ' ')
cl_link = self._GetViewVCLinkFromDepotAndHash(current_id,
current_data['depot'])
if not cl_link:
cl_link = current_id
commit_position = source_control.GetCommitPosition(
current_id, self.depot_registry.GetDepotDir(current_data['depot']))
commit_position = str(commit_position)
if not commit_position:
commit_position = ''
self._PrintTestedCommitsEntry(current_data, commit_position, cl_link,
state_str)
def _PrintReproSteps(self):
"""Prints out a section of the results explaining how to run the test.
This message includes the command used to run the test.
"""
command = '$ ' + self.opts.command
if bisect_utils.IsTelemetryCommand(self.opts.command):
command += ('\nAlso consider passing --profiler=list to see available '
'profilers.')
print REPRO_STEPS_LOCAL
if bisect_utils.IsTelemetryCommand(self.opts.command):
telemetry_command = re.sub(r'--browser=[^\s]+',
'--browser=<bot-name>',
command)
print REPRO_STEPS_TRYJOB_TELEMETRY % {'command': telemetry_command}
else:
print REPRO_STEPS_TRYJOB
def _PrintOtherRegressions(self, other_regressions, revision_data):
"""Prints a section of the results about other potential regressions."""
print
print 'Other regressions may have occurred:'
print ' %8s %70s %10s' % ('Depot'.center(8, ' '),
'Range'.center(70, ' '), 'Confidence'.center(10, ' '))
for regression in other_regressions:
current_id, previous_id, confidence = regression
current_data = revision_data[current_id]
previous_data = revision_data[previous_id]
current_link = self._GetViewVCLinkFromDepotAndHash(current_id,
current_data['depot'])
previous_link = self._GetViewVCLinkFromDepotAndHash(previous_id,
previous_data['depot'])
# If we can't map it to a viewable URL, at least show the original hash.
if not current_link:
current_link = current_id
if not previous_link:
previous_link = previous_id
print ' %8s %70s %s' % (
current_data['depot'], current_link,
('%d%%' % confidence).center(10, ' '))
print ' %8s %70s' % (
previous_data['depot'], previous_link)
print
def _CheckForWarnings(self, results_dict):
if len(results_dict['culprit_revisions']) > 1:
self.warnings.append('Due to build errors, regression range could '
'not be narrowed down to a single commit.')
if self.opts.repeat_test_count == 1:
self.warnings.append('Tests were only set to run once. This may '
'be insufficient to get meaningful results.')
if 0 < results_dict['confidence'] < HIGH_CONFIDENCE:
self.warnings.append('Confidence is not high. Try bisecting again '
'with increased repeat_count, larger range, or '
'on another metric.')
if not results_dict['confidence']:
self.warnings.append('Confidence score is 0%. Try bisecting again on '
'another platform or another metric.')
def FormatAndPrintResults(self, bisect_results):
"""Prints the results from a bisection run in a readable format.
Args:
bisect_results: The results from a bisection test run.
"""
results_dict = bisect_results.GetResultsDict()
self._CheckForWarnings(results_dict)
if self.opts.output_buildbot_annotations:
bisect_utils.OutputAnnotationStepStart('Build Status Per Revision')
print
print 'Full results of bisection:'
for current_id, current_data in results_dict['revision_data_sorted']:
build_status = current_data['passed']
if type(build_status) is bool:
if build_status:
build_status = 'Good'
else:
build_status = 'Bad'
print ' %20s %40s %s' % (current_data['depot'],
current_id, build_status)
print
if self.opts.output_buildbot_annotations:
bisect_utils.OutputAnnotationStepClosed()
# The perf dashboard scrapes the "results" step in order to comment on
# bugs. If you change this, please update the perf dashboard as well.
bisect_utils.OutputAnnotationStepStart('Results')
self._PrintBanner(results_dict)
self._PrintWarnings()
if results_dict['culprit_revisions'] and results_dict['confidence']:
for culprit in results_dict['culprit_revisions']:
cl, info, depot = culprit
self._PrintRevisionInfo(cl, info, depot)
if results_dict['other_regressions']:
self._PrintOtherRegressions(results_dict['other_regressions'],
results_dict['revision_data'])
self._PrintTestedCommitsTable(results_dict['revision_data_sorted'],
results_dict['first_working_revision'],
results_dict['last_broken_revision'],
results_dict['confidence'])
_PrintStepTime(results_dict['revision_data_sorted'])
self._PrintReproSteps()
_PrintThankYou()
if self.opts.output_buildbot_annotations:
bisect_utils.OutputAnnotationStepClosed()
def _PrintBanner(self, results_dict):
if self._IsBisectModeReturnCode():
metrics = 'N/A'
change = 'Yes'
else:
metrics = '/'.join(self.opts.metric)
change = '%.02f%% (+/-%.02f%%)' % (
results_dict['regression_size'], results_dict['regression_std_err'])
if results_dict['culprit_revisions'] and results_dict['confidence']:
status = self._ConfidenceLevelStatus(results_dict)
else:
status = 'Failure, could not reproduce.'
change = 'Bisect could not reproduce a change.'
print RESULTS_BANNER % {
'status': status,
'command': self.opts.command,
'metrics': metrics,
'change': change,
'confidence': results_dict['confidence'],
}
def _PrintWarnings(self):
"""Prints a list of warning strings if there are any."""
if not self.warnings:
return
print
print 'WARNINGS:'
for w in set(self.warnings):
print ' ! %s' % w
def _IsPlatformSupported(): def _IsPlatformSupported():
...@@ -2982,7 +2431,7 @@ class BisectOptions(object): ...@@ -2982,7 +2431,7 @@ class BisectOptions(object):
self.target_build_type = 'Release' self.target_build_type = 'Release'
self.builder_host = None self.builder_host = None
self.builder_port = None self.builder_port = None
self.bisect_mode = BISECT_MODE_MEAN self.bisect_mode = bisect_utils.BISECT_MODE_MEAN
self.improvement_direction = 0 self.improvement_direction = 0
@staticmethod @staticmethod
...@@ -3047,9 +2496,10 @@ class BisectOptions(object): ...@@ -3047,9 +2496,10 @@ class BisectOptions(object):
'discarded).') 'discarded).')
group.add_option('--bisect_mode', group.add_option('--bisect_mode',
type='choice', type='choice',
choices=[BISECT_MODE_MEAN, BISECT_MODE_STD_DEV, choices=[bisect_utils.BISECT_MODE_MEAN,
BISECT_MODE_RETURN_CODE], bisect_utils.BISECT_MODE_STD_DEV,
default=BISECT_MODE_MEAN, bisect_utils.BISECT_MODE_RETURN_CODE],
default=bisect_utils.BISECT_MODE_MEAN,
help='The bisect mode. Choices are to bisect on the ' help='The bisect mode. Choices are to bisect on the '
'difference in mean, std_dev, or return_code.') 'difference in mean, std_dev, or return_code.')
parser.add_option_group(group) parser.add_option_group(group)
...@@ -3167,7 +2617,8 @@ class BisectOptions(object): ...@@ -3167,7 +2617,8 @@ class BisectOptions(object):
if not opts.bad_revision: if not opts.bad_revision:
raise RuntimeError('missing required parameter: --bad_revision') raise RuntimeError('missing required parameter: --bad_revision')
if not opts.metric and opts.bisect_mode != BISECT_MODE_RETURN_CODE: if (not opts.metric and
opts.bisect_mode != bisect_utils.BISECT_MODE_RETURN_CODE):
raise RuntimeError('missing required parameter: --metric') raise RuntimeError('missing required parameter: --metric')
if opts.gs_bucket: if opts.gs_bucket:
...@@ -3194,7 +2645,7 @@ class BisectOptions(object): ...@@ -3194,7 +2645,7 @@ class BisectOptions(object):
if not opts.working_directory: if not opts.working_directory:
raise RuntimeError('missing required parameter: --working_directory') raise RuntimeError('missing required parameter: --working_directory')
if opts.bisect_mode != BISECT_MODE_RETURN_CODE: if opts.bisect_mode != bisect_utils.BISECT_MODE_RETURN_CODE:
metric_values = opts.metric.split('/') metric_values = opts.metric.split('/')
if len(metric_values) != 2: if len(metric_values) != 2:
raise RuntimeError('Invalid metric specified: [%s]' % opts.metric) raise RuntimeError('Invalid metric specified: [%s]' % opts.metric)
...@@ -3230,7 +2681,7 @@ class BisectOptions(object): ...@@ -3230,7 +2681,7 @@ class BisectOptions(object):
assert hasattr(opts, k), 'Invalid %s attribute in BisectOptions.' % k assert hasattr(opts, k), 'Invalid %s attribute in BisectOptions.' % k
setattr(opts, k, v) setattr(opts, k, v)
if opts.metric and opts.bisect_mode != BISECT_MODE_RETURN_CODE: if opts.metric and opts.bisect_mode != bisect_utils.BISECT_MODE_RETURN_CODE:
metric_values = opts.metric.split('/') metric_values = opts.metric.split('/')
if len(metric_values) != 2: if len(metric_values) != 2:
raise RuntimeError('Invalid metric specified: [%s]' % opts.metric) raise RuntimeError('Invalid metric specified: [%s]' % opts.metric)
...@@ -3254,7 +2705,7 @@ def main(): ...@@ -3254,7 +2705,7 @@ def main():
extra_src = bisect_utils.LoadExtraSrc(opts.extra_src) extra_src = bisect_utils.LoadExtraSrc(opts.extra_src)
if not extra_src: if not extra_src:
raise RuntimeError('Invalid or missing --extra_src.') raise RuntimeError('Invalid or missing --extra_src.')
_AddAdditionalDepotInfo(extra_src.GetAdditionalDepotInfo()) bisect_utils.AddAdditionalDepotInfo(extra_src.GetAdditionalDepotInfo())
if opts.working_directory: if opts.working_directory:
custom_deps = bisect_utils.DEFAULT_GCLIENT_CUSTOM_DEPS custom_deps = bisect_utils.DEFAULT_GCLIENT_CUSTOM_DEPS
...@@ -3280,14 +2731,13 @@ def main(): ...@@ -3280,14 +2731,13 @@ def main():
not opts.working_directory): not opts.working_directory):
raise RuntimeError('You must switch to master branch to run bisection.') raise RuntimeError('You must switch to master branch to run bisection.')
bisect_test = BisectPerformanceMetrics(opts) bisect_test = BisectPerformanceMetrics(opts)
bisect_printer = BisectPrinter(opts, bisect_test.depot_registry)
try: try:
bisect_results = bisect_test.Run(opts.command, results = bisect_test.Run(opts.command, opts.bad_revision,
opts.bad_revision, opts.good_revision, opts.metric)
opts.good_revision, if results.error:
opts.metric) raise RuntimeError(results.error)
if bisect_results.error: bisect_printer.FormatAndPrintResults(results)
raise RuntimeError(bisect_results.error)
bisect_test.FormatAndPrintResults(bisect_results)
return 0 return 0
finally: finally:
bisect_test.PerformCleanup() bisect_test.PerformCleanup()
......
...@@ -12,7 +12,8 @@ SRC = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir) ...@@ -12,7 +12,8 @@ SRC = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)
sys.path.append(os.path.join(SRC, 'third_party', 'pymock')) sys.path.append(os.path.join(SRC, 'third_party', 'pymock'))
import bisect_perf_regression import bisect_perf_regression
import bisect_results import bisect_printer
import bisect_utils
import mock import mock
import source_control import source_control
...@@ -26,15 +27,13 @@ DEFAULT_OPTIONS = { ...@@ -26,15 +27,13 @@ DEFAULT_OPTIONS = {
'metric': 'fake/metric', 'metric': 'fake/metric',
'good_revision': 280000, 'good_revision': 280000,
'bad_revision': 280005, 'bad_revision': 280005,
} }
def _GetBisectPerformanceMetricsInstance(options_dict): def _GetBisectPerformanceMetricsInstance(options_dict):
"""Returns an instance of the BisectPerformanceMetrics class.""" """Returns an instance of the BisectPerformanceMetrics class."""
bisect_options = bisect_perf_regression.BisectOptions.FromDict(options_dict) opts = bisect_perf_regression.BisectOptions.FromDict(options_dict)
bisect_instance = bisect_perf_regression.BisectPerformanceMetrics( return bisect_perf_regression.BisectPerformanceMetrics(opts)
bisect_options)
return bisect_instance
def _GetExtendedOptions(d, f): def _GetExtendedOptions(d, f):
...@@ -61,13 +60,14 @@ def _GenericDryRun(options, print_results=False): ...@@ -61,13 +60,14 @@ def _GenericDryRun(options, print_results=False):
try: try:
shutil.rmtree = lambda path, onerror: None shutil.rmtree = lambda path, onerror: None
bisect_instance = _GetBisectPerformanceMetricsInstance(options) bisect_instance = _GetBisectPerformanceMetricsInstance(options)
results = bisect_instance.Run(bisect_instance.opts.command, results = bisect_instance.Run(
bisect_instance.opts.bad_revision, bisect_instance.opts.command, bisect_instance.opts.bad_revision,
bisect_instance.opts.good_revision, bisect_instance.opts.good_revision, bisect_instance.opts.metric)
bisect_instance.opts.metric)
if print_results: if print_results:
bisect_instance.FormatAndPrintResults(results) printer = bisect_printer.BisectPrinter(bisect_instance.opts,
bisect_instance.depot_registry)
printer.FormatAndPrintResults(results)
return results return results
finally: finally:
...@@ -85,74 +85,6 @@ class BisectPerfRegressionTest(unittest.TestCase): ...@@ -85,74 +85,6 @@ class BisectPerfRegressionTest(unittest.TestCase):
def tearDown(self): def tearDown(self):
os.chdir(self.cwd) os.chdir(self.cwd)
def _AssertConfidence(self, score, bad_values, good_values):
"""Checks whether the given sets of values have a given confidence score.
The score represents our confidence that the two sets of values wouldn't
be as different as they are just by chance; that is, that some real change
occurred between the two sets of values.
Args:
score: Expected confidence score.
bad_values: First list of numbers.
good_values: Second list of numbers.
"""
# ConfidenceScore takes a list of lists but these lists are flattened
# inside the function.
confidence = bisect_results.ConfidenceScore(
[[v] for v in bad_values],
[[v] for v in good_values])
self.assertEqual(score, confidence)
def testConfidenceScore_ZeroConfidence(self):
# The good and bad sets contain the same values, so the confidence that
# they're different should be zero.
self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])
def testConfidenceScore_MediumConfidence(self):
self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])
def testConfidenceScore_HighConfidence(self):
self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])
def testConfidenceScore_VeryHighConfidence(self):
# Confidence is high if the two sets of values have no internal variance.
self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])
def testConfidenceScore_UnbalancedSampleSize(self):
# The second set of numbers only contains one number, so confidence is 0.
self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2], [1.4])
def testConfidenceScore_EmptySample(self):
# Confidence is zero if either or both samples are empty.
self._AssertConfidence(0.0, [], [])
self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])
def testConfidenceScore_FunctionalTestResults(self):
self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])
def testConfidenceScore_RealWorldCases(self):
"""This method contains a set of data from actual bisect results.
The confidence scores asserted below were all copied from the actual
results, so the purpose of this test method is mainly to show what the
results for real cases are, and compare when we change the confidence
score function in the future.
"""
self._AssertConfidence(80, [133, 130, 132, 132, 130, 129], [129, 129, 125])
self._AssertConfidence(99.5, [668, 667], [498, 498, 499])
self._AssertConfidence(80, [67, 68], [65, 65, 67])
self._AssertConfidence(0, [514], [514])
self._AssertConfidence(90, [616, 613, 607, 615], [617, 619, 619, 617])
self._AssertConfidence(0, [3.5, 5.8, 4.7, 3.5, 3.6], [2.8])
self._AssertConfidence(90, [3, 3, 3], [2, 2, 2, 3])
self._AssertConfidence(0, [1999004, 1999627], [223355])
self._AssertConfidence(90, [1040, 934, 961], [876, 875, 789])
self._AssertConfidence(90, [309, 305, 304], [302, 302, 299, 303, 298])
def testParseDEPSStringManually(self): def testParseDEPSStringManually(self):
"""Tests DEPS parsing.""" """Tests DEPS parsing."""
deps_file_contents = """ deps_file_contents = """
...@@ -249,7 +181,7 @@ class BisectPerfRegressionTest(unittest.TestCase): ...@@ -249,7 +181,7 @@ class BisectPerfRegressionTest(unittest.TestCase):
bisect_options) bisect_options)
bisect_instance.opts.target_platform = target_platform bisect_instance.opts.target_platform = target_platform
git_revision = source_control.ResolveToRevision( git_revision = source_control.ResolveToRevision(
revision, 'chromium', bisect_perf_regression.DEPOT_DEPS_NAME, 100) revision, 'chromium', bisect_utils.DEPOT_DEPS_NAME, 100)
depot = 'chromium' depot = 'chromium'
command = bisect_instance.GetCompatibleCommand( command = bisect_instance.GetCompatibleCommand(
original_command, git_revision, depot) original_command, git_revision, depot)
...@@ -320,7 +252,6 @@ class BisectPerfRegressionTest(unittest.TestCase): ...@@ -320,7 +252,6 @@ class BisectPerfRegressionTest(unittest.TestCase):
results = _GenericDryRun(_GetExtendedOptions(1, -100)) results = _GenericDryRun(_GetExtendedOptions(1, -100))
self.assertIsNone(results.error) self.assertIsNone(results.error)
def testGetCommitPosition(self): def testGetCommitPosition(self):
cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531' cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'
self.assertEqual(291765, source_control.GetCommitPosition(cp_git_rev)) self.assertEqual(291765, source_control.GetCommitPosition(cp_git_rev))
...@@ -366,18 +297,18 @@ class DepotDirectoryRegistryTest(unittest.TestCase): ...@@ -366,18 +297,18 @@ class DepotDirectoryRegistryTest(unittest.TestCase):
def setUp(self): def setUp(self):
self.old_chdir = os.chdir self.old_chdir = os.chdir
os.chdir = self.mockChdir os.chdir = self.mockChdir
self.old_depot_names = bisect_perf_regression.DEPOT_NAMES self.old_depot_names = bisect_utils.DEPOT_NAMES
bisect_perf_regression.DEPOT_NAMES = ['mock_depot'] bisect_utils.DEPOT_NAMES = ['mock_depot']
self.old_depot_deps_name = bisect_perf_regression.DEPOT_DEPS_NAME self.old_depot_deps_name = bisect_utils.DEPOT_DEPS_NAME
bisect_perf_regression.DEPOT_DEPS_NAME = {'mock_depot': {'src': 'src/foo'}} bisect_utils.DEPOT_DEPS_NAME = {'mock_depot': {'src': 'src/foo'}}
self.registry = bisect_perf_regression.DepotDirectoryRegistry('/mock/src') self.registry = bisect_perf_regression.DepotDirectoryRegistry('/mock/src')
self.cur_dir = None self.cur_dir = None
def tearDown(self): def tearDown(self):
os.chdir = self.old_chdir os.chdir = self.old_chdir
bisect_perf_regression.DEPOT_NAMES = self.old_depot_names bisect_utils.DEPOT_NAMES = self.old_depot_names
bisect_perf_regression.DEPOT_DEPS_NAME = self.old_depot_deps_name bisect_utils.DEPOT_DEPS_NAME = self.old_depot_deps_name
def mockChdir(self, new_dir): def mockChdir(self, new_dir):
self.cur_dir = new_dir self.cur_dir = new_dir
......
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This file contains printing-related functionality of the bisect."""
import datetime
import re
from bisect_results import BisectResults
import bisect_utils
import source_control
# The perf dashboard looks for a string like "Estimated Confidence: 95%"
# to decide whether or not to cc the author(s). If you change this, please
# update the perf dashboard as well.
RESULTS_BANNER = """
===== BISECT JOB RESULTS =====
Status: %(status)s
Test Command: %(command)s
Test Metric: %(metrics)s
Relative Change: %(change)s
Estimated Confidence: %(confidence).02f%%"""

# The perf dashboard specifically looks for the string
# "Author : " to parse out who to cc on a bug. If you change the
# formatting here, please update the perf dashboard as well.
RESULTS_REVISION_INFO = """
===== SUSPECTED CL(s) =====
Subject : %(subject)s
Author : %(author)s%(email_info)s%(commit_info)s
Commit : %(cl)s
Date : %(cl_date)s"""

# Closing banner printed at the very end of the results output.
RESULTS_THANKYOU = """
| O O | Visit http://www.chromium.org/developers/core-principles for Chrome's
| X | policy on perf regressions. Contact chrome-perf-dashboard-team with any
| / \ | questions or suggestions about bisecting. THANK YOU."""

# Instructions for reproducing the bisected test on a local machine.
REPRO_STEPS_LOCAL = """
==== INSTRUCTIONS TO REPRODUCE ====
To run locally:
- Use the test command given under 'BISECT JOB RESULTS' above.
- Consider using a profiler. Pass --profiler=list to list available profilers.
"""

# Instructions for reproducing on a perf try bot (non-Telemetry tests).
REPRO_STEPS_TRYJOB = """
To reproduce on a performance try bot:
1. Edit run-perf-test.cfg
2. $ git try -b <bot> --svn_repo='svn://svn.chromium.org/chrome-try/try-perf'
Notes:
a) Follow the in-file instructions in run-perf-test.cfg.
b) run-perf-test.cfg is under tools/ or under third_party/WebKit/Tools.
c) Do your edits preferably under a new git branch.
d) --browser=release and --browser=android-chromium-testshell are supported
depending on the platform (desktop|android).
e) Strip any src/ directories from the head of relative path names.
f) Make sure to use the appropriate bot on step 3.
For more details please visit
https://sites.google.com/a/chromium.org/dev/developers/performance-try-bots"""

# Instructions for reproducing on a perf try bot (Telemetry tests);
# %(command)s is filled in with the test command by _PrintReproSteps.
REPRO_STEPS_TRYJOB_TELEMETRY = """
To reproduce on a performance try bot:
%(command)s
(Where <bot-name> comes from tools/perf/run_benchmark --browser=list)
For more details please visit
https://sites.google.com/a/chromium.org/dev/developers/performance-try-bots
"""
class BisectPrinter(object):
def __init__(self, opts, depot_registry):
self.opts = opts
self.depot_registry = depot_registry
def FormatAndPrintResults(self, bisect_results):
"""Prints the results from a bisection run in a readable format.
Also prints annotations creating buildbot step "Results".
Args:
bisect_results: BisectResult object containing results to be printed.
"""
if self.opts.output_buildbot_annotations:
bisect_utils.OutputAnnotationStepStart('Build Status Per Revision')
print
print 'Full results of bisection:'
for revision_state in bisect_results.state.GetRevisionStates():
build_status = revision_state.passed
if type(build_status) is bool:
if build_status:
build_status = 'Good'
else:
build_status = 'Bad'
print ' %20s %40s %s' % (revision_state.depot, revision_state.revision,
build_status)
print
if self.opts.output_buildbot_annotations:
bisect_utils.OutputAnnotationStepClosed()
# The perf dashboard scrapes the "results" step in order to comment on
# bugs. If you change this, please update the perf dashboard as well.
bisect_utils.OutputAnnotationStepStart('Results')
self._PrintBanner(bisect_results)
self._PrintWarnings(bisect_results.warnings)
if bisect_results.culprit_revisions and bisect_results.confidence:
for culprit in bisect_results.culprit_revisions:
cl, info, depot = culprit
self._PrintRevisionInfo(cl, info, depot)
if bisect_results.other_regressions:
self._PrintOtherRegressions(bisect_results.other_regressions)
self._PrintTestedCommitsTable(bisect_results.state.GetRevisionStates(),
bisect_results.first_working_revision,
bisect_results.last_broken_revision,
bisect_results.confidence)
self._PrintStepTime(bisect_results.state.GetRevisionStates())
self._PrintReproSteps()
self._PrintThankYou()
if self.opts.output_buildbot_annotations:
bisect_utils.OutputAnnotationStepClosed()
def PrintPartialResults(self, bisect_state):
revision_states = bisect_state.GetRevisionStates()
first_working_rev, last_broken_rev = BisectResults.FindBreakingRevRange(
revision_states)
self._PrintTestedCommitsTable(revision_states, first_working_rev,
last_broken_rev, 100, final_step=False)
@staticmethod
def _PrintThankYou():
print RESULTS_THANKYOU
@staticmethod
def _PrintStepTime(revision_states):
"""Prints information about how long various steps took.
Args:
revision_states: Ordered list of revision states."""
step_perf_time_avg = 0.0
step_build_time_avg = 0.0
step_count = 0.0
for revision_state in revision_states:
if revision_state.value:
step_perf_time_avg += revision_state.perf_time
step_build_time_avg += revision_state.build_time
step_count += 1
if step_count:
step_perf_time_avg = step_perf_time_avg / step_count
step_build_time_avg = step_build_time_avg / step_count
print
print 'Average build time : %s' % datetime.timedelta(
seconds=int(step_build_time_avg))
print 'Average test time : %s' % datetime.timedelta(
seconds=int(step_perf_time_avg))
def _GetViewVCLinkFromDepotAndHash(self, revision_id, depot):
"""Gets link to the repository browser."""
info = source_control.QueryRevisionInfo(revision_id,
self.depot_registry.GetDepotDir(depot))
if depot and bisect_utils.DEPOT_DEPS_NAME[depot].has_key('viewvc'):
try:
# Format is "git-svn-id: svn://....@123456 <other data>"
svn_line = [i for i in info['body'].splitlines() if 'git-svn-id:' in i]
svn_revision = svn_line[0].split('@')
svn_revision = svn_revision[1].split(' ')[0]
return bisect_utils.DEPOT_DEPS_NAME[depot]['viewvc'] + svn_revision
except IndexError:
return ''
return ''
def _PrintRevisionInfo(self, cl, info, depot=None):
email_info = ''
if not info['email'].startswith(info['author']):
email_info = '\nEmail : %s' % info['email']
commit_link = self._GetViewVCLinkFromDepotAndHash(cl, depot)
if commit_link:
commit_info = '\nLink : %s' % commit_link
else:
commit_info = ('\nFailed to parse SVN revision from body:\n%s' %
info['body'])
print RESULTS_REVISION_INFO % {
'subject': info['subject'],
'author': info['author'],
'email_info': email_info,
'commit_info': commit_info,
'cl': cl,
'cl_date': info['date']
}
@staticmethod
def _PrintTableRow(column_widths, row_data):
"""Prints out a row in a formatted table that has columns aligned.
Args:
column_widths: A list of column width numbers.
row_data: A list of items for each column in this row.
"""
assert len(column_widths) == len(row_data)
text = ''
for i in xrange(len(column_widths)):
current_row_data = row_data[i].center(column_widths[i], ' ')
text += ('%%%ds' % column_widths[i]) % current_row_data
print text
def _PrintTestedCommitsHeader(self):
if self.opts.bisect_mode == bisect_utils.BISECT_MODE_MEAN:
self._PrintTableRow(
[20, 12, 70, 14, 12, 13],
['Depot', 'Position', 'SHA', 'Mean', 'Std. Error', 'State'])
elif self.opts.bisect_mode == bisect_utils.BISECT_MODE_STD_DEV:
self._PrintTableRow(
[20, 12, 70, 14, 12, 13],
['Depot', 'Position', 'SHA', 'Std. Error', 'Mean', 'State'])
elif self.opts.bisect_mode == bisect_utils.BISECT_MODE_RETURN_CODE:
self._PrintTableRow(
[20, 12, 70, 14, 13],
['Depot', 'Position', 'SHA', 'Return Code', 'State'])
else:
assert False, 'Invalid bisect_mode specified.'
def _PrintTestedCommitsEntry(self, revision_state, commit_position, cl_link,
state_str):
if self.opts.bisect_mode == bisect_utils.BISECT_MODE_MEAN:
std_error = '+-%.02f' % revision_state.value['std_err']
mean = '%.02f' % revision_state.value['mean']
self._PrintTableRow(
[20, 12, 70, 12, 14, 13],
[revision_state.depot, commit_position, cl_link, mean, std_error,
state_str])
elif self.opts.bisect_mode == bisect_utils.BISECT_MODE_STD_DEV:
std_error = '+-%.02f' % revision_state.value['std_err']
mean = '%.02f' % revision_state.value['mean']
self._PrintTableRow(
[20, 12, 70, 12, 14, 13],
[revision_state.depot, commit_position, cl_link, std_error, mean,
state_str])
elif self.opts.bisect_mode == bisect_utils.BISECT_MODE_RETURN_CODE:
mean = '%d' % revision_state.value['mean']
self._PrintTableRow(
[20, 12, 70, 14, 13],
[revision_state.depot, commit_position, cl_link, mean,
state_str])
def _PrintTestedCommitsTable(
self, revision_states, first_working_revision, last_broken_revision,
confidence, final_step=True):
print
if final_step:
print '===== TESTED COMMITS ====='
else:
print '===== PARTIAL RESULTS ====='
self._PrintTestedCommitsHeader()
state = 0
for revision_state in revision_states:
if revision_state.value:
if (revision_state == last_broken_revision or
revision_state == first_working_revision):
# If confidence is too low, don't add this empty line since it's
# used to put focus on a suspected CL.
if confidence and final_step:
print
state += 1
if state == 2 and not final_step:
# Just want a separation between "bad" and "good" cl's.
print
state_str = 'Bad'
if state == 1 and final_step:
state_str = 'Suspected CL'
elif state == 2:
state_str = 'Good'
# If confidence is too low, don't bother outputting good/bad.
if not confidence:
state_str = ''
state_str = state_str.center(13, ' ')
cl_link = self._GetViewVCLinkFromDepotAndHash(revision_state.revision,
revision_state.depot)
if not cl_link:
cl_link = revision_state.revision
commit_position = source_control.GetCommitPosition(
revision_state.revision,
self.depot_registry.GetDepotDir(revision_state.depot))
commit_position = str(commit_position)
if not commit_position:
commit_position = ''
self._PrintTestedCommitsEntry(revision_state, commit_position, cl_link,
state_str)
def _PrintReproSteps(self):
"""Prints out a section of the results explaining how to run the test.
This message includes the command used to run the test.
"""
command = '$ ' + self.opts.command
if bisect_utils.IsTelemetryCommand(self.opts.command):
command += ('\nAlso consider passing --profiler=list to see available '
'profilers.')
print REPRO_STEPS_LOCAL
if bisect_utils.IsTelemetryCommand(self.opts.command):
telemetry_command = re.sub(r'--browser=[^\s]+',
'--browser=<bot-name>',
command)
print REPRO_STEPS_TRYJOB_TELEMETRY % {'command': telemetry_command}
else:
print REPRO_STEPS_TRYJOB
def _PrintOtherRegressions(self, other_regressions):
"""Prints a section of the results about other potential regressions."""
print
print 'Other regressions may have occurred:'
print ' %8s %70s %10s' % ('Depot'.center(8, ' '),
'Range'.center(70, ' '), 'Confidence'.center(10, ' '))
for regression in other_regressions:
current_rev_state, prev_rev_state, confidence = regression
current_link = self._GetViewVCLinkFromDepotAndHash(
current_rev_state.revision,
current_rev_state.depot)
previous_link = self._GetViewVCLinkFromDepotAndHash(
prev_rev_state.revision,
prev_rev_state.depot)
# If we can't map it to a viewable URL, at least show the original hash.
if not current_link:
current_link = current_rev_state.revision
if not previous_link:
previous_link = prev_rev_state.revision
print ' %8s %70s %s' % (current_rev_state.depot, current_link,
('%d%%' % confidence).center(10, ' '))
print ' %8s %70s' % (prev_rev_state.depot, previous_link)
print
@staticmethod
def _ConfidenceLevelStatus(bisect_results):
if not bisect_results.confidence:
return None
confidence_status = 'Successful with %(level)s confidence%(warning)s.'
if bisect_results.confidence >= bisect_utils.HIGH_CONFIDENCE:
level = 'high'
else:
level = 'low'
warning = ' and warnings'
if not bisect_results.warnings:
warning = ''
return confidence_status % {'level': level, 'warning': warning}
def _PrintBanner(self, bisect_results):
if self.opts.bisect_mode == bisect_utils.BISECT_MODE_RETURN_CODE:
metrics = 'N/A'
change = 'Yes'
else:
metrics = '/'.join(self.opts.metric)
change = '%.02f%% (+/-%.02f%%)' % (
bisect_results.regression_size, bisect_results.regression_std_err)
if bisect_results.culprit_revisions and bisect_results.confidence:
status = self._ConfidenceLevelStatus(bisect_results)
else:
status = 'Failure, could not reproduce.'
change = 'Bisect could not reproduce a change.'
print RESULTS_BANNER % {
'status': status,
'command': self.opts.command,
'metrics': metrics,
'change': change,
'confidence': bisect_results.confidence,
}
@staticmethod
def _PrintWarnings(warnings):
"""Prints a list of warning strings if there are any."""
if not warnings:
return
print
print 'WARNINGS:'
for w in set(warnings):
print ' ! %s' % w
...@@ -11,7 +11,106 @@ import source_control ...@@ -11,7 +11,106 @@ import source_control
import ttest import ttest
def ConfidenceScore(good_results_lists, bad_results_lists): class BisectResults(object):
"""Contains results of the completed bisect.
Properties:
error: Error message if the bisect failed.
If the error is None, the following properties are present:
warnings: List of warnings from the bisect run.
state: BisectState object from which these results were generated.
first_working_revision: First good revision.
last_broken_revision: Last bad revision.
If both of above revisions are not None, the follow properties are present:
culprit_revisions: A list of revisions, which contain the bad change
introducing the failure.
other_regressions: A list of tuples representing other regressions, which
may have occured.
regression_size: For performance bisects, this is a relative change of
the mean metric value. For other bisects this field always contains
'zero-to-nonzero'.
regression_std_err: For performance bisects, it is a pooled standard error
for groups of good and bad runs. Not used for other bisects.
confidence: For performance bisects, it is a confidence that the good and
bad runs are distinct groups. Not used for non-performance bisects.
"""
def __init__(self, bisect_state=None, depot_registry=None, opts=None,
runtime_warnings=None, error=None):
"""Computes final bisect results after a bisect run is complete.
This constructor should be called in one of the following ways:
BisectResults(state, depot_registry, opts, runtime_warnings)
BisectResults(error=error)
First option creates an object representing successful bisect results, while
second option creates an error result.
Args:
bisect_state: BisectState object representing latest bisect state.
depot_registry: DepotDirectoryRegistry object with information on each
repository in the bisect_state.
opts: Options passed to the bisect run.
runtime_warnings: A list of warnings from the bisect run.
error: Error message. When error is not None, other arguments are ignored.
"""
self.error = error
if error is not None:
return
assert (bisect_state is not None and depot_registry is not None and
opts is not None and runtime_warnings is not None), (
'Incorrect use of the BisectResults constructor. When error is '
'None, all other arguments are required')
self.state = bisect_state
rev_states = bisect_state.GetRevisionStates()
first_working_rev, last_broken_rev = self.FindBreakingRevRange(rev_states)
self.first_working_revision = first_working_rev
self.last_broken_revision = last_broken_rev
if first_working_rev is not None and last_broken_rev is not None:
statistics = self._ComputeRegressionStatistics(
rev_states, first_working_rev, last_broken_rev)
self.regression_size = statistics['regression_size']
self.regression_std_err = statistics['regression_std_err']
self.confidence = statistics['confidence']
self.culprit_revisions = self._FindCulpritRevisions(
rev_states, depot_registry, first_working_rev, last_broken_rev)
self.other_regressions = self._FindOtherRegressions(
rev_states, statistics['bad_greater_than_good'])
self.warnings = runtime_warnings + self._GetResultBasedWarnings(
self.culprit_revisions, opts, self.confidence)
@staticmethod
def _GetResultBasedWarnings(culprit_revisions, opts, confidence):
warnings = []
if len(culprit_revisions) > 1:
warnings.append('Due to build errors, regression range could '
'not be narrowed down to a single commit.')
if opts.repeat_test_count == 1:
warnings.append('Tests were only set to run once. This may '
'be insufficient to get meaningful results.')
if 0 < confidence < bisect_utils.HIGH_CONFIDENCE:
warnings.append('Confidence is not high. Try bisecting again '
'with increased repeat_count, larger range, or '
'on another metric.')
if not confidence:
warnings.append('Confidence score is 0%. Try bisecting again on '
'another platform or another metric.')
return warnings
@staticmethod
def ConfidenceScore(good_results_lists, bad_results_lists,
accept_single_bad_or_good=False):
"""Calculates a confidence score. """Calculates a confidence score.
This score is a percentage which represents our degree of confidence in the This score is a percentage which represents our degree of confidence in the
...@@ -22,13 +121,19 @@ def ConfidenceScore(good_results_lists, bad_results_lists): ...@@ -22,13 +121,19 @@ def ConfidenceScore(good_results_lists, bad_results_lists):
Args: Args:
good_results_lists: A list of lists of "good" result numbers. good_results_lists: A list of lists of "good" result numbers.
bad_results_lists: A list of lists of "bad" result numbers. bad_results_lists: A list of lists of "bad" result numbers.
accept_single_bad_or_good: If True, computes confidence even if there is
just one bad or good revision, otherwise single good or bad revision
always returns 0.0 confidence. This flag will probably get away when
we will implement expanding the bisect range by one more revision for
such case.
Returns: Returns:
A number in the range [0, 100]. A number in the range [0, 100].
""" """
# If there's only one item in either list, this means only one revision was # If there's only one item in either list, this means only one revision was
# classified good or bad; this isn't good enough evidence to make a decision. # classified good or bad; this isn't good enough evidence to make a
# If an empty list was passed, that also implies zero confidence. # decision. If an empty list was passed, that also implies zero confidence.
if not accept_single_bad_or_good:
if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1: if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1:
return 0.0 return 0.0
...@@ -46,20 +151,12 @@ def ConfidenceScore(good_results_lists, bad_results_lists): ...@@ -46,20 +151,12 @@ def ConfidenceScore(good_results_lists, bad_results_lists):
_, _, p_value = ttest.WelchsTTest(sample1, sample2) _, _, p_value = ttest.WelchsTTest(sample1, sample2)
return 100.0 * (1.0 - p_value) return 100.0 * (1.0 - p_value)
@classmethod
class BisectResults(object): def _FindOtherRegressions(cls, revision_states, bad_greater_than_good):
def __init__(self, depot_registry):
self._depot_registry = depot_registry
self.revision_data = {}
self.error = None
@staticmethod
def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good):
"""Compiles a list of other possible regressions from the revision data. """Compiles a list of other possible regressions from the revision data.
Args: Args:
revision_data_sorted: Sorted list of (revision, revision data) pairs. revision_states: Sorted list of RevisionState objects.
bad_greater_than_good: Whether the result value at the "bad" revision is bad_greater_than_good: Whether the result value at the "bad" revision is
numerically greater than the result value at the "good" revision. numerically greater than the result value at the "good" revision.
...@@ -69,13 +166,13 @@ class BisectResults(object): ...@@ -69,13 +166,13 @@ class BisectResults(object):
""" """
other_regressions = [] other_regressions = []
previous_values = [] previous_values = []
previous_id = None prev_state = None
for current_id, current_data in revision_data_sorted: for revision_state in revision_states:
current_values = current_data['value'] if revision_state.value:
if current_values: current_values = revision_state.value['values']
current_values = current_values['values']
if previous_values: if previous_values:
confidence = ConfidenceScore(previous_values, [current_values]) confidence = cls.ConfidenceScore(previous_values, [current_values],
accept_single_bad_or_good=True)
mean_of_prev_runs = math_utils.Mean(sum(previous_values, [])) mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
mean_of_current_runs = math_utils.Mean(current_values) mean_of_current_runs = math_utils.Mean(current_values)
...@@ -83,96 +180,58 @@ class BisectResults(object): ...@@ -83,96 +180,58 @@ class BisectResults(object):
# the overall regression. If the mean of the previous runs < the # the overall regression. If the mean of the previous runs < the
# mean of the current runs, this local regression is in same # mean of the current runs, this local regression is in same
# direction. # direction.
prev_less_than_current = mean_of_prev_runs < mean_of_current_runs prev_greater_than_current = mean_of_prev_runs > mean_of_current_runs
is_same_direction = (prev_less_than_current if is_same_direction = (prev_greater_than_current if
bad_greater_than_good else not prev_less_than_current) bad_greater_than_good else not prev_greater_than_current)
# Only report potential regressions with high confidence. # Only report potential regressions with high confidence.
if is_same_direction and confidence > 50: if is_same_direction and confidence > 50:
other_regressions.append([current_id, previous_id, confidence]) other_regressions.append([revision_state, prev_state, confidence])
previous_values.append(current_values) previous_values.append(current_values)
previous_id = current_id prev_state = revision_state
return other_regressions return other_regressions
def GetResultsDict(self): @staticmethod
"""Prepares and returns information about the final resulsts as a dict. def FindBreakingRevRange(revision_states):
first_working_revision = None
last_broken_revision = None
Returns: for revision_state in revision_states:
A dictionary with the following fields if revision_state.passed == 1 and not first_working_revision:
first_working_revision = revision_state
'first_working_revision': First good revision. if not revision_state.passed:
'last_broken_revision': Last bad revision. last_broken_revision = revision_state
'culprit_revisions': A list of revisions, which contain the bad change
introducing the failure.
'other_regressions': A list of tuples representing other regressions,
which may have occured.
'regression_size': For performance bisects, this is a relative change of
the mean metric value. For other bisects this field always contains
'zero-to-nonzero'.
'regression_std_err': For performance bisects, it is a pooled standard
error for groups of good and bad runs. Not used for other bisects.
'confidence': For performance bisects, it is a confidence that the good
and bad runs are distinct groups. Not used for non-performance
bisects.
'revision_data_sorted': dict mapping revision ids to data about that
revision. Each piece of revision data consists of a dict with the
following keys:
'passed': Represents whether the performance test was successful at
that revision. Possible values include: 1 (passed), 0 (failed),
'?' (skipped), 'F' (build failed).
'depot': The depot that this revision is from (i.e. WebKit)
'external': If the revision is a 'src' revision, 'external' contains
the revisions of each of the external libraries.
'sort': A sort value for sorting the dict in order of commits.
For example:
{
'CL #1':
{
'passed': False,
'depot': 'chromium',
'external': None,
'sort': 0
}
}
"""
revision_data_sorted = sorted(self.revision_data.iteritems(),
key = lambda x: x[1]['sort'])
# Find range where it possibly broke. return first_working_revision, last_broken_revision
first_working_revision = None
first_working_revision_index = -1 @staticmethod
last_broken_revision = None def _FindCulpritRevisions(revision_states, depot_registry, first_working_rev,
last_broken_revision_index = -1 last_broken_rev):
cwd = os.getcwd()
culprit_revisions = [] culprit_revisions = []
other_regressions = [] for i in xrange(last_broken_rev.index, first_working_rev.index):
regression_size = 0.0 depot_registry.ChangeToDepotDir(revision_states[i].depot)
regression_std_err = 0.0 info = source_control.QueryRevisionInfo(revision_states[i].revision)
confidence = 0.0 culprit_revisions.append((revision_states[i].revision, info,
revision_states[i].depot))
for i in xrange(len(revision_data_sorted)):
k, v = revision_data_sorted[i] os.chdir(cwd)
if v['passed'] == 1: return culprit_revisions
if not first_working_revision:
first_working_revision = k @classmethod
first_working_revision_index = i def _ComputeRegressionStatistics(cls, rev_states, first_working_rev,
last_broken_rev):
if not v['passed']: # TODO(sergiyb): We assume that value has "values" key, which may not be
last_broken_revision = k # the case for failure-bisects, where there is a single value only.
last_broken_revision_index = i broken_means = [state.value['values']
for state in rev_states[:last_broken_rev.index+1]
if last_broken_revision != None and first_working_revision != None: if state.value]
broken_means = []
for i in xrange(0, last_broken_revision_index + 1): working_means = [state.value['values']
if revision_data_sorted[i][1]['value']: for state in rev_states[first_working_rev.index:]
broken_means.append(revision_data_sorted[i][1]['value']['values']) if state.value]
working_means = []
for i in xrange(first_working_revision_index, len(revision_data_sorted)):
if revision_data_sorted[i][1]['value']:
working_means.append(revision_data_sorted[i][1]['value']['values'])
# Flatten the lists to calculate mean of all values. # Flatten the lists to calculate mean of all values.
working_mean = sum(working_means, []) working_mean = sum(working_means, [])
...@@ -194,67 +253,11 @@ class BisectResults(object): ...@@ -194,67 +253,11 @@ class BisectResults(object):
# Give a "confidence" in the bisect. At the moment we use how distinct the # Give a "confidence" in the bisect. At the moment we use how distinct the
# values are before and after the last broken revision, and how noisy the # values are before and after the last broken revision, and how noisy the
# overall graph is. # overall graph is.
confidence = ConfidenceScore(working_means, broken_means) confidence = cls.ConfidenceScore(working_means, broken_means)
culprit_revisions = []
cwd = os.getcwd()
self._depot_registry.ChangeToDepotDir(
self.revision_data[last_broken_revision]['depot'])
if self.revision_data[last_broken_revision]['depot'] == 'cros':
# Want to get a list of all the commits and what depots they belong
# to so that we can grab info about each.
cmd = ['repo', 'forall', '-c',
'pwd ; git log --pretty=oneline --before=%d --after=%d' % (
last_broken_revision, first_working_revision + 1)]
output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd)
changes = []
assert not return_code, ('An error occurred while running '
'"%s"' % ' '.join(cmd))
last_depot = None
cwd = os.getcwd()
for l in output.split('\n'):
if l:
# Output will be in form:
# /path_to_depot
# /path_to_other_depot
# <SHA1>
# /path_again
# <SHA1>
# etc.
if l[0] == '/':
last_depot = l
else:
contents = l.split(' ')
if len(contents) > 1:
changes.append([last_depot, contents[0]])
for c in changes:
os.chdir(c[0])
info = source_control.QueryRevisionInfo(c[1])
culprit_revisions.append((c[1], info, None))
else:
for i in xrange(last_broken_revision_index, len(revision_data_sorted)):
k, v = revision_data_sorted[i]
if k == first_working_revision:
break
self._depot_registry.ChangeToDepotDir(v['depot'])
info = source_control.QueryRevisionInfo(k)
culprit_revisions.append((k, info, v['depot']))
os.chdir(cwd)
# Check for any other possible regression ranges. bad_greater_than_good = mean_of_bad_runs > mean_of_good_runs
other_regressions = self._FindOtherRegressions(
revision_data_sorted, mean_of_bad_runs > mean_of_good_runs)
return { return {'regression_size': regression_size,
'first_working_revision': first_working_revision,
'last_broken_revision': last_broken_revision,
'culprit_revisions': culprit_revisions,
'other_regressions': other_regressions,
'regression_size': regression_size,
'regression_std_err': regression_std_err, 'regression_std_err': regression_std_err,
'confidence': confidence, 'confidence': confidence,
'revision_data_sorted': revision_data_sorted 'bad_greater_than_good': bad_greater_than_good}
}
...@@ -2,38 +2,256 @@ ...@@ -2,38 +2,256 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import os
import unittest import unittest
import bisect_results from bisect_results import BisectResults
import ttest import source_control
class ConfidenceScoreTest(unittest.TestCase): class MockDepotRegistry(object):
def ChangeToDepotDir(self, depot):
pass
class MockRevisionState(object):
  """Lightweight stand-in for a bisect revision state used by these tests."""

  def __init__(self, revision, index, depot='chromium', value=None,
               perf_time=0, build_time=0, passed='?', external=None):
    # Populate the same attribute set a real revision state exposes.
    self.__dict__.update(
        revision=revision, index=index, depot=depot, value=value,
        perf_time=perf_time, build_time=build_time, passed=passed,
        external=external)
class MockBisectState(object):
  """Canned bisect state: revisions 'a'-'b' fail, 'c'-'e' pass."""

  def __init__(self):
    bad_value = {'values': [100, 105, 95]}
    good_value = {'values': [1, 2, 3]}
    # As in the original fixture, the failing states share one value dict
    # and the passing states share another.
    self.mock_revision_states = [
        MockRevisionState(rev, i, value=bad_value, passed=0)
        for i, rev in enumerate(['a', 'b'])]
    self.mock_revision_states.extend(
        MockRevisionState(rev, i, value=good_value, passed=1)
        for i, rev in enumerate(['c', 'd', 'e'], start=2))

  def GetRevisionStates(self):
    """Returns the list of mock revision states."""
    return self.mock_revision_states
class MockBisectOptions(object):
  """Minimal stand-in for bisect options; only repeat_test_count is read."""

  def __init__(self):
    self.repeat_test_count = 3
class BisectResultsTest(unittest.TestCase):
def setUp(self):
self.mock_bisect_state = MockBisectState()
self.mock_depot_registry = MockDepotRegistry()
self.mock_opts = MockBisectOptions()
self.mock_warnings = []
self.original_getcwd = os.getcwd
self.original_chdir = os.chdir
self.original_query_revision_info = source_control.QueryRevisionInfo
os.getcwd = lambda: '/path'
os.chdir = lambda _: None
revision_infos = {'b': {'test': 'b'}, 'c': {'test': 'c'}}
source_control.QueryRevisionInfo = lambda rev: revision_infos[rev]
def tearDown(self):
os.getcwd = self.original_getcwd
os.chdir = self.original_chdir
source_control.QueryRevisionInfo = self.original_query_revision_info
def _AssertConfidence(self, score, bad_values, good_values):
"""Checks whether the given sets of values have a given confidence score.
The score represents our confidence that the two sets of values wouldn't
be as different as they are just by chance; that is, that some real change
occurred between the two sets of values.
Args:
score: Expected confidence score.
bad_values: First list of numbers.
good_values: Second list of numbers.
"""
# ConfidenceScore takes a list of lists but these lists are flattened
# inside the function.
confidence = BisectResults.ConfidenceScore(
[[v] for v in bad_values],
[[v] for v in good_values])
self.assertEqual(score, confidence)
def testConfidenceScoreIsZeroOnTooFewLists(self): def testConfidenceScoreIsZeroOnTooFewLists(self):
self.assertEqual(bisect_results.ConfidenceScore([], [[1], [2]]), 0.0) self._AssertConfidence(0.0, [], [[1], [2]])
self.assertEqual(bisect_results.ConfidenceScore([[1], [2]], []), 0.0) self._AssertConfidence(0.0, [[1], [2]], [])
self.assertEqual(bisect_results.ConfidenceScore([[1]], [[1], [2]]), 0.0) self._AssertConfidence(0.0, [[1]], [[1], [2]])
self.assertEqual(bisect_results.ConfidenceScore([[1], [2]], [[1]]), 0.0) self._AssertConfidence(0.0, [[1], [2]], [[1]])
def testConfidenceScoreIsZeroOnEmptyLists(self): def testConfidenceScoreIsZeroOnEmptyLists(self):
self.assertEqual(bisect_results.ConfidenceScore([[], []], [[1], [2]]), 0.0) self.assertEqual(BisectResults.ConfidenceScore([[], []], [[1], [2]]), 0.0)
self.assertEqual(bisect_results.ConfidenceScore([[1], [2]], [[], []]), 0.0) self.assertEqual(BisectResults.ConfidenceScore([[1], [2]], [[], []]), 0.0)
def testConfidenceScoreIsUsingTTestWelchsTTest(self): def testConfidenceScore_ZeroConfidence(self):
original_WelchsTTest = ttest.WelchsTTest # The good and bad sets contain the same values, so the confidence that
try: # they're different should be zero.
ttest.WelchsTTest = lambda _sample1, _sample2: (0, 0, 0.42) self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])
self.assertAlmostEqual(
bisect_results.ConfidenceScore([[1], [1]], [[2], [2]]), 58.0) def testConfidenceScore_MediumConfidence(self):
finally: self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])
ttest.WelchsTTest = original_WelchsTTest
def testConfidenceScore_HighConfidence(self):
self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])
class BisectResulstsTest(unittest.TestCase):
# TODO(sergiyb): Write tests for GetResultDicts when it is broken into smaller def testConfidenceScore_VeryHighConfidence(self):
# pieces. # Confidence is high if the two sets of values have no internal variance.
pass self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])
def testConfidenceScore_UnbalancedSampleSize(self):
# The second set of numbers only contains one number, so confidence is 0.
self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2], [1.4])
def testConfidenceScore_EmptySample(self):
# Confidence is zero if either or both samples are empty.
self._AssertConfidence(0.0, [], [])
self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])
def testConfidenceScore_FunctionalTestResults(self):
self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])
def testConfidenceScore_RealWorldCases(self):
"""This method contains a set of data from actual bisect results.
The confidence scores asserted below were all copied from the actual
results, so the purpose of this test method is mainly to show what the
results for real cases are, and compare when we change the confidence
score function in the future.
"""
self._AssertConfidence(80, [133, 130, 132, 132, 130, 129], [129, 129, 125])
self._AssertConfidence(99.5, [668, 667], [498, 498, 499])
self._AssertConfidence(80, [67, 68], [65, 65, 67])
self._AssertConfidence(0, [514], [514])
self._AssertConfidence(90, [616, 613, 607, 615], [617, 619, 619, 617])
self._AssertConfidence(0, [3.5, 5.8, 4.7, 3.5, 3.6], [2.8])
self._AssertConfidence(90, [3, 3, 3], [2, 2, 2, 3])
self._AssertConfidence(0, [1999004, 1999627], [223355])
self._AssertConfidence(90, [1040, 934, 961], [876, 875, 789])
self._AssertConfidence(90, [309, 305, 304], [302, 302, 299, 303, 298])
def testCorrectlyFindsBreakingRange(self):
revision_states = self.mock_bisect_state.mock_revision_states
revision_states[0].passed = 0
revision_states[1].passed = 0
revision_states[2].passed = 1
revision_states[3].passed = 1
revision_states[4].passed = 1
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertEqual(revision_states[2], results.first_working_revision)
self.assertEqual(revision_states[1], results.last_broken_revision)
def testCorrectlyComputesRegressionStatistics(self):
revision_states = self.mock_bisect_state.mock_revision_states
revision_states[0].passed = 0
revision_states[0].value = {'values': [1000, 999, 998]}
revision_states[1].passed = 0
revision_states[1].value = {'values': [980, 1000, 999]}
revision_states[2].passed = 1
revision_states[2].value = {'values': [50, 45, 55]}
revision_states[3].passed = 1
revision_states[3].value = {'values': [45, 56, 45]}
revision_states[4].passed = 1
revision_states[4].value = {'values': [51, 41, 58]}
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertAlmostEqual(99.9, results.confidence)
self.assertAlmostEqual(1909.86547085, results.regression_size)
self.assertAlmostEqual(7.16625904, results.regression_std_err)
def testFindsCulpritRevisions(self):
revision_states = self.mock_bisect_state.mock_revision_states
revision_states[1].depot = 'chromium'
revision_states[2].depot = 'webkit'
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertEqual(1, len(results.culprit_revisions))
self.assertEqual(('b', {'test': 'b'}, 'chromium'),
results.culprit_revisions[0])
def testFindsOtherRegressions(self):
revision_states = self.mock_bisect_state.mock_revision_states
revision_states[0].passed = 0
revision_states[0].value = {'values': [100, 100, 100]}
revision_states[1].passed = 0
revision_states[1].value = {'values': [100, 100, 100]}
revision_states[2].passed = 1
revision_states[2].value = {'values': [10, 10, 10]}
revision_states[3].passed = 1
revision_states[3].value = {'values': [100, 100, 100]}
revision_states[4].passed = 1
revision_states[4].value = {'values': [60, 60, 60]}
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
expected_regressions = [[revision_states[2], revision_states[1], 99.9],
[revision_states[4], revision_states[3], 80.0]]
self.assertEqual(expected_regressions, results.other_regressions)
def testNoResultBasedWarningsForNormalState(self):
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertEqual(0, len(results.warnings))
def testWarningForMultipleCulpritRevisions(self):
self.mock_bisect_state.mock_revision_states[2].passed = 'Skipped'
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertEqual(1, len(results.warnings))
def testWarningForTooLowRetryLimit(self):
self.mock_opts.repeat_test_count = 1
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertEqual(1, len(results.warnings))
def testWarningForTooLowConfidence(self):
revision_states = self.mock_bisect_state.mock_revision_states
revision_states[2].value = {'values': [95, 100, 90]}
revision_states[3].value = {'values': [95, 100, 90]}
revision_states[4].value = {'values': [95, 100, 90]}
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertGreater(results.confidence, 0)
self.assertEqual(1, len(results.warnings))
def testWarningForZeroConfidence(self):
revision_states = self.mock_bisect_state.mock_revision_states
revision_states[2].value = {'values': [100, 105, 95]}
revision_states[3].value = {'values': [100, 105, 95]}
revision_states[4].value = {'values': [100, 105, 95]}
results = BisectResults(self.mock_bisect_state, self.mock_depot_registry,
self.mock_opts, self.mock_warnings)
self.assertEqual(0, results.confidence)
self.assertEqual(1, len(results.warnings))
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
class RevisionState(object):
  """Contains bisect state for a given revision.

  Properties:
    depot: The depot that this revision is from (e.g. WebKit).
    revision: Revision number (Git hash or SVN number).
    index: Position of the state in the list of all revisions.
    value: Value(s) returned from the test.
    perf_time: Time that a test took.
    build_time: Time that a build took.
    passed: Represents whether the performance test was successful at that
        revision. Possible values include: 1 (passed), 0 (failed),
        '?' (skipped), 'F' (build failed).
    external: If the revision is a 'src' revision, 'external' contains the
        revisions of each of the external libraries.
  """

  def __init__(self, depot, revision, index):
    # Identity of this revision within the bisect.
    self.depot = depot
    self.revision = revision
    self.index = index
    # The fields below start at their "not yet run" defaults and are filled
    # in as the bisect progresses.
    self.value = None
    self.passed = '?'
    self.perf_time = 0
    self.build_time = 0
    self.external = None

  # TODO(sergiyb): Update() to parse run_results from the RunTest.
class BisectState(object):
  """Represents a state of the bisect as a collection of revision states."""

  def __init__(self, depot, revisions):
    """Initializes a new BisectState object with a set of revision states.

    Args:
      depot: Name of the depot used for initial set of revision states.
      revisions: List of revisions used for initial set of revision states.
    """
    self.revision_states = []
    self.revision_index = {}
    for index, revision in enumerate(revisions):
      self.revision_states.append(
          self._InitRevisionState(depot, revision, index))

  @staticmethod
  def _RevisionKey(depot, revision):
    """Returns a key uniquely identifying a (depot, revision) pair."""
    return "%s:%s" % (depot, revision)

  def _InitRevisionState(self, depot, revision, index):
    """Creates a new revision state and records its index for lookups."""
    key = self._RevisionKey(depot, revision)
    self.revision_index[key] = index
    return RevisionState(depot, revision, index)

  def GetRevisionState(self, depot, revision):
    """Returns a mutable revision state, or None if it is unknown."""
    key = self._RevisionKey(depot, revision)
    index = self.revision_index.get(key)
    # BUGFIX: index 0 (the first revision) is falsy, so a bare `if index`
    # incorrectly returned None for it. Compare against None explicitly.
    return self.revision_states[index] if index is not None else None

  def CreateRevisionStatesAfter(self, depot, revisions, reference_depot,
                                reference_revision):
    """Creates a set of new revision states after a specified reference state.

    Args:
      depot: Name of the depot for the new revision states.
      revisions: List of revisions for the new revision states.
      reference_depot: Name of the depot for the reference revision state.
      reference_revision: Revision for the reference revision state.

    Returns:
      A list containing all created revision states in order as they were
      added.
    """
    ref_key = self._RevisionKey(reference_depot, reference_revision)
    ref_index = self.revision_index[ref_key]
    num_new_revisions = len(revisions)
    # Shift every state after the reference point to make room for the new
    # ones. BUGFIX: the lookup index must be updated alongside entry.index;
    # previously only entry.index was shifted, leaving revision_index stale
    # so GetRevisionState returned the wrong state for shifted revisions.
    for entry in self.revision_states:
      if entry.index > ref_index:
        entry.index += num_new_revisions
        key = self._RevisionKey(entry.depot, entry.revision)
        self.revision_index[key] = entry.index
    first_index = ref_index + 1
    for index, revision in enumerate(revisions, start=first_index):
      new_state = self._InitRevisionState(depot, revision, index)
      self.revision_states.insert(index, new_state)
    return self.revision_states[first_index:first_index + num_new_revisions]

  def GetRevisionStates(self):
    """Returns a copy of the list of the revision states."""
    return list(self.revision_states)
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import unittest
from bisect_state import BisectState
class BisectStateTest(unittest.TestCase):

  def testCreatesRevisionsStateAfterAReferenceRevision(self):
    """Inserted revisions land directly after their reference revision."""
    bisect_state = BisectState('chromium', ['a', 'b', 'c', 'd'])
    bisect_state.CreateRevisionStatesAfter('webkit', [1, 2, 3], 'chromium', 'b')
    bisect_state.CreateRevisionStatesAfter('v8', [100, 200], 'webkit', 2)

    expected_revisions = [('chromium', 'a'), ('chromium', 'b'), ('webkit', 1),
                          ('webkit', 2), ('v8', 100), ('v8', 200),
                          ('webkit', 3), ('chromium', 'c'), ('chromium', 'd')]
    actual_revisions = bisect_state.GetRevisionStates()
    self.assertEqual(len(expected_revisions), len(actual_revisions))
    for index, (expected_depot, expected_rev) in enumerate(expected_revisions):
      actual = actual_revisions[index]
      self.assertEqual(index, actual.index)
      self.assertEqual(expected_depot, actual.depot)
      self.assertEqual(expected_rev, actual.revision)

  # TODO(sergiyb): More tests for the remaining functions.
# Allow running this test module directly.
if __name__ == '__main__':
  unittest.main()
...@@ -87,6 +87,116 @@ REPO_PARAMS = [ ...@@ -87,6 +87,116 @@ REPO_PARAMS = [
# Bisect working directory. # Bisect working directory.
BISECT_DIR = 'bisect' BISECT_DIR = 'bisect'
# The percentage at which confidence is considered high.
# NOTE(review): consumers of this threshold live elsewhere in the file;
# presumably results at or above it are reported as trustworthy — confirm.
HIGH_CONFIDENCE = 95
# Below is the map of "depot" names to information about each depot. Each depot
# is a repository, and in the process of bisecting, revision ranges in these
# repositories may also be bisected.
#
# Each depot information dictionary may contain:
# src: Path to the working directory.
# recurse: True if this repository will get bisected.
# depends: A list of other repositories that are actually part of the same
# repository in svn. If the repository has any dependent repositories
# (e.g. skia/src needs skia/include and skia/gyp to be updated), then
# they are specified here.
# svn: URL of SVN repository. Needed for git workflow to resolve hashes to
# SVN revisions.
# from: Parent depot that must be bisected before this is bisected.
# deps_var: Key name in vars variable in DEPS file that has revision
# information.
DEPOT_DEPS_NAME = {
  'chromium': {
    'src': 'src',
    'recurse': True,
    'depends': None,
    'from': ['cros', 'android-chrome'],
    'viewvc':
        'http://src.chromium.org/viewvc/chrome?view=revision&revision=',
    'deps_var': 'chromium_rev'
  },
  'webkit': {
    'src': 'src/third_party/WebKit',
    'recurse': True,
    'depends': None,
    'from': ['chromium'],
    'viewvc':
        'http://src.chromium.org/viewvc/blink?view=revision&revision=',
    'deps_var': 'webkit_revision'
  },
  'angle': {
    'src': 'src/third_party/angle',
    # Older checkouts used a different directory for ANGLE.
    'src_old': 'src/third_party/angle_dx11',
    'recurse': True,
    'depends': None,
    'from': ['chromium'],
    'platform': 'nt',
    'deps_var': 'angle_revision'
  },
  'v8': {
    'src': 'src/v8',
    'recurse': True,
    'depends': None,
    'from': ['chromium'],
    'custom_deps': GCLIENT_CUSTOM_DEPS_V8,
    'viewvc': 'https://code.google.com/p/v8/source/detail?r=',
    'deps_var': 'v8_revision'
  },
  'v8_bleeding_edge': {
    'src': 'src/v8_bleeding_edge',
    'recurse': True,
    'depends': None,
    'svn': 'https://v8.googlecode.com/svn/branches/bleeding_edge',
    'from': ['v8'],
    'viewvc': 'https://code.google.com/p/v8/source/detail?r=',
    'deps_var': 'v8_revision'
  },
  'skia/src': {
    'src': 'src/third_party/skia/src',
    'recurse': True,
    'svn': 'http://skia.googlecode.com/svn/trunk/src',
    # skia/include and skia/gyp must be updated together with skia/src.
    'depends': ['skia/include', 'skia/gyp'],
    'from': ['chromium'],
    'viewvc': 'https://code.google.com/p/skia/source/detail?r=',
    'deps_var': 'skia_revision'
  },
  'skia/include': {
    'src': 'src/third_party/skia/include',
    'recurse': False,
    'svn': 'http://skia.googlecode.com/svn/trunk/include',
    'depends': None,
    'from': ['chromium'],
    'viewvc': 'https://code.google.com/p/skia/source/detail?r=',
    'deps_var': 'None'
  },
  'skia/gyp': {
    'src': 'src/third_party/skia/gyp',
    'recurse': False,
    'svn': 'http://skia.googlecode.com/svn/trunk/gyp',
    'depends': None,
    'from': ['chromium'],
    'viewvc': 'https://code.google.com/p/skia/source/detail?r=',
    'deps_var': 'None'
  }
}

# Convenience list of all known depot names.
DEPOT_NAMES = DEPOT_DEPS_NAME.keys()
# The possible values of the --bisect_mode flag, which determines what to
# use when classifying a revision as "good" or "bad".
BISECT_MODE_MEAN = 'mean'
BISECT_MODE_STD_DEV = 'std_dev'
BISECT_MODE_RETURN_CODE = 'return_code'
def AddAdditionalDepotInfo(depot_info):
  """Adds additional depot info to the global depot variables.

  Args:
    depot_info: Dict of depot-name -> depot-info entries to merge into
        DEPOT_DEPS_NAME; DEPOT_NAMES is refreshed to match.
  """
  global DEPOT_DEPS_NAME
  global DEPOT_NAMES
  merged = dict(DEPOT_DEPS_NAME)
  merged.update(depot_info)
  DEPOT_DEPS_NAME = merged
  DEPOT_NAMES = DEPOT_DEPS_NAME.keys()
def OutputAnnotationStepStart(name): def OutputAnnotationStepStart(name):
"""Outputs annotation to signal the start of a step to a try bot. """Outputs annotation to signal the start of a step to a try bot.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment