Commit 580e4037 authored by Stephen Martinis, committed by Commit Bot

//tools/infra: Add a simple version of find_bad_builds.py

This CL adds a simple version of the find_bad_builds.py script. It finds
builds which may contain a given CL which was landed and then reverted.

Future CLs add more features to this script. See https://crrev.com/c/2464165
for a fuller description.

Bug: 1137072
Change-Id: I3b4469b74d2be69af7655c20082900bbe0b5377f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2469226
Commit-Queue: Stephen Martinis <martiniss@chromium.org>
Reviewed-by: Ben Pastene <bpastene@chromium.org>
Cr-Commit-Position: refs/heads/master@{#818706}
parent ebc45775
@@ -2,6 +2,16 @@ Directory of scripts needed for troopering.
#### Mass cancelling builds and tasks
If you're cancelling builds because of a bad chromium/src revision, use the
`find_bad_builds.py` script. Example:
```
# Assuming that deadbeef is the git revision of the revert of 12345678, which
# landed and broke something.
./find_bad_builds.py deadbeef 12345678 | \
    bb cancel -reason "CQ outage, see crbug.com/XXXXXX"
```
To cancel many builds at once use the following command:
```
......
#!/usr/bin/env vpython
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Find builds which contain a bad CL.
A 'bad CL' is (usually) a CL which was later reverted. The simplest usage of
this script is simply to call it with a revert CL. The script will find the
associated CL in your git checkout, and then search for buildbucket try builds
which executed with the original CL, but not the reverted CL.
This script also has filter parameters for various attributes of builds, like
build duration or project/bucket/builder.
This script uses your chromium/src checkout, so you must keep it updated if you
want this to be able to cancel recent builds.
"""
from __future__ import print_function
import argparse
import datetime
import functools
import json
import logging
import multiprocessing
import subprocess
import sys
import git_utils
def _find_builds(predicate):
  """Asks buildbucket (via the `bb` CLI) for builds matching a predicate.

  Args:
    predicate: A JSON-serializable object passed to `bb ls -predicate`.

  Returns:
    The stripped output of `bb ls -json`, split into a list of lines.
  """
  logging.debug('Query buildbucket with predicate: %s',
                json.dumps(predicate, indent=2))
  command = ['bb', 'ls', '-json', '-predicate', json.dumps(predicate)]
  output = subprocess.check_output(command)
  return output.strip().splitlines()
def _parse_args(raw_args):
"""Parses command line arguments."""
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'good_revision',
help=
"A known good revision. Builds which start at revisions after this will"
" not be canceled. If this revision is the revert of an earlier revision,"
" that revision will be set to bad_revision. If this isn't a revert,"
" bad_revision is required.")
parser.add_argument(
'bad_revision',
help=
"A known bad revision. This is usually automatically calculated from the"
" good revision (assuming it's a revert).")
# FIXME: This is imperfect in some scenarios. For example, if we want to
# cancel all linux builds, we'd have to manually specify ~15 different
# builders (at least). We should potentially allow for filtering based on
# the swarming task dimensions.
parser.add_argument(
'--builder',
'-b',
action='append',
help='Which builder should we find builds for. If not set, finds all'
' builds in the given project/bucket. May be used multiple times. If'
' multiple builders are specified, this script has to fetch all builds'
' in the bucket, which is a bit slow. Specifying one builder is fast,'
' however.')
parser.add_argument('--project',
default='chromium',
help='The buildbucket project to search for builds in.')
parser.add_argument('--bucket',
default='try',
help='The buildbucket bucket to search for builds in')
parser.add_argument(
'--verbose',
'-v',
action='count',
default=0,
help=
'Use for more logging. Can use multiple times to increase logging level.')
return parser.parse_args(raw_args)
# FIXME: Add support for time based cancellations. This could be used for
# issues which don't show up via chromium/src commits.
def main(raw_args, print_fn):
  """Runs the script.

  Looks up the commit dates of the good and bad revisions, asks buildbucket for
  STARTED builds created between them, and prints the ID of each build found.
  When several builders are requested, the builds are filtered locally so that
  only builds of those builders are printed.

  Args:
    raw_args: The raw command line arguments.
    print_fn: Function to print a line to the screen. Overridden for tests.

  Returns:
    The exit code of the program.
  """
  args = _parse_args(raw_args)
  # With 0 verbose, uses ERROR level. Min level is DEBUG.
  level = max(logging.ERROR - args.verbose * 10, logging.DEBUG)
  logging.basicConfig(level=level)

  good_commit = args.good_revision
  bad_commit = args.bad_revision
  # FIXME: Handle only bad revision? Not sure if a reasonable scenario where
  # we'd want to do that exists.
  revert_date = git_utils.get_commit_date(good_commit)
  orig_date = git_utils.get_commit_date(bad_commit)
  # Add 20 minutes to account for git replication delay. Sometimes gerrit
  # doesn't realize a commit has landed for a few minutes, so builds which start
  # after the 'good' commit landed might still not contain it. Builds are not
  # filtered by commit yet (see the TODO below), but we want to make sure we
  # have a buffer of builds when we search in buildbucket.
  revert_date += datetime.timedelta(minutes=20)
  logging.debug('Good commit: %s\t%s', good_commit, revert_date)
  logging.debug('Bad Commit: %s\t%s', bad_commit, orig_date)

  predicate = {
      'builder': {
          'bucket': args.bucket,
          'project': args.project,
      },
      'createTime': {
          # We already assumed UTC, so add it in the format buildbucket expects.
          'startTime': orig_date.strftime('%Y-%m-%dT%H:%M:%S') + '+00:00',
          'endTime': revert_date.strftime('%Y-%m-%dT%H:%M:%S') + '+00:00',
      },
      'status': 'STARTED',
  }
  # If we have one builder, buildbucket can filter when we do the RPC. If we
  # have more than one, we have to fetch builds for all builders, then filter
  # after. Theoretically we could run multiple `bb` invocations and merge them
  # together, but that doesn't seem super necessary.
  if args.builder and len(args.builder) == 1:
    predicate['builder']['builder'] = args.builder[0]

  resp = _find_builds(predicate)
  build_jsons = [json.loads(x) for x in resp]

  if args.builder and len(args.builder) > 1:
    # The RPC above returned builds for every builder in the bucket; keep only
    # the requested ones so we never print (and later cancel) unrelated builds.
    # NOTE(review): assumes each build's JSON carries the builder name at
    # build['builder']['builder'] -- confirm against `bb ls -json` output.
    # Builds without that field are dropped, which errs on the side of
    # cancelling too little rather than too much.
    wanted_builders = set(args.builder)
    total = len(build_jsons)
    build_jsons = [
        build for build in build_jsons
        if build.get('builder', {}).get('builder') in wanted_builders
    ]
    logging.debug('Kept %d of %d builds for builders %s', len(build_jsons),
                  total, sorted(wanted_builders))

  # TODO: Filter builds found by buildbucket by other attributes (e.g. the
  # commit they ran with).
  ids = [build['id'] for build in build_jsons]
  for bid in ids:
    print_fn(bid)
  return 0
if __name__ == '__main__':
  # Print build IDs to stdout and hand main()'s return value to the shell.
  exit_code = main(sys.argv[1:], print)
  sys.exit(exit_code)
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Git utilities."""
import datetime
import json
import logging
import subprocess
def _run_git(*cmd):
  """Invokes `git` with the given arguments and returns its captured output."""
  argv = ['git']
  argv.extend(cmd)
  return subprocess.check_output(argv)
def _get_commit_message(rev):
  """Returns the output of `git log --format=%B -n 1` for the revision."""
  log_args = ('log', '--format=%B', '-n', '1', rev)
  return _run_git(*log_args)
def get_commit_date(rev):
  """Gets the date a commit was committed, as a naive datetime in UTC.

  Args:
    rev: The git revision to look up.

  Returns:
    A naive datetime.datetime holding the committer date converted to UTC.

  Raises:
    ValueError: If git's output is not of the form
      '<weekday> <month> <day> <H:M:S> <year> <+/-HHMM>'.
  """
  raw_output = _run_git('show', '--no-patch', '--no-notes', '--pretty=%cd',
                        rev).strip()
  # subprocess hands back bytes on Python 3; the string handling below needs
  # text. On Python 2 this yields unicode, which strptime also accepts.
  if isinstance(raw_output, bytes):
    raw_output = raw_output.decode('utf-8')

  # The last space separated section is the timezone offset. '%z' can't be used
  # to parse it because python datetime (in 2.7, at least) doesn't support
  # parsing timezones by default, so split it off and apply it by hand.
  parts = raw_output.rsplit(' ', 1)
  if len(parts) != 2:
    raise ValueError('Unexpected git date output for %s: %r' %
                     (rev, raw_output))
  raw_date, tz = parts
  if len(tz) != 5 or tz[0] not in '+-' or not tz[1:].isdigit():
    raise ValueError('Expected git timezone like %s, got %s.' % ('+0000', tz))

  offset = datetime.timedelta(hours=int(tz[1:3]), minutes=int(tz[3:5]))
  if tz[0] == '-':
    offset = -offset

  local_date = datetime.datetime.strptime(raw_date.strip(),
                                          '%a %b %d %H:%M:%S %Y')
  # git prints the wall-clock time in the given offset's zone; subtracting the
  # offset gives UTC. For '+0000' this is a no-op.
  return local_date - offset
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment