Add culprit CL trim script

Adds a script that can be used to trim down the list of culprit CLs in a blamelist by determining which ran a particular CQ bot and passed before submission. Bug: 1120104 Change-Id: I469a74535a64cc9d03cbbfee64da65b87ed7723b Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2372729 Commit-Queue: Brian Sheedy <bsheedy@chromium.org> Commit-Queue: Zhenyao Mo <zmo@chromium.org> Auto-Submit: Brian Sheedy <bsheedy@chromium.org> Reviewed-by: Zhenyao Mo <zmo@chromium.org> Cr-Commit-Position: refs/heads/master@{#801189}

Add culprit CL trim script
Adds a script that can be used to trim down the list of culprit CLs in a blamelist by determining which ran a particular CQ bot and passed before submission. Bug: 1120104 Change-Id: I469a74535a64cc9d03cbbfee64da65b87ed7723b Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2372729 Commit-Queue: Brian Sheedy <bsheedy@chromium.org> Commit-Queue: Zhenyao Mo <zmo@chromium.org> Auto-Submit: Brian Sheedy <bsheedy@chromium.org> Reviewed-by: Zhenyao Mo <zmo@chromium.org> Cr-Commit-Position: refs/heads/master@{#801189}
9b07681c · Brian Sheedy · Commit Bot · 53a58ad0 · 9b07681c
Commit 9b07681c authored Aug 24, 2020 by Brian Sheedy Committed by Commit Bot Aug 24, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 230 additions and 0 deletions

content/test/gpu/trim_culprit_cls.py content/test/gpu/trim_culprit_cls.py +230 -0

No files found.
--- a/content/test/gpu/trim_culprit_cls.py
+++ b/content/test/gpu/trim_culprit_cls.py
+#!/usr/bin/env vpython
+# Copyright 2020 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""
+Script for determining which CLs in a blamelist ran on a certain trybot.
+
+There are cases where CLs can be absolved of a CI failure if they ran on a
+similar trybot before being submitted. This script will go through each CL in a
+given blamelist and determine whether they ran on a specified trybot or not.
+"""
+
+import argparse
+import json
+import os
+import re
+import subprocess
+
+# BigQuery query (run with standard SQL; see QueryTrybotsForCl) that lists the
+# CQ builds recorded for a single chromium/src CL. The {cl_number} placeholder
+# is filled in with str.format(). Each returned row has the columns patchset,
+# builder (as "project/bucket/builder"), url, critical, status, start_time and
+# duration (in seconds), with the highest patchset first.
+#
+# NOTE: Only builds created within the last 30 days are joined in, so a CL
+# whose tryjobs are all older than that produces no rows.
+#
+# Schemas:
+# - go/buildbucket-bq and go/buildbucket-proto/build.proto
+# - go/luci/cq/bq and
+#   https://source.chromium.org/chromium/infra/infra/+/master:go/src/go.chromium.org/luci/cv/api/bigquery/v1/attempt.proto
+#
+# Original author: maruel@
+QUERY_TEMPLATE = """\
+WITH cq_builds AS (
+  SELECT
+    build.id,
+    build.critical,
+    start_time,
+    TIMESTAMP_DIFF(end_time, start_time, SECOND) AS duration,
+    cl.change,
+    cl.patchset
+  FROM `commit-queue.chromium.attempts` CROSS JOIN UNNEST(builds) AS build CROSS JOIN UNNEST(gerrit_changes) AS cl
+  WHERE
+    cl.host = 'chromium-review.googlesource.com'
+    AND cl.project = 'chromium/src'
+    AND cl.change = {cl_number}
+),
+
+builds AS (
+  SELECT
+    patchset,
+    bb.builder.project||'/'||bb.builder.bucket||'/'||bb.builder.builder AS builder,
+    'ci.chromium.org/b/'||bb.id AS url,
+    cq.critical,
+    bb.status,
+    cq.start_time,
+    duration
+  FROM cq_builds AS cq INNER JOIN `cr-buildbucket.chromium.builds` AS bb ON cq.id = bb.id
+  WHERE
+    # Performance optimization.
+    bb.create_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
+)
+
+SELECT * FROM builds ORDER BY patchset DESC, critical, builder, start_time
+"""
+
+# Extracts the URL from a "Reviewed-on: <url>" line (the footer that records
+# where a commit was reviewed). MULTILINE makes ^ and $ anchor on every line of
+# the searched text, and leading whitespace before the footer is tolerated
+# (presumably because `git show` indents the commit message).
+GERRIT_URL_REGEX = re.compile(r'^\s*Reviewed-on: (?P<gerrit_url>.*)$',
+                              re.MULTILINE)
+
+
+class ChangeList(object):
+  """Class for storing relevant information for a CL."""
+
+  def __init__(self):
+    self.revision = None
+    self.gerrit_url = None
+    self._cl_number = None
+    self.largest_patchset = None
+    self.ran_trybot = None
+
+  @property
+  def cl_number(self):
+    assert self.gerrit_url
+    if not self._cl_number:
+      self._cl_number = self.gerrit_url.split('/')[-1]
+    return self._cl_number
+
+  def __str__(self):
+    assert self.revision is not None
+    assert self.gerrit_url is not None
+    assert self.largest_patchset is not None
+    assert self.ran_trybot is not None
+    s = '%s (%s)' % (self.revision, self.gerrit_url)
+    if not self.ran_trybot:
+      s += ' <<<< Did not run trybot'
+    return s
+
+
+def QueryTrybotsForCl(cl_number, project):
+  """Queries BigQuery for the tryjobs run for a CL.
+
+  Args:
+    cl_number: An int or string containing the CL number to query.
+    project: A string containing the billing project to use for queries.
+
+  Returns:
+    A list of dicts, each entry containing data for one trybot run.
+  """
+  query = QUERY_TEMPLATE.format(cl_number=cl_number)
+
+  cmd = [
+      'bq',
+      'query',
+      '--format=json',
+      '--project_id=%s' % project,
+      '--max_rows=500',
+      '--use_legacy_sql=false',
+      query,
+  ]
+  with open('/dev/null', 'w') as devnull:
+    stdout = subprocess.check_output(cmd, stderr=devnull)
+  return json.loads(stdout)
+
+
+def FillTrybotRuns(blamelist, trybot, project):
+  """Fills the trybot data for the entries in |blamelist|
+
+  Args:
+    blamelist: A list of ChangeList objects with their gerrit_url fields filled.
+    trybot: A string containing the name of the trybot to check for.
+    project: A string containing the billing project to use for queries.
+  """
+  total_cls = len(blamelist)
+  for i, entry in enumerate(blamelist):
+    print 'Getting data for CL %s/%s' % (i + 1, total_cls)
+    largest_patchset = 0
+    all_trybots = QueryTrybotsForCl(entry.cl_number, project)
+    assert all_trybots
+    # Query orders results by patchset, ensuring that we get relevant results
+    # even if the number of tryjobs exceeds the row limit, but loading the JSON
+    # into a dict doesn't preserve ordering, so find the largest patchset now.
+    for tryjob in all_trybots:
+      patchset = int(tryjob['patchset'])
+      if patchset > largest_patchset:
+        largest_patchset = patchset
+    entry.largest_patchset = largest_patchset
+
+    for tryjob in all_trybots:
+      if largest_patchset != int(tryjob['patchset']):
+        continue
+      # 'builder' field is in the form project/bucket/builder, e.g.
+      # chromium/try/android-marshmallow-arm64-rel
+      if trybot == tryjob['builder'].split('/')[-1]:
+        entry.ran_trybot = True
+        break
+    if entry.ran_trybot is None:
+      entry.ran_trybot = False
+
+
+def FillGerritUrls(blamelist):
+  """Fills the Gerrit URLs for the entries in |blamelist|
+
+  Args:
+    blamelist: A list of ChangeList objects with their revision fields filled.
+  """
+  cmd_template = [
+      'git',
+      'show',
+      '--name-only',
+  ]
+  for entry in blamelist:
+    assert entry.revision
+    stdout = subprocess.check_output(cmd_template + [entry.revision],
+                                     stderr=subprocess.STDOUT)
+    match = GERRIT_URL_REGEX.search(stdout)
+    assert match
+    entry.gerrit_url = match.groupdict()['gerrit_url']
+    assert entry.gerrit_url
+
+
+def GetBlamelist(start_revision, end_revision):
+  """Gets a revision blamelist between the two given revisions.
+
+  Args:
+    start_revision: A string containing the earliest revision in the blamelist.
+    end_revision: A string containing the latest revision in the blamelist.
+
+  Returns:
+    A list of ChangeList objects with their revision fields filled in, each
+    corresponding to a revision in the blamelist. The first entry is the
+    latest in the blamelist.
+  """
+  cmd = [
+      'git',
+      'log',
+      '--pretty=oneline',
+      '%s~1..%s' % (start_revision, end_revision),
+  ]
+  stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+
+  blamelist = []
+  for line in stdout.splitlines():
+    cl = ChangeList()
+    cl.revision = line.split()[0]
+    blamelist.append(cl)
+  return blamelist
+
+
+def ParseArgs():
+  parser = argparse.ArgumentParser(
+      description='Script to determine which CLs in a blamelist did not run a '
+      'particular trybot.')
+  parser.add_argument('--start-revision',
+                      required=True,
+                      help='The earliest revision in the blamelist.')
+  parser.add_argument('--end-revision',
+                      required=True,
+                      help='The latest revision in the blamelist.')
+  parser.add_argument('--project',
+                      required=True,
+                      help='A billing project to use for queries.')
+  parser.add_argument('--trybot',
+                      required=True,
+                      help='The name of the trybot to look for.')
+  return parser.parse_args()
+
+
+def main():
+  args = ParseArgs()
+  blamelist = GetBlamelist(args.start_revision, args.end_revision)
+  FillGerritUrls(blamelist)
+  FillTrybotRuns(blamelist, args.trybot, args.project)
+  print '\n\nBlamelist (latest first):\n'
+  for entry in blamelist:
+    print entry
+
+
+if __name__ == '__main__':
+  main()