Commit 5267f1f6 authored by Andrew Grieve, committed by Commit Bot

Refactor generate_milestone_report.py

This is mostly a clean-up/rewrite, but it is motivated by adding bundle logic,
which will be a bit tricky since we'll be comparing bundles to non-bundles.

Changes:
 * Moved out of libsupersize directory.
 * Removed all imports of libsupersize
   * Makes things faster because of pypy.
   * Hides logs from supersize.
 * Downloads each .size file only once
 * Prints progress
 * Removed --verbose flag
 * Encapsulates more logic in the "Report" class
 * git cl format --python --full

Bug: 873714
Change-Id: If9b2938d3cb3c33d6dbb3f4c8049c026c4bee496
Reviewed-on: https://chromium-review.googlesource.com/c/1469271
Commit-Queue: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Eric Stevenson <estevenson@chromium.org>
Cr-Commit-Position: refs/heads/master@{#632057}
parent e8aa563c
#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generate report files to view and/or compare (diff) milestones.
Size files are located in a Google Cloud Storage bucket for various Chrome
versions. This script generates various HTML report files to view a single
milesone, or to compare two milestones with the same CPU and APK.
Desired CPUs, APKs, and milestone versions are set in constants below. If
specified by the --skip-existing flag, the script checks what HTML report files
have already been uploaded to the GCS bucket, then works on generating the
remaining desired files.
Size files are fetched by streaming them from the source bucket, then the
html_report module handles creating a report file to diff two size files.
Reports are saved to a local directory, and once all reports are created they
can be uploaded to the destination bucket.
Reports can be uploaded automatically with the --sync flag. Otherwise, they can
be uploaded at a later point.
"""
import argparse
import collections
import contextlib
import errno
import itertools
import json
import logging
import multiprocessing
import os
import re
import shutil
import subprocess
import sys
import tempfile

_PUSH_URL = 'gs://chrome-supersize/milestones/'

_DESIRED_CPUS = ['arm', 'arm_64']
_DESIRED_APKS = ['Monochrome.apk', 'ChromeModern.apk', 'AndroidWebview.apk']

# Versions are manually gathered from
# https://omahaproxy.appspot.com/history?os=android&channel=stable
_DESIRED_VERSIONS = [
    '60.0.3112.116',
    '61.0.3163.98',
    '62.0.3202.84',
    '63.0.3239.111',
    '64.0.3282.137',
    '65.0.3325.85',
    '66.0.3359.158',
    '67.0.3396.87',
    '68.0.3440.85',
    '69.0.3497.91',
    '70.0.3538.64',
    '71.0.3578.83',  # Beta
    '72.0.3626.7',  # Beta
]


def _GetDesiredVersions(apk):
  if apk != 'AndroidWebview.apk':
    return _DESIRED_VERSIONS
  # Webview .size files do not exist before M71.
  return [v for v in _DESIRED_VERSIONS if int(v.split('.')[0]) >= 71]
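
# For example, _GetDesiredVersions('AndroidWebview.apk') keeps only the M71+
# entries ('71.0.3578.83', '72.0.3626.7') from the list above.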


def _RequestedReports():
  cpu_and_apk_combos = list(itertools.product(_DESIRED_CPUS, _DESIRED_APKS))
  for cpu, apk in cpu_and_apk_combos:
    apk_versions = _GetDesiredVersions(apk)
    # Single-milestone (view) reports.
    for after_version in apk_versions:
      yield Report(cpu, apk, None, after_version)
    # Pairwise (diff) reports for every ordered pair of versions.
    for i, before_version in enumerate(apk_versions):
      for after_version in apk_versions[i + 1:]:
        yield Report(cpu, apk, before_version, after_version)


def _TemplateToRegex(template):
  # Transform '{cpu}/{apk}/...' -> '(?P<cpu>[^/]+)/(?P<apk>[^/]+)/...'
  pattern = re.sub(r'{(.*?)}', r'(?P<\1>[^/]+)', template)
  return re.compile(pattern)
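

# Worked example (not from the original source): _TemplateToRegex('{cpu}/{apk}')
# compiles r'(?P<cpu>[^/]+)/(?P<apk>[^/]+)', so matching 'arm/Monochrome.apk'
# yields groupdict() == {'cpu': 'arm', 'apk': 'Monochrome.apk'}.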


class Report(
    collections.namedtuple('Report',
                           ['cpu', 'apk', 'before_version', 'after_version'])):
  _NDJSON_TEMPLATE_VIEW = '{cpu}/{apk}/report_{after_version}.ndjson'
  _NDJSON_TEMPLATE_COMPARE = (
      '{cpu}/{apk}/report_{before_version}_{after_version}.ndjson')
  _PUSH_URL_REGEX_VIEW = _TemplateToRegex(_PUSH_URL + _NDJSON_TEMPLATE_VIEW)
  _PUSH_URL_REGEX_COMPARE = _TemplateToRegex(_PUSH_URL +
                                             _NDJSON_TEMPLATE_COMPARE)
  _SIZE_URL_TEMPLATE = '{version}/{cpu}/{apk}.size'

  @classmethod
  def FromUrl(cls, url):
    # Perform this match first since it's more restrictive.
    match = cls._PUSH_URL_REGEX_COMPARE.match(url)
    if match:
      return cls(**match.groupdict())
    match = cls._PUSH_URL_REGEX_VIEW.match(url)
    if match:
      return cls(before_version=None, **match.groupdict())
    return None

  @property
  def before_size_file_subpath(self):
    if self.before_version:
      return self._SIZE_URL_TEMPLATE.format(
          version=self.before_version, **self._asdict())
    return None

  @property
  def after_size_file_subpath(self):
    return self._SIZE_URL_TEMPLATE.format(
        version=self.after_version, **self._asdict())

  @property
  def ndjson_subpath(self):
    if self.before_version:
      return self._NDJSON_TEMPLATE_COMPARE.format(**self._asdict())
    return self._NDJSON_TEMPLATE_VIEW.format(**self._asdict())
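
  # Worked example (added here for illustration): FromUrl(
  #     _PUSH_URL + 'arm/Monochrome.apk/report_60.0.3112.116_61.0.3163.98.ndjson')
  # returns Report(cpu='arm', apk='Monochrome.apk',
  #                before_version='60.0.3112.116', after_version='61.0.3163.98').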


def _MakeDirectory(path):
  # Function is safe even from racing fork()ed processes.
  try:
    os.makedirs(path)
  except OSError as e:
    if e.errno != errno.EEXIST:
      raise


def _Shard(func, arg_tuples):
  # Fan the work out to a process pool, yielding results as they complete.
  pool = multiprocessing.Pool()
  try:
    for x in pool.imap_unordered(func, arg_tuples):
      yield x
  finally:
    pool.close()


def _DownloadOneSizeFile(arg_tuples):
  subpath, temp_dir, base_url = arg_tuples
  src = '{}/{}'.format(base_url, subpath)
  dest = os.path.join(temp_dir, subpath)
  _MakeDirectory(os.path.dirname(dest))
  subprocess.check_call(['gsutil.py', '-q', 'cp', src, dest])


@contextlib.contextmanager
def _DownloadSizeFiles(base_url, reports):
  temp_dir = tempfile.mkdtemp()
  try:
    subpaths = set(x.after_size_file_subpath for x in reports)
    subpaths.update(x.before_size_file_subpath
                    for x in reports
                    if x.before_size_file_subpath)
    logging.warning('Downloading %d .size files', len(subpaths))
    arg_tuples = ((p, temp_dir, base_url) for p in subpaths)
    for _ in _Shard(_DownloadOneSizeFile, arg_tuples):
      pass
    yield temp_dir
  finally:
    shutil.rmtree(temp_dir)
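
# Usage sketch (illustrative; mirrors the real call in main() below):
#   with _DownloadSizeFiles('gs://bucket/subdir', reports) as sizes_dir:
#     ...  # .size files mirror the bucket layout; the dir is deleted on exit.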


def _FetchExistingMilestoneReports():
  milestones = subprocess.check_output(
      ['gsutil.py', 'ls', '-R', _PUSH_URL + '*'])
  for path in milestones.splitlines()[1:]:
    report = Report.FromUrl(path)
    if report:
      yield report


def _WriteMilestonesJson(path):
  with open(path, 'w') as out_file:
    pushed_reports_obj = {
        'pushed': {
            'apk': _DESIRED_APKS,
            'cpu': _DESIRED_CPUS,
            'version': _DESIRED_VERSIONS,
        },
    }
    json.dump(pushed_reports_obj, out_file, sort_keys=True, indent=2)


def _BuildOneReport(arg_tuples):
  report, output_directory, size_file_directory = arg_tuples
  ndjson_path = os.path.join(output_directory, report.ndjson_subpath)
  _MakeDirectory(os.path.dirname(ndjson_path))
  script = os.path.join(os.path.dirname(__file__), 'supersize')
  after_size_file = os.path.join(size_file_directory,
                                 report.after_size_file_subpath)
  args = [script, 'html_report', after_size_file, ndjson_path]
  if report.before_version:
    before_size_file = os.path.join(size_file_directory,
                                    report.before_size_file_subpath)
    args += ['--diff-with', before_size_file]
  subprocess.check_output(args, stderr=subprocess.STDOUT)


def _CreateReportObjects(skip_existing):
  desired_reports = set(_RequestedReports())
  logging.warning('Querying storage bucket for existing reports.')
  existing_reports = set(_FetchExistingMilestoneReports())
  missing_reports = desired_reports - existing_reports
  stale_reports = existing_reports - desired_reports
  if stale_reports:
    # Stale reports happen when we remove a version
    # (e.g. update a beta to a stable).
    # It's probably best to leave them in case people have linked to them.
    logging.warning('Number of stale reports: %d', len(stale_reports))

  if skip_existing:
    return sorted(missing_reports)
  return sorted(desired_reports)


def main():
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument(
      'directory', help='Directory to save report files to (must not exist).')
  parser.add_argument(
      '--size-file-bucket',
      required=True,
      help='GCS bucket to find size files in. (e.g. "gs://bucket/subdir")')
  parser.add_argument(
      '--sync',
      action='store_true',
      help='Sync data files to GCS (otherwise just prints out command to run).')
  parser.add_argument(
      '--skip-existing', action='store_true', help='Skip existing reports.')
  args = parser.parse_args()

  # Anything lower than WARNING gives screens full of supersize logs.
  logging.basicConfig(
      level=logging.WARNING,
      format='%(levelname).1s %(relativeCreated)6d %(message)s')

  size_file_bucket = args.size_file_bucket.rstrip('/')
  if not size_file_bucket.startswith('gs://'):
    parser.error('Size file bucket must start with gs://')

  _MakeDirectory(args.directory)
  if os.listdir(args.directory):
    parser.error('Directory must be empty')

  reports_to_make = _CreateReportObjects(args.skip_existing)
  if not reports_to_make:
    logging.warning('No reports need to be created (due to --skip-existing).')
    return

  with _DownloadSizeFiles(args.size_file_bucket, reports_to_make) as sizes_dir:
    logging.warning('Generating %d reports.', len(reports_to_make))
    arg_tuples = ((r, args.directory, sizes_dir) for r in reports_to_make)
    for i, _ in enumerate(_Shard(_BuildOneReport, arg_tuples)):
      sys.stdout.write('\rGenerated {} of {}'.format(i + 1,
                                                     len(reports_to_make)))
      sys.stdout.flush()
    sys.stdout.write('\n')

  _WriteMilestonesJson(os.path.join(args.directory, 'milestones.json'))
  logging.warning('Reports saved to %s', args.directory)

  cmd = [
      'gsutil.py', '-m', 'rsync', '-J', '-a', 'public-read', '-r',
      args.directory, _PUSH_URL,
  ]
  if args.sync:
    subprocess.check_call(cmd)
  else:
    print
    print 'Sync files by running:'
    print ' ', ' '.join(cmd)


if __name__ == '__main__':
  main()
#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generate report files to view and/or compare (diff) milestones.
Size files are located in a Google Cloud Storage bucket for various Chrome
versions. This script generates various HTML report files to view a single
milesone, or to compare two milestones with the same CPU and APK.
Desired CPUs, APKs, and milestone versions are set in constants below. If
specified by the --skip-existing flag, the script checks what HTML report files
have already been uploaded to the GCS bucket, then works on generating the
remaining desired files.
Size files are fetched by streaming them from the source bucket, then the
html_report module handles creating a report file to diff two size files.
Reports are saved to a local directory, and once all reports are created they
can be uploaded to the destination bucket.
Reports can be uploaded automatically with the --sync flag. Otherwise, they can
be uploaded at a later point.
"""
import argparse
import codecs
import collections
import cStringIO
import errno
import itertools
import json
import logging
import multiprocessing
import os
import re
import subprocess

import archive
import diff
import html_report

PUSH_URL = 'gs://chrome-supersize/milestones/'

REPORT_URL_TEMPLATE_VIEW = '{cpu}/{apk}/report_{version2}.ndjson'
REPORT_URL_TEMPLATE_COMP = '{cpu}/{apk}/report_{version1}_{version2}.ndjson'

DESIRED_CPUS = ['arm', 'arm_64']
DESIRED_APKS = ['Monochrome.apk', 'ChromeModern.apk', 'AndroidWebview.apk']

# Versions are manually gathered from
# https://omahaproxy.appspot.com/history?os=android&channel=stable
DESIRED_VERSIONS = [
    '60.0.3112.116',
    '61.0.3163.98',
    '62.0.3202.84',
    '63.0.3239.111',
    '64.0.3282.137',
    '65.0.3325.85',
    '66.0.3359.158',
    '67.0.3396.87',
    '68.0.3440.85',
    '69.0.3497.91',
    '70.0.3538.64',
    '71.0.3578.83',  # Beta
    '72.0.3626.7',  # Beta
]


def _GetDesiredVersions(apk):
  if apk != 'AndroidWebview.apk':
    return DESIRED_VERSIONS
  # Webview .size files do not exist before M71.
  return [v for v in DESIRED_VERSIONS if int(v.split('.')[0]) >= 71]


class Report(collections.namedtuple(
    'Report', ['cpu', 'apk', 'version1', 'version2'])):

  PUSH_URL_REGEX_VIEW = re.compile((PUSH_URL + REPORT_URL_TEMPLATE_VIEW).format(
      cpu=r'(?P<cpu>[\w.]+)',
      apk=r'(?P<apk>[\w.]+)',
      version2=r'(?P<version2>[\w.]+)'
  ))
  PUSH_URL_REGEX_COMP = re.compile((PUSH_URL + REPORT_URL_TEMPLATE_COMP).format(
      cpu=r'(?P<cpu>[\w.]+)',
      apk=r'(?P<apk>[\w.]+)',
      version1=r'(?P<version1>[\w.]+)',
      version2=r'(?P<version2>[\w.]+)'
  ))

  @classmethod
  def FromUrl(cls, url):
    # Perform this match first since it's more restrictive.
    match = cls.PUSH_URL_REGEX_COMP.match(url)
    if match:
      return cls(
          match.group('cpu'),
          match.group('apk'),
          match.group('version1'),
          match.group('version2'),
      )
    match = cls.PUSH_URL_REGEX_VIEW.match(url)
    if match:
      return cls(
          match.group('cpu'),
          match.group('apk'),
          None,
          match.group('version2'),
      )
    return None


def _FetchExistingMilestoneReports():
  milestones = subprocess.check_output(['gsutil.py', 'ls', '-R',
                                        PUSH_URL + '*'])
  for path in milestones.splitlines()[1:]:
    report = Report.FromUrl(path)
    if report:
      yield report


def _SizeInfoFromGsPath(path):
  size_contents = subprocess.check_output(['gsutil.py', 'cat', path])
  file_obj = cStringIO.StringIO(size_contents)
  ret = archive.LoadAndPostProcessSizeInfo(path, file_obj=file_obj)
  file_obj.close()
  return ret


def _PossibleReportFiles():
  cpu_and_apk_combos = list(itertools.product(DESIRED_CPUS, DESIRED_APKS))
  for cpu, apk in cpu_and_apk_combos:
    apk_versions = _GetDesiredVersions(apk)
    for version2 in apk_versions:
      yield Report(cpu, apk, None, version2)
    for i, version1 in enumerate(apk_versions):
      for version2 in apk_versions[i + 1:]:
        yield Report(cpu, apk, version1, version2)


def _SetPushedReports(directory):
  outpath = os.path.join(directory, 'milestones.json')
  with codecs.open(outpath, 'w', encoding='ascii') as out_file:
    pushed_reports_obj = {
        'pushed': {
            'cpu': DESIRED_CPUS,
            'apk': DESIRED_APKS,
            'version': DESIRED_VERSIONS,
        },
    }
    json.dump(pushed_reports_obj, out_file)
    out_file.write('\n')


def _GetReportPaths(directory, template, report):
  report_dict = report._asdict()
  after_size_path = template.format(version=report.version2, **report_dict)
  if report.version1 is None:
    before_size_path = None
    out_rel = os.path.join(directory,
                           REPORT_URL_TEMPLATE_VIEW.format(**report_dict))
  else:
    before_size_path = template.format(version=report.version1, **report_dict)
    out_rel = os.path.join(directory,
                           REPORT_URL_TEMPLATE_COMP.format(**report_dict))
  out_abs = os.path.abspath(out_rel)
  return (before_size_path, after_size_path, out_abs)


def _BuildReport(paths):
  before_size_path, after_size_path, outpath = paths
  try:
    os.makedirs(os.path.dirname(outpath))
  except OSError as e:
    if e.errno != errno.EEXIST:
      raise
  size_info = _SizeInfoFromGsPath(after_size_path)
  if before_size_path:
    size_info = diff.Diff(_SizeInfoFromGsPath(before_size_path), size_info)
  html_report.BuildReportFromSizeInfo(outpath, size_info, all_symbols=False)
  return outpath


def _BuildReports(directory, bucket, skip_existing):
  try:
    if os.listdir(directory):
      raise Exception('Directory must be empty')
  except OSError as e:
    if e.errno == errno.ENOENT:
      os.makedirs(directory)
    else:
      raise

  # GCS URL template used to get size files.
  template = bucket + '/{version}/{cpu}/{apk}.size'

  def GetReportsToMake():
    desired_reports = set(_PossibleReportFiles())
    existing_reports = set(_FetchExistingMilestoneReports())
    missing_reports = desired_reports - existing_reports
    stale_reports = existing_reports - desired_reports

    logging.info('Number of desired reports: %d', len(desired_reports))
    logging.info('Number of existing reports: %d', len(existing_reports))
    if stale_reports:
      logging.warning('Number of stale reports: %d', len(stale_reports))
    if skip_existing:
      logging.info('Generating %d missing reports:', len(missing_reports))
      return sorted(missing_reports)
    logging.info('Generating all %d desired reports:', len(desired_reports))
    return sorted(desired_reports)

  reports_to_make = GetReportsToMake()
  if not reports_to_make:
    return

  paths = (_GetReportPaths(directory, template, r) for r in reports_to_make)
  processes = min(len(reports_to_make), multiprocessing.cpu_count())
  pool = multiprocessing.Pool(processes=processes)
  for path in pool.imap_unordered(_BuildReport, paths):
    logging.info('Saved %s', path)


def main():
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('directory',
                      help='Directory to save report files to '
                           '(must not exist).')
  parser.add_argument('--size-file-bucket', required=True,
                      help='GCS bucket to find size files in '
                           '(e.g. "gs://bucket/subdir").')
  parser.add_argument('--sync', action='store_true',
                      help='Sync data files to GCS '
                           '(otherwise just prints out command to run).')
  parser.add_argument('--skip-existing', action='store_true',
                      help='Skip existing reports.')
  parser.add_argument('-v', '--verbose', default=0, action='count',
                      help='Verbose level (multiple times for more)')
  args = parser.parse_args()

  logging.basicConfig(level=logging.WARNING - args.verbose * 10,
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')

  size_file_bucket = args.size_file_bucket
  if not size_file_bucket.startswith('gs://'):
    parser.error('Size file bucket must be located in Google Cloud Storage.')
  elif size_file_bucket.endswith('/'):
    # Remove trailing slash.
    size_file_bucket = size_file_bucket[:-1]

  _BuildReports(args.directory, size_file_bucket,
                skip_existing=args.skip_existing)
  _SetPushedReports(args.directory)
  logging.warning('Reports saved to %s', args.directory)

  cmd = ['gsutil.py', '-m', 'rsync', '-J', '-a', 'public-read', '-r',
         args.directory, PUSH_URL]
  if args.sync:
    subprocess.check_call(cmd)
  else:
    logging.warning('Sync files by running: \n%s', ' '.join(cmd))


if __name__ == '__main__':
  main()