Commit 57a41a1c authored by Wei-Yin Chen (陳威尹), committed by Commit Bot

[Instant Start] Add scripts for benchmarking

Bug: 1121369
Change-Id: I17e0bda0b30c05d1a1f751d8b77215bd7b9cff38
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2373287
Commit-Queue: Wei-Yin Chen (陳威尹) <wychen@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Xi Han <hanxi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#811619}
parent f940a2a1
@@ -6,6 +6,7 @@ package org.chromium.chrome.features.start_surface;
import androidx.annotation.VisibleForTesting;
import org.chromium.base.Log;
import org.chromium.base.SysUtils;
import org.chromium.base.metrics.RecordHistogram;
import org.chromium.chrome.browser.flags.BooleanCachedFieldTrialParameter;
@@ -25,6 +26,7 @@ import org.chromium.components.user_prefs.UserPrefs;
* which variation should be used.
*/
public class StartSurfaceConfiguration {
private static final String TAG = "StartSurfaceConfig";
public static final StringCachedFieldTrialParameter START_SURFACE_VARIATION =
new StringCachedFieldTrialParameter(
ChromeFeatureList.START_SURFACE_ANDROID, "start_surface_variation", "");
@@ -144,7 +146,7 @@ public class StartSurfaceConfiguration {
*/
public static void recordHistogram(String name, long timeDurationMs, boolean isInstantStart) {
if (timeDurationMs < 0) return;
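        // Logged at INFO level so that tools/android/instant_start/benchmark.py
        // can scrape the recorded durations from logcat.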
Log.i(TAG, "Recorded %s = %d ms", getHistogramName(name, isInstantStart), timeDurationMs);
RecordHistogram.recordTimesHistogram(
getHistogramName(name, isInstantStart), timeDurationMs);
}
[style]
based_on_style = pep8
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# //.vpython doesn't have scipy, so we have to use our own .vpython file.
# The wheel specs are from //v8/tools/callstats.py.vpython and //.vpython.
# pandas for python3 is not yet available in cipd.
# TODO(crbug.com/1130251): update to python3
python_version: "2.7"
wheel: <
name: "infra/python/wheels/scipy/${vpython_platform}"
version: "version:0.19.0"
match_tag: <
abi: "cp27mu"
platform: "manylinux1_i686"
>
match_tag: <
abi: "cp27mu"
platform: "manylinux1_x86_64"
>
>
wheel: <
name: "infra/python/wheels/numpy/${vpython_platform}"
version: "version:1.11.3"
>
wheel: <
name: "infra/python/wheels/six-py2_py3"
version: "version:1.15.0"
>
wheel: <
name: "infra/python/wheels/pandas/${vpython_platform}"
version: "version:0.23.4"
match_tag: <
platform: "win32"
>
match_tag: <
platform: "win_amd64"
>
match_tag: <
abi: "cp27mu"
platform: "manylinux1_i686"
>
match_tag: <
abi: "cp27mu"
platform: "manylinux1_x86_64"
>
match_tag: <
platform: "macosx_10_6_intel"
>
>
wheel: <
name: "infra/python/wheels/pytz-py2_py3"
version: "version:2018.4"
>
wheel: <
name: "infra/python/wheels/python-dateutil-py2_py3"
version: "version:2.7.3"
>
# Benchmarking and analyzing scripts for Instant Start
## Introduction
To understand the performance implications of a CL, we can run a local
benchmark and compare metrics before and after the change.
## Usage
Build two APKs, one from before the CL and one from after it, with the GN
target `monochrome_apk`, and make sure they are on different Chrome channels,
e.g. Canary (GN arg `android_channel = "canary"`) and default (unspecified),
because they will be installed side by side. Depending on your preferred
workflow, you can use two separate workspaces like `~/code/clankium/src` and
`~/code/clankium2/src`, use the same workspace with two different output
directories like `./out/Release` and `./out/Release2`, or simply use the same
output directory and rename the APK wrapper script, like
`out/Release/bin/monochrome_before_apk` and
`out/Release/bin/monochrome_after_apk`.
On the device, uninstall Chrome from both of these channels to make sure the
environment is clean; otherwise, chrome://flags changes and Finch trials could
introduce undesirable differences. You can use the `--reinstall` option to
automate this. When running benchmark.py, the first-run experience (FRE) is
skipped, but in the dry-run step you'll need to manually create one tab, make
sure the Feed is loaded, and swipe away the login prompt. Follow the script's
instructions.
The command line looks like this:
```bash
./tools/android/instant_start/benchmark.py --control-apk out/Release/bin/monochrome_before_apk --experiment-apk out/Release/bin/monochrome_after_apk -v --repeat 100 --reinstall
```
The metrics are persisted to `runs.pickle` by default; the filename can be
specified with the `--data-output` option. One or more data files can later be
analyzed like this:
```bash
./tools/android/instant_start/analyze.py runs.pickle
```
The output looks like this:
```
Reading runs-pixel3xl.pickle with {'model': 'Pixel 3 XL', 'start_time': datetime.datetime(2020, 9, 19, 14, 55, 34, 596731)}
100 samples on Pixel 3 XL
Median Diff with control p-value
FirstDrawCompletedTime 155.0ms -13.5ms (-8.71%) 0.000000
SingleTabTitleAvailableTime 117.0ms -13.0ms (-11.11%) 0.000000
FeedStreamCreatedTime 356.0ms -35.5ms (-9.97%) 0.000001
FeedsLoadingPlaceholderShown 94.5ms -2.0ms (-2.12%) 0.007312
FeedContentFirstLoadedTime 924.0ms -6.5ms (-0.70%) 0.034100
```
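The data file contains two pickled objects back to back: a metadata dict
followed by the list of per-sample records (see `benchmark.py`). A minimal
sketch for loading it manually, assuming the default `runs.pickle` filename:
```python
import pickle

with open('runs.pickle', 'rb') as f:
    metadata = pickle.load(f)  # e.g. {'model': 'Pixel 3 XL', 'start_time': ...}
    runs = pickle.load(f)  # list of dicts: variant_name, metric_name, value

print('%d samples from %s' % (len(runs), metadata['model']))
```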
#!/usr/bin/env vpython
#
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Analyze benchmark results for Instant start."""
from __future__ import print_function
import argparse
import pickle
import sys
import stats.analyze
def main():
"""Main program"""
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('pickles',
nargs='+',
help='The pickle files saved by benchmark.py.')
args = parser.parse_args()
runs = []
for filename in args.pickles:
        with open(filename, 'rb') as pickle_file:
            metadata = pickle.load(pickle_file)
            print('Reading "%s" with %s' % (filename, metadata))
            runs.extend(pickle.load(pickle_file))
stats.analyze.print_report(runs, metadata['model'])
if __name__ == '__main__':
sys.exit(main())
#!/usr/bin/env vpython
#
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Run benchmark for Instant start."""
from __future__ import print_function
import argparse
from datetime import datetime
import logging
import os
import pickle
import random
import re
import subprocess
import sys
import time
import stats.analyze
def get_timestamp(adb_log_line):
"""Parse the timestamp in the adb log"""
# adb log doesn't have the year field printed out.
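    # With the default "threadtime" logcat format, the first 18 characters
    # look like "09-19 14:55:34.596".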
parsed = datetime.strptime(adb_log_line[0:18], '%m-%d %H:%M:%S.%f')
return parsed.replace(year=datetime.now().year)
def keep_awake():
"""Keep the device awake. This works for non-rooted devices as well."""
os.system("adb shell svc power stayon true")
os.system("adb shell input keyevent mouse")
def get_model():
"""Get the device model."""
return subprocess.check_output(
['adb', 'shell', 'getprop', 'ro.product.model']).rstrip()
def run_apk(variant, dry_run=False, reinstall=False, check_state=False):
"""Run Chrome and return metrics"""
keep_awake()
variant_name, apk_script, extra_cmd = variant
logging.warning('Running variant "%s"', variant_name)
assert os.path.exists(apk_script), "Script '%s' doesn't exist" % apk_script
features = '--enable-features=' + ','.join([
'TabGroupsAndroid<Study', 'TabSwitcherOnReturn<Study',
'StartSurfaceAndroid<Study', 'InstantStart<Study'
])
args = '--args=' + ' '.join([
'--disable-fre', '--disable-field-trial-config', features,
'--force-fieldtrials=Study/Group',
'--force-fieldtrial-params=Study.Group:'
'tab_switcher_on_return_time_ms/0'
'/start_surface_variation/single'
'/show_last_active_tab_only/true'
'/open_ntp_instead_of_start/true'
'/exclude_mv_tiles/true'
] + extra_cmd)
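    # Everything after '--args=' is forwarded to Chrome's command line by the
    # APK wrapper script; the force-fieldtrial-params value is a single
    # slash-separated list of param/value pairs for Study.Group.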
if reinstall:
logging.warning('Uninstalling')
cmd = [apk_script, 'uninstall']
logging.info('Running %s', cmd)
logging.info(subprocess.check_output(cmd, stderr=subprocess.STDOUT))
# Use "unbuffer" to force flushing the output of |apk_script|.
cmd = ['unbuffer', apk_script, 'run', '-vvv', args]
logging.info('Running %s', cmd)
# Use unbuffered pipe to avoid blocking.
proc = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=0)
latencies = []
events_re = re.compile(
r"Startup.Android.(?P<name>[0-9a-zA-Z_]+)[^ ]* = (?P<value>[0-9.]+)")
    # Avoid buffering in proc.stdout.next().
    # "for line in proc.stdout" might block.
    # See https://stackoverflow.com/questions/1183643/
for line in iter(proc.stdout.readline, b''):
if isinstance(line, bytes):
line = line.decode(encoding='utf8')
logging.debug(line.rstrip())
if ('ActivityTaskManager' in line
or 'ActivityManager' in line) and 'START' in line:
start_timestamp = get_timestamp(line)
logging.info('Chrome started at %s', start_timestamp)
if dry_run:
time.sleep(5)
if check_state:
logging.warning('Make sure there is at least one tab, '
'and the Feed is loaded. '
'Press Enter to continue.')
sys.stdin.readline()
break
groups = events_re.search(line)
if groups:
latency = {}
latency['variant_name'] = variant_name
latency['metric_name'] = groups.group('name')
latency['value'] = groups.group('value')
latencies.append(latency)
logging.info(line.rstrip())
logging.info('Got %s = %s', groups.group('name'),
groups.group('value'))
if len(latencies) >= 5:
break
proc.kill()
return latencies
def main():
"""Entry point of the benchmark script"""
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--control-apk',
default='out/Release/bin/monochrome_apk',
help='The APK script file for control behavior.')
parser.add_argument('--experiment-apk',
default='out/Release/bin/monochrome_apk',
help='The APK script file for experiment behavior.')
parser.add_argument('--reinstall',
action='store_true',
help='Uninstall before installing the APKs.')
parser.add_argument('--repeat',
type=int,
default=3,
help='How many times to repeat running.')
parser.add_argument('--data-output',
default='runs.pickle',
help='The output file for benchmark data.')
parser.add_argument('-v',
'--verbose',
action='count',
default=0,
help='Be more verbose.')
args, _ = parser.parse_known_args()
level = logging.WARNING
if args.verbose == 1:
level = logging.INFO
elif args.verbose >= 2:
level = logging.DEBUG
logging.basicConfig(level=level,
format='%(asctime)-2s %(levelname)-8s %(message)s')
logging.addLevelName(
logging.WARNING,
"\033[1;31m%s\033[1;0m" % logging.getLevelName(logging.WARNING))
logging.addLevelName(
logging.ERROR,
"\033[1;41m%s\033[1;0m" % logging.getLevelName(logging.ERROR))
try:
subprocess.check_output('which unbuffer', shell=True)
except subprocess.CalledProcessError:
sys.exit('ERROR: "unbuffer" not found. ' +
'Install by running "sudo apt install expect".')
logging.warning('Make sure the device screen is unlocked. '
'Otherwise the benchmark might get stuck.')
# List control/experiment APKs for side-by-side comparison.
variants = []
variants.append(('control', args.control_apk, []))
variants.append(('experiment', args.experiment_apk, []))
metadata = {'model': get_model(), 'start_time': datetime.now()}
logging.warning('Pre-run for flag caching.')
for variant in variants:
run_apk(variant, dry_run=True, reinstall=args.reinstall)
logging.warning('Dry-run for manual state checking.')
for variant in variants:
run_apk(variant, dry_run=True, check_state=True)
runs = []
for i in range(args.repeat):
logging.warning('Run %d/%d', i + 1, args.repeat)
random.shuffle(variants)
for variant in variants:
result = run_apk(variant)
logging.info('Results: %s', result)
runs.extend(result)
time.sleep(10) # try to avoid overloading the device.
with open(args.data_output, 'wb') as pickle_file:
pickle.dump(metadata, pickle_file)
pickle.dump(runs, pickle_file)
logging.info('Saved "%s"', args.data_output)
stats.analyze.print_report(runs, metadata['model'])
if __name__ == '__main__':
sys.exit(main())
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Library for analyzing benchmark results for Instant start."""
import pandas
from scipy import stats
def print_report(runs, model, control='control', experiment='experiment'):
"""Print stats of A/B testing"""
all_df = pandas.DataFrame(runs, dtype=float)
report = pandas.DataFrame(
columns=['Median', 'Diff with control', 'p-value'])
for metric in sorted(set(all_df['metric_name'])):
mdf = all_df[all_df['metric_name'] == metric]
df = pandas.DataFrame()
for variant in sorted(set(all_df['variant_name'])):
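            # reset_index() aligns the control/experiment samples positionally
            # so that ttest_rel below can treat them as paired observations.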
df[variant] = mdf[mdf['variant_name'] == variant]\
.value.reset_index(drop=True)
        diff_df = df[experiment] - df[control]
n = len(diff_df)
row = {}
row['Median'] = '%.1fms' % df[experiment].median()
row['Diff with control'] = '%.1fms (%.2f%%)' % (
diff_df.median(), diff_df.median() / df[experiment].median() * 100)
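        # Paired two-sided t-test; index [1] of the result is the p-value.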
row['p-value'] = '%f' % (stats.ttest_rel(df[experiment],
df[control])[1])
report = report.append(pandas.Series(data=row, name=metric))
print('%d samples on %s' % (n, model))
print(report.sort_values(by='p-value'))