Commit 57a41a1c authored by Wei-Yin Chen (陳威尹), committed by Commit Bot

[Instant Start] Add scripts for benchmarking

Bug: 1121369
Change-Id: I17e0bda0b30c05d1a1f751d8b77215bd7b9cff38
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2373287
Commit-Queue: Wei-Yin Chen (陳威尹) <wychen@chromium.org>
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Xi Han <hanxi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#811619}
parent f940a2a1
@@ -6,6 +6,7 @@ package org.chromium.chrome.features.start_surface;
import androidx.annotation.VisibleForTesting;
import org.chromium.base.Log;
import org.chromium.base.SysUtils;
import org.chromium.base.metrics.RecordHistogram;
import org.chromium.chrome.browser.flags.BooleanCachedFieldTrialParameter;
@@ -25,6 +26,7 @@ import org.chromium.components.user_prefs.UserPrefs;
* which variation should be used.
*/
public class StartSurfaceConfiguration {
private static final String TAG = "StartSurfaceConfig";
public static final StringCachedFieldTrialParameter START_SURFACE_VARIATION =
new StringCachedFieldTrialParameter(
ChromeFeatureList.START_SURFACE_ANDROID, "start_surface_variation", "");
@@ -144,7 +146,7 @@ public class StartSurfaceConfiguration {
*/
public static void recordHistogram(String name, long timeDurationMs, boolean isInstantStart) {
if (timeDurationMs < 0) return;
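        // Logged at INFO level so that tools/android/instant_start/benchmark.py
        // can scrape the recorded durations from logcat.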
Log.i(TAG, "Recorded %s = %d ms", getHistogramName(name, isInstantStart), timeDurationMs);
RecordHistogram.recordTimesHistogram(
getHistogramName(name, isInstantStart), timeDurationMs);
}
[style]
based_on_style = pep8
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# //.vpython doesn't have scipy, so we have to use our own .vpython file.
# The wheel specs are from //v8/tools/callstats.py.vpython and //.vpython.
# pandas for python3 is not yet available in cipd.
# TODO(crbug.com/1130251): update to python3
python_version: "2.7"
wheel: <
name: "infra/python/wheels/scipy/${vpython_platform}"
version: "version:0.19.0"
match_tag: <
abi: "cp27mu"
platform: "manylinux1_i686"
>
match_tag: <
abi: "cp27mu"
platform: "manylinux1_x86_64"
>
>
wheel: <
name: "infra/python/wheels/numpy/${vpython_platform}"
version: "version:1.11.3"
>
wheel: <
name: "infra/python/wheels/six-py2_py3"
version: "version:1.15.0"
>
wheel: <
name: "infra/python/wheels/pandas/${vpython_platform}"
version: "version:0.23.4"
match_tag: <
platform: "win32"
>
match_tag: <
platform: "win_amd64"
>
match_tag: <
abi: "cp27mu"
platform: "manylinux1_i686"
>
match_tag: <
abi: "cp27mu"
platform: "manylinux1_x86_64"
>
match_tag: <
platform: "macosx_10_6_intel"
>
>
wheel: <
name: "infra/python/wheels/pytz-py2_py3"
version: "version:2018.4"
>
wheel: <
name: "infra/python/wheels/python-dateutil-py2_py3"
version: "version:2.7.3"
>
# Benchmarking and analyzing scripts for Instant Start
## Introduction
To understand the performance implications of a CL, we can run a local
benchmark and compare metrics before and after the change.
## Usage
Build two APKs, one from before the CL and one from after it, with the GN
target `monochrome_apk`, and make sure they are on different Chrome channels,
e.g. Canary (GN arg `android_channel = "canary"`) and default (unspecified),
because they will be installed side by side. Depending on your preferred
workflow, you can use two separate workspaces like `~/code/clankium/src` and
`~/code/clankium2/src`, use the same workspace with two different output
directories like `./out/Release` and `./out/Release2`, or simply use the same
output directory and rename the APK wrapper script, like
`out/Release/bin/monochrome_before_apk` and
`out/Release/bin/monochrome_after_apk`.
On the device, uninstall Chrome from both of these channels to make sure the
environment is clean; otherwise, chrome://flags changes and Finch trials could
introduce undesirable differences. You can use the `--reinstall` option to
automate this. When running benchmark.py, the first-run experience (FRE) is
skipped, but in the dry-run step you'll need to manually create one tab, make
sure the Feed is loaded, and swipe away the login prompt. Follow the script's
instructions.
The command line looks like this:
```bash
./tools/android/instant_start/benchmark.py --control-apk out/Release/bin/monochrome_before_apk --experiment-apk out/Release/bin/monochrome_after_apk -v --repeat 100 --reinstall
```
The metrics are persisted to `runs.pickle` by default; the filename can be
specified with the `--data-output` option. One or more data files can later be
analyzed like this:
```bash
./tools/android/instant_start/analyze.py runs.pickle
```
The output looks like this:
```
Reading runs-pixel3xl.pickle with {'model': 'Pixel 3 XL', 'start_time': datetime.datetime(2020, 9, 19, 14, 55, 34, 596731)}
100 samples on Pixel 3 XL
Median Diff with control p-value
FirstDrawCompletedTime 155.0ms -13.5ms (-8.71%) 0.000000
SingleTabTitleAvailableTime 117.0ms -13.0ms (-11.11%) 0.000000
FeedStreamCreatedTime 356.0ms -35.5ms (-9.97%) 0.000001
FeedsLoadingPlaceholderShown 94.5ms -2.0ms (-2.12%) 0.007312
FeedContentFirstLoadedTime 924.0ms -6.5ms (-0.70%) 0.034100
```
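The data file contains two pickled objects back to back: a metadata dict
followed by the list of per-sample records (see `benchmark.py`). A minimal
sketch for loading it manually, assuming the default `runs.pickle` filename:
```python
import pickle

with open('runs.pickle', 'rb') as f:
    metadata = pickle.load(f)  # e.g. {'model': 'Pixel 3 XL', 'start_time': ...}
    runs = pickle.load(f)  # list of dicts: variant_name, metric_name, value

print('%d samples from %s' % (len(runs), metadata['model']))
```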
#!/usr/bin/env vpython
#
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Analyze benchmark results for Instant start."""
from __future__ import print_function
import argparse
import pickle
import sys
import stats.analyze
def main():
"""Main program"""
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('pickles',
nargs='+',
help='The pickle files saved by benchmark.py.')
args = parser.parse_args()
runs = []
for filename in args.pickles:
        with open(filename, 'rb') as pickle_file:
            metadata = pickle.load(pickle_file)
            print('Reading "%s" with %s' % (filename, metadata))
            runs.extend(pickle.load(pickle_file))
stats.analyze.print_report(runs, metadata['model'])
if __name__ == '__main__':
sys.exit(main())
#!/usr/bin/env vpython
#
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Run benchmark for Instant start."""
from __future__ import print_function
import argparse
from datetime import datetime
import logging
import os
import pickle
import random
import re
import subprocess
import sys
import time
import stats.analyze
def get_timestamp(adb_log_line):
"""Parse the timestamp in the adb log"""
# adb log doesn't have the year field printed out.
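    # With the default "threadtime" logcat format, the first 18 characters
    # look like "09-19 14:55:34.596".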
parsed = datetime.strptime(adb_log_line[0:18], '%m-%d %H:%M:%S.%f')
return parsed.replace(year=datetime.now().year)
def keep_awake():
"""Keep the device awake. This works for non-rooted devices as well."""
os.system("adb shell svc power stayon true")
os.system("adb shell input keyevent mouse")
def get_model():
"""Get the device model."""
return subprocess.check_output(
['adb', 'shell', 'getprop', 'ro.product.model']).rstrip()
def run_apk(variant, dry_run=False, reinstall=False, check_state=False):
"""Run Chrome and return metrics"""
keep_awake()
variant_name, apk_script, extra_cmd = variant
logging.warning('Running variant "%s"', variant_name)
assert os.path.exists(apk_script), "Script '%s' doesn't exist" % apk_script
features = '--enable-features=' + ','.join([
'TabGroupsAndroid<Study', 'TabSwitcherOnReturn<Study',
'StartSurfaceAndroid<Study', 'InstantStart<Study'
])
args = '--args=' + ' '.join([
'--disable-fre', '--disable-field-trial-config', features,
'--force-fieldtrials=Study/Group',
'--force-fieldtrial-params=Study.Group:'
'tab_switcher_on_return_time_ms/0'
'/start_surface_variation/single'
'/show_last_active_tab_only/true'
'/open_ntp_instead_of_start/true'
'/exclude_mv_tiles/true'
] + extra_cmd)
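    # Everything after '--args=' is forwarded to Chrome's command line by the
    # APK wrapper script; the force-fieldtrial-params value is a single
    # slash-separated list of param/value pairs for Study.Group.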
if reinstall:
logging.warning('Uninstalling')
cmd = [apk_script, 'uninstall']
logging.info('Running %s', cmd)
logging.info(subprocess.check_output(cmd, stderr=subprocess.STDOUT))
# Use "unbuffer" to force flushing the output of |apk_script|.
cmd = ['unbuffer', apk_script, 'run', '-vvv', args]
logging.info('Running %s', cmd)
# Use unbuffered pipe to avoid blocking.
proc = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=0)
latencies = []
events_re = re.compile(
r"Startup.Android.(?P<name>[0-9a-zA-Z_]+)[^ ]* = (?P<value>[0-9.]+)")
    # Avoid buffering in proc.stdout.next().
    # "for line in proc.stdout" might block.
    # See https://stackoverflow.com/questions/1183643/
for line in iter(proc.stdout.readline, b''):
if isinstance(line, bytes):
line = line.decode(encoding='utf8')
logging.debug(line.rstrip())
if ('ActivityTaskManager' in line
or 'ActivityManager' in line) and 'START' in line:
start_timestamp = get_timestamp(line)
logging.info('Chrome started at %s', start_timestamp)
if dry_run:
time.sleep(5)
if check_state:
logging.warning('Make sure there is at least one tab, '
'and the Feed is loaded. '
'Press Enter to continue.')
sys.stdin.readline()
break
groups = events_re.search(line)
if groups:
latency = {}
latency['variant_name'] = variant_name
latency['metric_name'] = groups.group('name')
latency['value'] = groups.group('value')
latencies.append(latency)
logging.info(line.rstrip())
logging.info('Got %s = %s', groups.group('name'),
groups.group('value'))
if len(latencies) >= 5:
break
proc.kill()
return latencies
def main():
"""Entry point of the benchmark script"""
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--control-apk',
default='out/Release/bin/monochrome_apk',
help='The APK script file for control behavior.')
parser.add_argument('--experiment-apk',
default='out/Release/bin/monochrome_apk',
help='The APK script file for experiment behavior.')
parser.add_argument('--reinstall',
action='store_true',
help='Uninstall before installing the APKs.')
parser.add_argument('--repeat',
type=int,
default=3,
help='How many times to repeat running.')
parser.add_argument('--data-output',
default='runs.pickle',
help='The output file for benchmark data.')
parser.add_argument('-v',
'--verbose',
action='count',
default=0,
help='Be more verbose.')
args, _ = parser.parse_known_args()
level = logging.WARNING
if args.verbose == 1:
level = logging.INFO
elif args.verbose >= 2:
level = logging.DEBUG
logging.basicConfig(level=level,
format='%(asctime)-2s %(levelname)-8s %(message)s')
logging.addLevelName(
logging.WARNING,
"\033[1;31m%s\033[1;0m" % logging.getLevelName(logging.WARNING))
logging.addLevelName(
logging.ERROR,
"\033[1;41m%s\033[1;0m" % logging.getLevelName(logging.ERROR))
try:
subprocess.check_output('which unbuffer', shell=True)
except subprocess.CalledProcessError:
sys.exit('ERROR: "unbuffer" not found. ' +
'Install by running "sudo apt install expect".')
logging.warning('Make sure the device screen is unlocked. '
'Otherwise the benchmark might get stuck.')
# List control/experiment APKs for side-by-side comparison.
variants = []
variants.append(('control', args.control_apk, []))
variants.append(('experiment', args.experiment_apk, []))
metadata = {'model': get_model(), 'start_time': datetime.now()}
logging.warning('Pre-run for flag caching.')
for variant in variants:
run_apk(variant, dry_run=True, reinstall=args.reinstall)
logging.warning('Dry-run for manual state checking.')
for variant in variants:
run_apk(variant, dry_run=True, check_state=True)
runs = []
for i in range(args.repeat):
logging.warning('Run %d/%d', i + 1, args.repeat)
random.shuffle(variants)
for variant in variants:
result = run_apk(variant)
logging.info('Results: %s', result)
runs.extend(result)
time.sleep(10) # try to avoid overloading the device.
with open(args.data_output, 'wb') as pickle_file:
pickle.dump(metadata, pickle_file)
pickle.dump(runs, pickle_file)
logging.info('Saved "%s"', args.data_output)
stats.analyze.print_report(runs, metadata['model'])
if __name__ == '__main__':
sys.exit(main())
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Library for analyzing benchmark results for Instant start."""
import pandas
from scipy import stats
def print_report(runs, model, control='control', experiment='experiment'):
"""Print stats of A/B testing"""
all_df = pandas.DataFrame(runs, dtype=float)
report = pandas.DataFrame(
columns=['Median', 'Diff with control', 'p-value'])
for metric in sorted(set(all_df['metric_name'])):
mdf = all_df[all_df['metric_name'] == metric]
df = pandas.DataFrame()
for variant in sorted(set(all_df['variant_name'])):
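            # reset_index() aligns the control/experiment samples positionally
            # so that ttest_rel below can treat them as paired observations.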
df[variant] = mdf[mdf['variant_name'] == variant]\
.value.reset_index(drop=True)
        diff_df = df[experiment] - df[control]
n = len(diff_df)
row = {}
row['Median'] = '%.1fms' % df[experiment].median()
row['Diff with control'] = '%.1fms (%.2f%%)' % (
diff_df.median(), diff_df.median() / df[experiment].median() * 100)
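        # Paired two-sided t-test; index [1] of the result is the p-value.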
row['p-value'] = '%f' % (stats.ttest_rel(df[experiment],
df[control])[1])
report = report.append(pandas.Series(data=row, name=metric))
print('%d samples on %s' % (n, model))
print(report.sort_values(by='p-value'))