Commit 37cb4d23 authored by Stephan Stross's avatar Stephan Stross Committed by Commit Bot

Added flag support to comparative_tester.py

Bug: 839491

Change-Id: I9682eef59b6d1ec2157108a72976dc1b868575f9
Reviewed-on: https://chromium-review.googlesource.com/1110853
Commit-Queue: Stephan Stross <stephanstross@google.com>
Reviewed-by: default avatarSergey Ulanov <sergeyu@chromium.org>
Cr-Commit-Position: refs/heads/master@{#570118}
parent 25013557
......@@ -42,7 +42,9 @@ def AddCommonArgs(arg_parser):
common_args.add_argument('--ssh-config', '-F',
help='The path to the SSH configuration used for '
'connecting to the target device.')
common_args.add_argument('--include-system-logs', default=True, type=bool,
common_args.add_argument('--exclude-system-logs',
action='store_false',
dest='include_system_logs',
help='Do not show system log data.')
common_args.add_argument('--verbose', '-v', default=False,
action='store_true',
......
# Fuchsia OOM bug crbug.com/851698
-MessageLoopPerfTest.PostTaskRate/1_Posting_Thread
# crbug.com/848499
-WaitableEventPerfTest.Throughput
\ No newline at end of file
-WaitableEventPerfTest.Throughput
# Can cause hangs on SSH connection to test box
# https://fuchsia.atlassian.net/browse/NET-1010
-TaskObserverPerfTest.TaskPingPong
-JSONPerfTest.StressTest
\ No newline at end of file
# These two are disabled until crbug.com/851083 is fixed
-DiskCachePerfTest.CacheBackendPerformance
-DiskCachePerfTest.SimpleCacheBackendPerformance
\ No newline at end of file
-DiskCachePerfTest.SimpleCacheBackendPerformance
# Flaky test blocked until crbug.com/852937 is fixed
-URLRequestQuicPerfTest.TestGetRequest
# Too long-running for repeat execution
-SimpleIndexPerfTest.EvictionPerformance
\ No newline at end of file
......@@ -7,6 +7,8 @@
# Fuchsia devices and then compares their output to each other, extracting the
# relevant performance data from the output of gtest.
import argparse
import logging
import os
import re
import subprocess
......@@ -76,10 +78,9 @@ class TestTarget(object):
def ExecFuchsia(self, out_dir: str, run_locally: bool) -> str:
runner_name = "{}/bin/run_{}".format(out_dir, self._name)
command = [runner_name, self._filter_flag, "--include-system-logs=False"]
command = [runner_name, self._filter_flag, "--exclude-system-logs"]
if not run_locally:
command.append("-d")
command.append(self._filter_flag)
return RunCommand(command,
"Test {} failed on fuchsia!".format(self._target))
......@@ -166,11 +167,14 @@ def RunTest(target: TestTarget, run_locally: bool = False) -> None:
print("Wrote result files")
def RunGnForDirectory(dir_name: str, target_os: str) -> None:
def RunGnForDirectory(dir_name: str, target_os: str, is_debug: bool) -> None:
if not os.path.exists(dir_name):
os.makedirs(dir_name)
debug_str = str(is_debug).lower()
with open("{}/{}".format(dir_name, "args.gn"), "w") as args_file:
args_file.write("is_debug = false\n")
args_file.write("is_debug = {}\n".format(debug_str))
args_file.write("dcheck_always_on = false\n")
args_file.write("is_component_build = false\n")
args_file.write("use_goma = true\n")
......@@ -179,7 +183,8 @@ def RunGnForDirectory(dir_name: str, target_os: str) -> None:
subprocess.run(["gn", "gen", dir_name]).check_returncode()
def GenerateTestData(runs: int):
def GenerateTestData(do_config: bool, do_build: bool, num_reps: int,
is_debug: bool):
DIR_SOURCE_ROOT = os.path.abspath(
os.path.join(os.path.dirname(__file__), *([os.pardir] * 3)))
os.chdir(DIR_SOURCE_ROOT)
......@@ -192,22 +197,27 @@ def GenerateTestData(runs: int):
test_input = [] # type: List[TestTarget]
for target in target_spec.test_targets:
test_input.append(TestTarget(target))
print("Test targets collected:\n{}".format("\n".join(
print("Test targets collected:\n{}".format(",".join(
[test._target for test in test_input])))
RunGnForDirectory(linux_dir, "linux")
RunGnForDirectory(fuchsia_dir, "fuchsia")
# Build test targets in both output directories.
for directory in [linux_dir, fuchsia_dir]:
build_command = ["autoninja", "-C", directory] \
+ [test._target for test in test_input]
RunCommand(build_command,
"autoninja failed in directory {}".format(directory))
print("Builds completed.")
if do_config:
RunGnForDirectory(linux_dir, "linux", is_debug)
RunGnForDirectory(fuchsia_dir, "fuchsia", is_debug)
print("Ran GN")
elif is_debug:
logging.warning("The --is_debug flag is ignored unless --do_config is also \
specified")
if do_build:
# Build test targets in both output directories.
for directory in [linux_dir, fuchsia_dir]:
build_command = ["autoninja", "-C", directory] \
+ [test._target for test in test_input]
RunCommand(build_command,
"autoninja failed in directory {}".format(directory))
print("Builds completed.")
# Execute the tests, one at a time, per system, and collect their results.
for i in range(0, runs):
for i in range(0, num_reps):
print("Running Test set {}".format(i))
for test_target in test_input:
print("Running Target {}".format(test_target._name))
......@@ -218,7 +228,29 @@ def GenerateTestData(runs: int):
def main() -> int:
  """Parse command-line flags and kick off the comparative test run.

  Returns:
    0 on success (argparse exits the process itself on bad flags).
  """
  cmd_flags = argparse.ArgumentParser(
      description="Execute tests repeatedly and collect performance data.")
  cmd_flags.add_argument(
      "--do-config",
      action="store_true",
      help="WARNING: This flag over-writes args.gn in the directories "
      "configured. GN is executed before running the tests.")
  cmd_flags.add_argument(
      "--do-build",
      action="store_true",
      help="Build the tests before running them.")
  cmd_flags.add_argument(
      "--is-debug",
      action="store_true",
      help="This config-and-build cycle is a debug build")
  cmd_flags.add_argument(
      "--num-repetitions",
      type=int,
      default=1,
      help="The number of times to execute each test target.")
  # BUG FIX: parse_args() returns a Namespace holding the parsed values;
  # the previous code discarded it and read the flags off the
  # ArgumentParser object itself, which raised AttributeError at runtime.
  args = cmd_flags.parse_args()
  GenerateTestData(args.do_config, args.do_build, args.num_repetitions,
                   args.is_debug)
  return 0
......
#!/usr/bin/env python3
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""generate_perf_report.py is to be used after comparative_tester.py has been
executed and written some test data into the location specified by
target_spec.py. It writes to results_dir and reads all present test info from
raw_data_dir. Using this script should just be a matter of invoking it from
chromium/src while raw test data exists in raw_data_dir."""
import json
import logging
import math
import os
import sys
from typing import List, Dict, Set, Tuple, Optional, Any, TypeVar, Callable
import target_spec
from test_results import (TargetResult, ReadTargetFromJson, TestResult,
ResultLine)
class LineStats(object):
  """Aggregated statistics for a single perftest output line.

  Holds the mean, standard deviation, and coefficient of variation of the
  measurements collected for one line description, plus the measurement
  unit and the number of samples folded into the aggregate.
  """

  def __init__(self, desc: str, unit: str, time_avg: float, time_dev: float,
               cv: float, samples: int) -> None:
    self.desc = desc
    self.unit = unit
    self.time_avg = time_avg
    self.time_dev = time_dev
    self.cv = cv
    self.sample_num = samples

  def ToString(self) -> str:
    """Render the statistics as a one-line human-readable summary."""
    if self.sample_num <= 1:
      # A single sample has no meaningful deviation to report.
      return "{}: {:.5f} with only one sample".format(self.desc, self.time_avg)
    return "{}: {:.5f} σ={:.5f} {} with n={} cv={}".format(
        self.desc, self.time_avg, self.time_dev, self.unit, self.sample_num,
        self.cv)
def LineFromList(lines: List[ResultLine]) -> Optional[LineStats]:
  """Collapse repeated measurements of one result line into a LineStats.

  Every entry is assumed to share the description and unit of the first
  entry; the measurements themselves are aggregated with GenStats.
  """
  first = lines[0]
  avg, dev, cv = GenStats([entry.meas for entry in lines])
  return LineStats(first.desc, first.unit, avg, dev, cv, len(lines))
class TestStats(object):
  """Aggregated timing statistics for one gtest case across repeated runs."""

  def __init__(self, name: str, time_avg: float, time_dev: float, cv: float,
               samples: int, lines: List[LineStats]) -> None:
    self.name = name
    self.time_avg = time_avg
    self.time_dev = time_dev
    self.cv = cv
    self.sample_num = samples
    self.lines = lines

  def ToLines(self) -> List[str]:
    """Return a test summary line followed by indented per-line stats."""
    if self.sample_num > 1:
      summary = "{}: {:.5f} σ={:.5f}ms with n={} cv={}".format(
          self.name, self.time_avg, self.time_dev, self.sample_num, self.cv)
    else:
      # A single sample has no meaningful deviation to report.
      summary = "{}: {:.5f} with only one sample".format(
          self.name, self.time_avg)
    rendered = [summary]
    rendered.extend("  {}".format(stat.ToString()) for stat in self.lines)
    return rendered
def TestFromList(tests: List[TestResult]) -> TestStats:
  """Aggregate repeated runs of a single gtest case into a TestStats.

  Groups the per-run output lines by description so each line can be
  averaged independently, then aggregates the overall test times.
  """
  name = tests[0].name
  avg, dev, cv = GenStats([test.time for test in tests])
  grouped = {}  # type: Dict[str, List[ResultLine]]
  for test in tests:
    # Every entry must belong to the same test case.
    assert test.name == name
    for line in test.lines:
      grouped.setdefault(line.desc, []).append(line)
  aggregated = []
  for line_list in grouped.values():
    stats = LineFromList(line_list)
    if stats:
      aggregated.append(stats)
  return TestStats(name, avg, dev, cv, len(tests), aggregated)
class TargetStats(object):
  """Aggregated statistics for one test target across repeated runs."""

  def __init__(self, name: str, samples: int, tests: List[TestStats]) -> None:
    self.name = name
    self.sample_num = samples
    self.tests = tests

  def ToLines(self) -> List[str]:
    """Render the target header plus the indented lines of every test."""
    if self.sample_num > 1:
      rendered = ["{}: ".format(self.name)]
    else:
      rendered = ["{}: with only one sample".format(self.name)]
    for test in self.tests:
      rendered.extend("  {}".format(line) for line in test.ToLines())
    return rendered

  def __format__(self, format_spec):
    # Lets a TargetStats be dropped straight into str.format / f-strings.
    return "\n".join(self.ToLines())
def TargetFromList(results: List[TargetResult]) -> TargetStats:
  """Aggregate repeated runs of one target into a TargetStats.

  Groups tests by name so that each test case can be considered
  independently; this keeps average times accurate even when individual
  tests flake out of a run.

  Args:
    results: Non-empty list of results, all for the same target.

  Returns:
    A TargetStats aggregating every test case across all runs.
  """
  name = results[0].name
  sample_num = len(results)
  tests = {}  # type: Dict[str, List[TestResult]]
  for result in results:
    assert result.name == name
    for test in result.tests:
      # BUG FIX: the previous code initialized the list with the first
      # occurrence AND then unconditionally appended it again (missing
      # `else`), so the first run of every test was counted twice and
      # skewed all aggregates toward it.
      tests.setdefault(test.name, []).append(test)
  test_stats = [TestFromList(test_list) for test_list in tests.values()]
  return TargetStats(name, sample_num, test_stats)
def GenStats(corpus: List[float]) -> Tuple[float, float, float]:
  """Compute mean, population standard deviation, and coefficient of
  variation for a list of measurements.

  Args:
    corpus: Non-empty list of measurements.

  Returns:
    (avg, dev, cv) where dev is the population standard deviation and cv
    is dev / avg. cv is reported as 0.0 when avg is 0 (an all-zero
    corpus); the previous code raised ZeroDivisionError in that case.
  """
  avg = sum(corpus) / len(corpus)
  variance = sum((item - avg) ** 2 for item in corpus) / len(corpus)
  dev = math.sqrt(variance)
  # Robustness fix: guard the division so an all-zero corpus is reported
  # with cv = 0 instead of crashing the whole report generation.
  cv = dev / avg if avg else 0.0
  return avg, dev, cv
def DirectoryStats(directory: str) -> List[TargetStats]:
  """Read every JSON result file in |directory| and aggregate per target.

  Files produced by different runs of the same target share a name inside
  the JSON, so results are grouped by that name before aggregation.
  """
  grouped = {}  # type: Dict[str, List[TargetResult]]
  for file in os.listdir(directory):
    result = ReadTargetFromJson("{}/{}".format(directory, file))
    grouped.setdefault(result.name, []).append(result)
  return [TargetFromList(result_list) for result_list in grouped.values()]
def CompareTargets(linux: TargetStats, fuchsia: TargetStats) -> Dict[str, Any]:
  """Pair up the Linux and Fuchsia statistics for one target.

  Lines up the per-test statistics from both platforms by test name,
  compares each pair, and returns a JSON-serializable dictionary.
  """
  assert linux.name == fuchsia.name
  by_name = ZipListsByPredicate(linux.tests, fuchsia.tests,
                                lambda test: test.name)
  compared = MapDictValues(by_name, CompareTests)
  return {"name": linux.name, "tests": compared}
def CompareTests(linux: TestStats, fuchsia: TestStats) -> Dict[str, Any]:
  """Wrap the Linux and Fuchsia stats of one test case as a JSON-dumpable
  dict.

  At least one of the two sides must be present. Per-line statistics are
  paired and compared only when both sides are present; a warning is
  logged when the Fuchsia side is missing the test entirely.

  Returns:
    A dict holding the test name, paired line comparisons, and whichever
    platform averages/deviations are available.
  """
  assert linux is not None or fuchsia is not None
  paired_lines = {}  # type: Dict[str, Any]
  # Guard the pairing: with only one side present there is nothing to
  # line up (and .lines on None would crash).
  if linux is not None and fuchsia is not None:
    assert linux.name == fuchsia.name
    paired_lines = ZipListsByPredicate(linux.lines, fuchsia.lines,
                                       lambda line: line.desc)
    paired_lines = MapDictValues(paired_lines, CompareLines)
  result = {"lines": paired_lines, "unit": "ms"}  # type: Dict[str, Any]
  if linux:
    result["name"] = linux.name
    result["linux_avg"] = linux.time_avg
    result["linux_dev"] = linux.time_dev
    result["linux_cv"] = linux.cv
  if fuchsia is None:
    logging.warning("Fuchsia is missing test case {}".format(linux.name))
  else:
    result["name"] = fuchsia.name
    result["fuchsia_avg"] = fuchsia.time_avg
    result["fuchsia_dev"] = fuchsia.time_dev
    result["fuchsia_cv"] = fuchsia.cv
  # BUG FIX: the previous version built |result| but never returned it,
  # so every comparison was serialized as null by the caller.
  return result
def CompareLines(linux: LineStats, fuchsia: LineStats) -> Dict[str, Any]:
  """Wrap two LineStats objects as a JSON-dumpable comparison dict.

  At least one side must be present; when both are, their descriptions
  and units must match. A warning is logged whenever the Fuchsia side is
  missing a line that Linux produced. Fields for an absent side are simply
  left out of the returned dict.
  """
  assert linux is not None or fuchsia is not None
  result = {}  # type: Dict[str, Any]
  if linux is not None and fuchsia is not None:
    # Paired lines must describe the same measurement.
    assert linux.desc == fuchsia.desc
    assert linux.unit == fuchsia.unit
  if linux:
    result["desc"] = linux.desc
    result["unit"] = linux.unit
    result["linux_avg"] = linux.time_avg
    result["linux_dev"] = linux.time_dev
    result["linux_cv"] = linux.cv
  if fuchsia is None:
    logging.warning("Fuchsia is missing test line {}".format(linux.desc))
  else:
    result["desc"] = fuchsia.desc
    result["unit"] = fuchsia.unit
    result["fuchsia_avg"] = fuchsia.time_avg
    result["fuchsia_dev"] = fuchsia.time_dev
    result["fuchsia_cv"] = fuchsia.cv
  return result
T = TypeVar("T")
R = TypeVar("R")


def ZipListsByPredicate(left: List[T], right: List[T],
                        pred: Callable[[T], R]) -> Dict[R, Tuple[T, T]]:
  """Join two lists into a dict of pairs keyed by pred(item).

  Every item from |left| lands in slot 0 of its pair and every item from
  |right| in slot 1; an item with no counterpart in the other list is
  paired with None. The predicate must not map two items of the same list
  to one key.
  """
  pairs = {}  # type: Dict[R, Tuple[T, T]]
  for entry in left:
    key = pred(entry)
    # Keys must be unique within a single list.
    assert key not in pairs
    pairs[key] = entry, None
  for entry in right:
    key = pred(entry)
    if key in pairs:
      # Slot 1 is still None whenever the key came from |left|.
      left_entry, _ = pairs[key]
      pairs[key] = left_entry, entry
    else:
      pairs[key] = None, entry
  return pairs
U = TypeVar("U")
V = TypeVar("V")


def MapDictValues(dct: Dict[T, Tuple[R, U]],
                  predicate: Callable[[R, U], V]) -> Dict[T, V]:
  """Return a new dict with |predicate| applied to every value tuple.

  Each value of |dct| is a pair which is splatted into |predicate|; keys
  are carried over unchanged.
  """
  return {key: predicate(*pair) for key, pair in dct.items()}
def main():
  """Pair Linux and Fuchsia result files and write comparison JSONs.

  Reads raw results from the directories named in target_spec, matches
  targets by name, and writes one comparison file per target into
  target_spec.results_dir.
  """
  linux_stats = DirectoryStats(target_spec.raw_linux_dir)
  fuchsia_stats = DirectoryStats(target_spec.raw_fuchsia_dir)
  paired_targets = ZipListsByPredicate(linux_stats, fuchsia_stats,
                                       lambda target: target.name)
  for name, pair in paired_targets.items():
    comparison = CompareTargets(*pair)
    out_path = "{}/{}.json".format(target_spec.results_dir, name)
    with open(out_path, "w") as outfile:
      json.dump(comparison, outfile, indent=2)


if __name__ == "__main__":
  sys.exit(main())
......@@ -2,8 +2,6 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from typing import Dict
# Fields for use when working with a physical linux device connected locally
linux_device_ip = "192.168.42.32"
linux_device_user = "potat"
......@@ -18,8 +16,8 @@ fuchsia_out_dir = "out/fuchsia"
results_dir = "results"
# The location in src that stores the information from each comparative
# invocation of a perftest
raw_linux_dir = "results/linux_raw"
raw_fuchsia_dir = "results/fuchsia_raw"
raw_linux_dir = results_dir + "/linux_raw"
raw_fuchsia_dir = results_dir + "/fuchsia_raw"
# A list of test targets to deploy to both devices. Stick to *_perftests.
test_targets = [
......
......@@ -10,8 +10,10 @@ from typing import Any, Dict, List, Tuple, Optional
def UnitStringIsValid(unit: str) -> bool:
return (unit == "us/hop" or unit == "us/task" or unit == "ns/sample" or
unit == "ms" or unit == "s" or unit == "count")
accepted_units = [
"us/hop", "us/task", "ns/sample", "ms", "s", "count", "KB", "MB/s", "us"
]
return unit in accepted_units
class ResultLine(object):
......@@ -32,6 +34,10 @@ class ResultLine(object):
}
def ReadResultLineFromJson(dct: Dict[str, Any]):
return ResultLine(dct["description"], float(dct["measurement"]), dct["unit"])
def ResultLineFromStdout(line: str) -> Optional[ResultLine]:
if "pkgsvr" in line:
return None
......@@ -40,12 +46,12 @@ def ResultLineFromStdout(line: str) -> Optional[ResultLine]:
# for the line description, so at least 3 total
if len(chunks) < 3:
logging.warning("The line {} contains too few space-separated pieces to be "
"parsed as a ResultLine".format(line))
"parsed as a ResultLine".format(line))
return None
unit = chunks[-1]
if not UnitStringIsValid(unit):
logging.warning("The unit string parsed from {} was {}, which was not "
"expected".format(line, unit))
"expected".format(line, unit))
return None
try:
measure = float(chunks[-2])
......@@ -53,7 +59,7 @@ def ResultLineFromStdout(line: str) -> Optional[ResultLine]:
return ResultLine(desc, measure, unit)
except ValueError as e:
logging.warning("The chunk {} could not be parsed as a valid measurement "
"because of {}".format(chunks[-2], str(e)))
"because of {}".format(chunks[-2], str(e)))
return None
......@@ -75,27 +81,33 @@ class TestResult(object):
}
def ExtractCaseInfo(line: str) -> Tuple[str, float, str]:
def ReadTestFromJson(obj_dict: Dict[str, Any]) -> TestResult:
name = obj_dict["name"]
time = obj_dict["time_in_ms"]
lines = [ReadResultLineFromJson(line) for line in obj_dict["lines"]]
return TestResult(name, time, lines)
def ExtractTestInfo(line: str) -> Tuple[str, float]:
# Trim off the [ OK ] part of the line
trimmed = line.lstrip("[ OK ]").strip()
try:
test_name, rest = trimmed.split("(") # Isolate the measurement
except Exception as e:
err_text = "Could not extract the case name from {} because of error {}"\
.format(
rest, str(e))
.format(trimmed, str(e))
raise Exception(err_text)
try:
measure, units = rest.split(")")[0].split()
measure, _ = rest.split(")", 1)[0].split()
except Exception as e:
err_text = "Could not extract measure and units from {}\
because of error {}".format(rest, str(e))
raise Exception(err_text)
return test_name.strip(), float(measure), units.strip()
return test_name.strip(), float(measure)
def TaggedTestFromLines(lines: List[str]) -> TestResult:
test_name, time, units = ExtractCaseInfo(lines[-1])
test_name, time = ExtractTestInfo(lines[-1])
res_lines = []
for line in lines[:-1]:
res_line = ResultLineFromStdout(line)
......@@ -113,15 +125,13 @@ class TargetResult(object):
run.
"""
def __init__(self, name: str, time: float, tests: List[TestResult]) -> None:
def __init__(self, name: str, tests: List[TestResult]) -> None:
self.name = name
self.time = time
self.tests = tests
def ToJsonDict(self) -> Dict[str, Any]:
return {
"name": self.name,
"time_in_ms": self.time,
"tests": [test.ToJsonDict() for test in self.tests]
}
......@@ -130,6 +140,13 @@ class TargetResult(object):
json.dump(self.ToJsonDict(), outfile, indent=2)
def ReadTargetFromJson(path: str):
with open(path, "r") as json_file:
dct = json.load(json_file)
return TargetResult(
dct["name"], [ReadTestFromJson(test_dct) for test_dct in dct["tests"]])
def TargetResultFromStdout(lines: List[str], name: str) -> TargetResult:
"""TargetResultFromStdout attempts to associate GTest names to the lines of
output that they produce. Example input looks something like the following:
......@@ -166,7 +183,4 @@ def TargetResultFromStdout(lines: List[str], name: str) -> TargetResult:
test_cases = [
TaggedTestFromLines(test_lines) for test_lines in test_line_lists
]
target_time = 0 # type: float
for test in test_cases:
target_time += test.time
return TargetResult(name, target_time, test_cases)
return TargetResult(name, test_cases)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment