Commit 37cb4d23 authored by Stephan Stross, committed by Commit Bot

Added flag support to comparative_tester.py

Bug: 839491

Change-Id: I9682eef59b6d1ec2157108a72976dc1b868575f9
Reviewed-on: https://chromium-review.googlesource.com/1110853
Commit-Queue: Stephan Stross <stephanstross@google.com>
Reviewed-by: Sergey Ulanov <sergeyu@chromium.org>
Cr-Commit-Position: refs/heads/master@{#570118}
parent 25013557
@@ -42,7 +42,9 @@ def AddCommonArgs(arg_parser):
   common_args.add_argument('--ssh-config', '-F',
                            help='The path to the SSH configuration used for '
                                 'connecting to the target device.')
-  common_args.add_argument('--include-system-logs', default=True, type=bool,
-                           help='Do not show system log data.')
+  common_args.add_argument('--exclude-system-logs',
+                           action='store_false',
+                           dest='include_system_logs',
+                           help='Do not show system log data.')
   common_args.add_argument('--verbose', '-v', default=False,
                            action='store_true',
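The new flag relies on argparse's store_false action with an explicit dest: the attribute still defaults to True and only flips to False when the flag is passed. (The old type=bool form was misleading, since bool("False") is truthy in Python, so --include-system-logs=False still parsed as True.) A minimal standalone sketch of the pattern, using only the names visible in the hunk above:

import argparse

parser = argparse.ArgumentParser()
# Defaults to include_system_logs=True; passing the flag sets it to False.
parser.add_argument('--exclude-system-logs',
                    action='store_false',
                    dest='include_system_logs',
                    help='Do not show system log data.')

print(parser.parse_args([]).include_system_logs)                         # True
print(parser.parse_args(['--exclude-system-logs']).include_system_logs)  # False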
...
@@ -2,3 +2,8 @@
 -MessageLoopPerfTest.PostTaskRate/1_Posting_Thread
 # crbug.com/848499
 -WaitableEventPerfTest.Throughput
+# Can cause hangs on SSH connection to test box
+# https://fuchsia.atlassian.net/browse/NET-1010
+-TaskObserverPerfTest.TaskPingPong
+-JSONPerfTest.StressTest
\ No newline at end of file
...
 # These two are disabled until crbug.com/851083 is fixed
 -DiskCachePerfTest.CacheBackendPerformance
 -DiskCachePerfTest.SimpleCacheBackendPerformance
+# Flaky test blocked until crbug.com/852937 is fixed
+-URLRequestQuicPerfTest.TestGetRequest
+# Too long-running for repeat execution
+-SimpleIndexPerfTest.EvictionPerformance
\ No newline at end of file
@@ -7,6 +7,8 @@
 # Fuchsia devices and then compares their output to each other, extracting the
 # relevant performance data from the output of gtest.
 
+import argparse
+import logging
 import os
 import re
 import subprocess
@@ -76,10 +78,9 @@ class TestTarget(object):
 
   def ExecFuchsia(self, out_dir: str, run_locally: bool) -> str:
     runner_name = "{}/bin/run_{}".format(out_dir, self._name)
-    command = [runner_name, self._filter_flag, "--include-system-logs=False"]
+    command = [runner_name, self._filter_flag, "--exclude-system-logs"]
     if not run_locally:
       command.append("-d")
-    command.append(self._filter_flag)
     return RunCommand(command,
                       "Test {} failed on fuchsia!".format(self._target))
@@ -166,11 +167,14 @@ def RunTest(target: TestTarget, run_locally: bool = False) -> None:
   print("Wrote result files")
 
-def RunGnForDirectory(dir_name: str, target_os: str) -> None:
+def RunGnForDirectory(dir_name: str, target_os: str, is_debug: bool) -> None:
   if not os.path.exists(dir_name):
     os.makedirs(dir_name)
+  debug_str = str(is_debug).lower()
   with open("{}/{}".format(dir_name, "args.gn"), "w") as args_file:
-    args_file.write("is_debug = false\n")
+    args_file.write("is_debug = {}\n".format(debug_str))
     args_file.write("dcheck_always_on = false\n")
     args_file.write("is_component_build = false\n")
     args_file.write("use_goma = true\n")
@@ -179,7 +183,8 @@ def RunGnForDirectory(dir_name: str, target_os: str) -> None:
   subprocess.run(["gn", "gen", dir_name]).check_returncode()
 
 
-def GenerateTestData(runs: int):
+def GenerateTestData(do_config: bool, do_build: bool, num_reps: int,
+                     is_debug: bool):
   DIR_SOURCE_ROOT = os.path.abspath(
       os.path.join(os.path.dirname(__file__), *([os.pardir] * 3)))
   os.chdir(DIR_SOURCE_ROOT)
@@ -192,12 +197,17 @@ def GenerateTestData(runs: int):
   test_input = []  # type: List[TestTarget]
   for target in target_spec.test_targets:
     test_input.append(TestTarget(target))
-  print("Test targets collected:\n{}".format("\n".join(
+  print("Test targets collected:\n{}".format(",".join(
       [test._target for test in test_input])))
 
-  RunGnForDirectory(linux_dir, "linux")
-  RunGnForDirectory(fuchsia_dir, "fuchsia")
-
-  # Build test targets in both output directories.
-  for directory in [linux_dir, fuchsia_dir]:
-    build_command = ["autoninja", "-C", directory] \
+  if do_config:
+    RunGnForDirectory(linux_dir, "linux", is_debug)
+    RunGnForDirectory(fuchsia_dir, "fuchsia", is_debug)
+    print("Ran GN")
+  elif is_debug:
+    logging.warning("The --is_debug flag is ignored unless --do_config is also \
+specified")
+  if do_build:
+    # Build test targets in both output directories.
+    for directory in [linux_dir, fuchsia_dir]:
+      build_command = ["autoninja", "-C", directory] \
@@ -207,7 +217,7 @@ def GenerateTestData(runs: int):
   print("Builds completed.")
 
   # Execute the tests, one at a time, per system, and collect their results.
-  for i in range(0, runs):
+  for i in range(0, num_reps):
     print("Running Test set {}".format(i))
     for test_target in test_input:
       print("Running Target {}".format(test_target._name))
@@ -218,7 +228,29 @@ def GenerateTestData(runs: int):
 
 def main() -> int:
-  GenerateTestData(1)
+  cmd_flags = argparse.ArgumentParser(
+      description="Execute tests repeatedly and collect performance data.")
+  cmd_flags.add_argument(
+      "--do-config",
+      action="store_true",
+      help="WARNING: This flag over-writes args.gn in the directories "
+           "configured. GN is executed before running the tests.")
+  cmd_flags.add_argument(
+      "--do-build",
+      action="store_true",
+      help="Build the tests before running them.")
+  cmd_flags.add_argument(
+      "--is-debug",
+      action="store_true",
+      help="This config-and-build cycle is a debug build")
+  cmd_flags.add_argument(
+      "--num-repetitions",
+      type=int,
+      default=1,
+      help="The number of times to execute each test target.")
+  cmd_flags.parse_args()
+  GenerateTestData(cmd_flags.do_config, cmd_flags.do_build,
+                   cmd_flags.num_repetitions, cmd_flags.is_debug)
   return 0
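Put together, a full configure-build-run cycle would be invoked roughly like this (illustrative; the script path is abbreviated):

  $ python comparative_tester.py --do-config --do-build --num-repetitions 5

Leaving out --do-config and --do-build simply reruns the already-built binaries the requested number of times, and, per the warning above, --is-debug only has an effect when --do-config is also given.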
...
#!/usr/bin/env python3
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""generate_perf_report.py is to be used after comparative_tester.py has been
executed and written some test data into the location specified by
target_spec.py. It writes to results_dir and reads all present test info from
raw_data_dir. Using this script should just be a matter of invoking it from
chromium/src while raw test data exists in raw_data_dir."""
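# Illustrative usage (all paths come from target_spec.py rather than flags):
#   $ python3 generate_perf_report.py
# Raw per-run JSON is read from target_spec.raw_linux_dir and
# target_spec.raw_fuchsia_dir, and one comparison JSON file per test target is
# written to target_spec.results_dir.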
import json
import logging
import math
import os
import sys
from typing import List, Dict, Set, Tuple, Optional, Any, TypeVar, Callable
import target_spec
from test_results import (TargetResult, ReadTargetFromJson, TestResult,
ResultLine)
class LineStats(object):
def __init__(self, desc: str, unit: str, time_avg: float, time_dev: float,
cv: float, samples: int) -> None:
self.desc = desc
self.time_avg = time_avg
self.time_dev = time_dev
self.cv = cv
self.unit = unit
self.sample_num = samples
def ToString(self) -> str:
if self.sample_num > 1:
return "{}: {:.5f} σ={:.5f} {} with n={} cv={}".format(
self.desc, self.time_avg, self.time_dev, self.unit, self.sample_num,
self.cv)
else:
return "{}: {:.5f} with only one sample".format(self.desc, self.time_avg)
def LineFromList(lines: List[ResultLine]) -> Optional[LineStats]:
desc = lines[0].desc
unit = lines[0].unit
times = [line.meas for line in lines]
avg, dev, cv = GenStats(times)
return LineStats(desc, unit, avg, dev, cv, len(lines))
class TestStats(object):
def __init__(self, name: str, time_avg: float, time_dev: float, cv: float,
samples: int, lines: List[LineStats]) -> None:
self.name = name
self.time_avg = time_avg
self.time_dev = time_dev
self.cv = cv
self.sample_num = samples
self.lines = lines
def ToLines(self) -> List[str]:
lines = []
if self.sample_num > 1:
lines.append("{}: {:.5f} σ={:.5f}ms with n={} cv={}".format(
self.name, self.time_avg, self.time_dev, self.sample_num, self.cv))
else:
lines.append("{}: {:.5f} with only one sample".format(
self.name, self.time_avg))
for line in self.lines:
lines.append(" {}".format(line.ToString()))
return lines
def TestFromList(tests: List[TestResult]) -> TestStats:
name = tests[0].name
avg, dev, cv = GenStats([test.time for test in tests])
lines = {} # type: Dict[str, List[ResultLine]]
for test in tests:
assert test.name == name
for line in test.lines:
if not line.desc in lines:
lines[line.desc] = [line]
else:
lines[line.desc].append(line)
test_lines = []
for _, line_list in lines.items():
stat_line = LineFromList(line_list)
if stat_line:
test_lines.append(stat_line)
return TestStats(name, avg, dev, cv, len(tests), test_lines)
class TargetStats(object):
def __init__(self, name: str, samples: int, tests: List[TestStats]) -> None:
self.name = name
self.sample_num = samples
self.tests = tests
def ToLines(self) -> List[str]:
lines = []
if self.sample_num > 1:
lines.append("{}: ".format(self.name))
else:
lines.append("{}: with only one sample".format(self.name))
for test in self.tests:
for line in test.ToLines():
lines.append(" {}".format(line))
return lines
def __format__(self, format_spec):
return "\n".join(self.ToLines())
def TargetFromList(results: List[TargetResult]) -> TargetStats:
name = results[0].name
sample_num = len(results)
tests = {} # type: Dict[str, List[TestResult]]
for result in results:
assert result.name == name
    # Group tests by name so they can be considered independently; if a test
    # flakes out of some runs, its average time can still be calculated
    # accurately from the runs in which it did appear.
for test in result.tests:
      if not test.name in tests.keys():
        tests[test.name] = [test]
      else:
        tests[test.name].append(test)
test_stats = [TestFromList(test_list) for _, test_list in tests.items()]
return TargetStats(name, sample_num, test_stats)
def GenStats(corpus: List[float]) -> Tuple[float, float, float]:
avg = sum(corpus) / len(corpus)
adjusted_sum = 0.0
for item in corpus:
adjusted = item - avg
adjusted_sum += adjusted * adjusted
dev = math.sqrt(adjusted_sum / len(corpus))
cv = dev / avg
return avg, dev, cv
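# Worked example of GenStats (population standard deviation, as computed above):
#   GenStats([2.0, 4.0, 6.0])
#   avg = 12.0 / 3 = 4.0
#   dev = sqrt(((-2)**2 + 0**2 + 2**2) / 3) = sqrt(8 / 3) ≈ 1.633
#   cv  = dev / avg ≈ 0.408
# Note that avg must be non-zero, or the cv division raises ZeroDivisionError.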
def DirectoryStats(directory: str) -> List[TargetStats]:
resultMap = {} # type: Dict[str, List[TargetResult]]
for file in os.listdir(directory):
results = ReadTargetFromJson("{}/{}".format(directory, file))
if not results.name in resultMap.keys():
resultMap[results.name] = [results]
else:
resultMap[results.name].append(results)
targets = []
for _, resultList in resultMap.items():
targets.append(TargetFromList(resultList))
return targets
def CompareTargets(linux: TargetStats, fuchsia: TargetStats) -> Dict[str, Any]:
"""Compare takes a corpus of statistics from both Fuchsia and Linux, and then
lines up the values, compares them to each other, and writes them into a
dictionary that can be JSONified.
"""
assert linux.name == fuchsia.name
paired_tests = ZipListsByPredicate(linux.tests, fuchsia.tests,
lambda test: test.name)
paired_tests = MapDictValues(paired_tests, CompareTests)
return {"name": linux.name, "tests": paired_tests}
def CompareTests(linux: TestStats, fuchsia: TestStats) -> Dict[str, Any]:
assert linux != None or fuchsia != None
if linux != None and fuchsia != None:
assert linux.name == fuchsia.name
paired_lines = ZipListsByPredicate(linux.lines, fuchsia.lines,
lambda line: line.desc)
paired_lines = MapDictValues(paired_lines, CompareLines)
result = {"lines": paired_lines, "unit": "ms"} # type: Dict[str, Any]
if linux:
result["name"] = linux.name
result["linux_avg"] = linux.time_avg
result["linux_dev"] = linux.time_dev
result["linux_cv"] = linux.cv
if fuchsia == None:
logging.warning("Fuchsia is missing test case {}".format(linux.name))
else:
result["name"] = fuchsia.name
result["fuchsia_avg"] = fuchsia.time_avg
result["fuchsia_dev"] = fuchsia.time_dev
result["fuchsia_cv"] = fuchsia.cv
def CompareLines(linux: LineStats, fuchsia: LineStats) -> Dict[str, Any]:
"""CompareLines wraps two LineStats objects up as a JSON-dumpable dict, with
missing values written as -1 (which specifically doesn't make sense for time
elapsed measurements). It also logs a warning every time a line is given which
can't be matched up. If both lines passed are None, or their units or
descriptions are not the same(which should never happen) this function fails.
"""
assert linux != None or fuchsia != None
result = {} # type: Dict[str, Any]
if linux != None and fuchsia != None:
assert linux.desc == fuchsia.desc
assert linux.unit == fuchsia.unit
if linux:
result["desc"] = linux.desc
result["unit"] = linux.unit
result["linux_avg"] = linux.time_avg
result["linux_dev"] = linux.time_dev
result["linux_cv"] = linux.cv
if fuchsia == None:
logging.warning("Fuchsia is missing test line {}".format(linux.desc))
else:
result["desc"] = fuchsia.desc
result["unit"] = fuchsia.unit
result["fuchsia_avg"] = fuchsia.time_avg
result["fuchsia_dev"] = fuchsia.time_dev
result["fuchsia_cv"] = fuchsia.cv
return result
T = TypeVar("T")
R = TypeVar("R")
def ZipListsByPredicate(left: List[T], right: List[T],
pred: Callable[[T], R]) -> Dict[R, Tuple[T, T]]:
"""This function takes two lists, and a predicate. The predicate is applied to
the values in both lists to obtain a keying value from them. Each item is then
inserted into the returned dictionary using the obtained key. Finally, after
all items have been added to the dict, any items that do not have a pair are
discarded after warning the user, and the new dictionary is returned. The
predicate should not map multiple values from one list to the same key.
"""
paired_items = {} # type: Dict [R, Tuple[T, T]]
for item in left:
key = pred(item)
# the first list shouldn't cause any key collisions
assert key not in paired_items.keys()
paired_items[key] = item, None
for item in right:
key = pred(item)
if key in paired_items.keys():
# elem 1 of the tuple is always None if the key exists in the map
prev, _ = paired_items[key]
paired_items[key] = prev, item
else:
paired_items[key] = None, item
return paired_items
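# Example with the identity predicate:
#   ZipListsByPredicate([1, 2], [2, 3], lambda x: x)
#   -> {1: (1, None), 2: (2, 2), 3: (None, 3)}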
U = TypeVar("U")
V = TypeVar("V")
def MapDictValues(dct: Dict[T, Tuple[R, U]],
predicate: Callable[[R, U], V]) -> Dict[T, V]:
"""This function applies the predicate to all the values in the dictionary,
returning a new dictionary with the new values.
"""
out_dict = {}
for key, val in dct.items():
out_dict[key] = predicate(*val)
return out_dict
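# Example:
#   MapDictValues({"a": (1, 2), "b": (3, 4)}, lambda x, y: x + y)
#   -> {"a": 3, "b": 7}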
def main():
linux_avgs = DirectoryStats(target_spec.raw_linux_dir)
fuchsia_avgs = DirectoryStats(target_spec.raw_fuchsia_dir)
paired_targets = ZipListsByPredicate(linux_avgs, fuchsia_avgs,
lambda target: target.name)
for name, targets in paired_targets.items():
comparison_dict = CompareTargets(*targets)
with open("{}/{}.json".format(target_spec.results_dir, name),
"w") as outfile:
json.dump(comparison_dict, outfile, indent=2)
if __name__ == "__main__":
sys.exit(main())
@@ -2,8 +2,6 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-from typing import Dict
-
 # Fields for use when working with a physical linux device connected locally
 linux_device_ip = "192.168.42.32"
 linux_device_user = "potat"
@@ -18,8 +16,8 @@ fuchsia_out_dir = "out/fuchsia"
 results_dir = "results"
 # The location in src that stores the information from each comparative
 # invocation of a perftest
-raw_linux_dir = "results/linux_raw"
-raw_fuchsia_dir = "results/fuchsia_raw"
+raw_linux_dir = results_dir + "/linux_raw"
+raw_fuchsia_dir = results_dir + "/fuchsia_raw"
 
 # A list of test targets to deploy to both devices. Stick to *_perftests.
 test_targets = [
...
@@ -10,8 +10,10 @@ from typing import Any, Dict, List, Tuple, Optional
 
 
 def UnitStringIsValid(unit: str) -> bool:
-  return (unit == "us/hop" or unit == "us/task" or unit == "ns/sample" or
-          unit == "ms" or unit == "s" or unit == "count")
+  accepted_units = [
+      "us/hop", "us/task", "ns/sample", "ms", "s", "count", "KB", "MB/s", "us"
+  ]
+  return unit in accepted_units
 
 
 class ResultLine(object):
@@ -32,6 +34,10 @@ class ResultLine(object):
     }
 
 
+def ReadResultLineFromJson(dct: Dict[str, Any]):
+  return ResultLine(dct["description"], float(dct["measurement"]), dct["unit"])
+
+
 def ResultLineFromStdout(line: str) -> Optional[ResultLine]:
   if "pkgsvr" in line:
     return None
@@ -75,27 +81,33 @@ class TestResult(object):
     }
 
 
-def ExtractCaseInfo(line: str) -> Tuple[str, float, str]:
+def ReadTestFromJson(obj_dict: Dict[str, Any]) -> TestResult:
+  name = obj_dict["name"]
+  time = obj_dict["time_in_ms"]
+  lines = [ReadResultLineFromJson(line) for line in obj_dict["lines"]]
+  return TestResult(name, time, lines)
+
+
+def ExtractTestInfo(line: str) -> Tuple[str, float]:
   # Trim off the [ OK ] part of the line
   trimmed = line.lstrip("[ OK ]").strip()
   try:
     test_name, rest = trimmed.split("(")  # Isolate the measurement
   except Exception as e:
     err_text = "Could not extract the case name from {} because of error {}"\
-        .format(
-            rest, str(e))
+        .format(trimmed, str(e))
     raise Exception(err_text)
   try:
-    measure, units = rest.split(")")[0].split()
+    measure, _ = rest.split(")", 1)[0].split()
   except Exception as e:
     err_text = "Could not extract measure and units from {}\
        because of error {}".format(rest, str(e))
     raise Exception(err_text)
-  return test_name.strip(), float(measure), units.strip()
+  return test_name.strip(), float(measure)
 
 
 def TaggedTestFromLines(lines: List[str]) -> TestResult:
-  test_name, time, units = ExtractCaseInfo(lines[-1])
+  test_name, time = ExtractTestInfo(lines[-1])
   res_lines = []
   for line in lines[:-1]:
     res_line = ResultLineFromStdout(line)
@@ -113,15 +125,13 @@ class TargetResult(object):
   run.
   """
 
-  def __init__(self, name: str, time: float, tests: List[TestResult]) -> None:
+  def __init__(self, name: str, tests: List[TestResult]) -> None:
     self.name = name
-    self.time = time
     self.tests = tests
 
   def ToJsonDict(self) -> Dict[str, Any]:
     return {
         "name": self.name,
-        "time_in_ms": self.time,
         "tests": [test.ToJsonDict() for test in self.tests]
     }
@@ -130,6 +140,13 @@ class TargetResult(object):
     json.dump(self.ToJsonDict(), outfile, indent=2)
 
 
+def ReadTargetFromJson(path: str):
+  with open(path, "r") as json_file:
+    dct = json.load(json_file)
+  return TargetResult(
+      dct["name"], [ReadTestFromJson(test_dct) for test_dct in dct["tests"]])
+
+
 def TargetResultFromStdout(lines: List[str], name: str) -> TargetResult:
   """TargetResultFromStdout attempts to associate GTest names to the lines of
   output that they produce. Example input looks something like the following:
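After this change, the per-target JSON that ReadTargetFromJson consumes (and that ToJsonDict produces) no longer carries a target-level time_in_ms; a minimal example with made-up values:

{
  "name": "base_perftests",
  "tests": [
    {
      "name": "MessageLoopPerfTest.PostTaskRate",
      "time_in_ms": 123.4,
      "lines": [
        {"description": "task_rate", "measurement": 0.56, "unit": "us/task"}
      ]
    }
  ]
}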
@@ -166,7 +183,4 @@ def TargetResultFromStdout(lines: List[str], name: str) -> TargetResult:
   test_cases = [
       TaggedTestFromLines(test_lines) for test_lines in test_line_lists
   ]
-  target_time = 0  # type: float
-  for test in test_cases:
-    target_time += test.time
-  return TargetResult(name, target_time, test_cases)
+  return TargetResult(name, test_cases)