Commit 928ffadc authored by gabadie, committed by Commit bot

sandwich: Make metrics extraction more customizable.

BUG=582080

Review-Url: https://codereview.chromium.org/2009883002
Cr-Commit-Position: refs/heads/master@{#397134}
parent 5f265d3b
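In short, the monolithic sandwich_metrics.ExtractMetricsFromRunnerOutputDirectory() is split into two reusable pieces: sandwich_runner.WalkRepeatedRuns(), which enumerates the per-repeat directories of a runner output directory, and sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(), which computes the benchmark-independent columns. Each benchmark builder then assembles its own CSV by prepending benchmark-specific columns. A minimal sketch of the resulting calling pattern, using the names introduced in the diff below; WriteMetricsCsv and its arguments are hypothetical wrappers added only for illustration:

```python
# Sketch only: illustrates the calling pattern this CL introduces.
# WriteMetricsCsv, runner_output_dir and csv_path are hypothetical.
import csv
import os

import loading_trace
import sandwich_metrics
import sandwich_runner


def WriteMetricsCsv(runner_output_dir, csv_path):
  additional_column_names = ['url', 'repeat_id']  # benchmark-specific columns
  rows = []
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      runner_output_dir):
    # Load the trace once and hand it to the common extractor (its contract).
    trace = loading_trace.LoadingTrace.FromJsonFile(
        os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME))
    row = {'url': trace.url, 'repeat_id': repeat_id}
    row.update(sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
        repeat_dir, trace))
    rows.append(row)
  rows.sort(key=lambda r: r['repeat_id'])
  with open(csv_path, 'w') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=(
        additional_column_names + sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
    writer.writeheader()
    for row in rows:
      writer.writerow(row)
```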
@@ -32,19 +32,9 @@ import sandwich_misc
 import tracing
 
 
-CSV_FIELD_NAMES = [
-    'repeat_id',
-    'url',
+COMMON_CSV_COLUMN_NAMES = [
     'chromium_commit',
     'platform',
-    'subresource_discoverer',
-    'subresource_count',
-    # The amount of subresources detected at SetupBenchmark step.
-    'subresource_count_theoretic',
-    # Amount of subresources for caching as suggested by the subresource
-    # discoverer.
-    'cached_subresource_count_theoretic',
-    'cached_subresource_count',
     'first_layout',
     'first_contentful_paint',
     'total_load',
@@ -222,30 +212,6 @@ def _ExtractMemoryMetrics(loading_trace):
   }
 
 
-def _ExtractBenchmarkStatistics(benchmark_setup, loading_trace):
-  """Extracts some useful statistics from a benchmark run.
-
-  Args:
-    benchmark_setup: benchmark_setup: dict representing the benchmark setup
-      JSON. The JSON format is according to:
-      PrefetchBenchmarkBuilder.PopulateLoadBenchmark.SetupBenchmark.
-    loading_trace: loading_trace_module.LoadingTrace.
-
-  Returns:
-    Dictionary with all extracted fields set.
-  """
-  return {
-    'subresource_discoverer': benchmark_setup['subresource_discoverer'],
-    'subresource_count': len(sandwich_misc.ListUrlRequests(
-        loading_trace, sandwich_misc.RequestOutcome.All)),
-    'subresource_count_theoretic': len(benchmark_setup['url_resources']),
-    'cached_subresource_count': len(sandwich_misc.ListUrlRequests(
-        loading_trace, sandwich_misc.RequestOutcome.ServedFromCache)),
-    'cached_subresource_count_theoretic':
-        len(benchmark_setup['cache_whitelist']),
-  }
-
-
 def _ExtractCompletenessRecordFromVideo(video_path):
   """Extracts the completeness record from a video.
@@ -283,7 +249,7 @@ def _ExtractCompletenessRecordFromVideo(video_path):
   return [(time, FrameProgress(hist)) for time, hist in histograms]
 
 
-def ComputeSpeedIndex(completeness_record):
+def _ComputeSpeedIndex(completeness_record):
   """Computes the speed-index from a completeness record.
 
   Args:
@@ -305,82 +271,41 @@ def ComputeSpeedIndex(completeness_record):
   return speed_index
 
 
-def _ExtractMetricsFromRunDirectory(benchmark_setup, run_directory_path):
-  """Extracts all the metrics from traces and video of a sandwich run.
+def ExtractCommonMetricsFromRepeatDirectory(repeat_dir, trace):
+  """Extracts all the metrics from traces and video of a sandwich run repeat
+  directory.
 
   Args:
-    benchmark_setup: benchmark_setup: dict representing the benchmark setup
-      JSON. The JSON format is according to:
-      PrefetchBenchmarkBuilder.PopulateLoadBenchmark.SetupBenchmark.
-    run_directory_path: Path of the run directory.
+    repeat_dir: Path of the repeat directory within a run directory.
+    trace: preloaded LoadingTrace in |repeat_dir|
+
+  Contract:
+    trace == LoadingTrace.FromJsonFile(
+        os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME))
 
   Returns:
     Dictionary of extracted metrics.
   """
-  trace_path = os.path.join(run_directory_path, 'trace.json')
-  logging.info('processing trace \'%s\'' % trace_path)
-  loading_trace = loading_trace_module.LoadingTrace.FromJsonFile(trace_path)
   run_metrics = {
-      'url': loading_trace.url,
-      'chromium_commit': loading_trace.metadata['chromium_commit'],
-      'platform': (loading_trace.metadata['platform']['os'] + '-' +
-          loading_trace.metadata['platform']['product_model'])
+      'chromium_commit': trace.metadata['chromium_commit'],
+      'platform': (trace.metadata['platform']['os'] + '-' +
+          trace.metadata['platform']['product_model'])
   }
-  run_metrics.update(_ExtractDefaultMetrics(loading_trace))
-  run_metrics.update(_ExtractMemoryMetrics(loading_trace))
-  if benchmark_setup:
-    run_metrics.update(
-        _ExtractBenchmarkStatistics(benchmark_setup, loading_trace))
-  video_path = os.path.join(run_directory_path, 'video.mp4')
+  run_metrics.update(_ExtractDefaultMetrics(trace))
+  run_metrics.update(_ExtractMemoryMetrics(trace))
+  video_path = os.path.join(repeat_dir, sandwich_runner.VIDEO_FILENAME)
   if os.path.isfile(video_path):
     logging.info('processing speed-index video \'%s\'' % video_path)
     try:
       completeness_record = _ExtractCompletenessRecordFromVideo(video_path)
-      run_metrics['speed_index'] = ComputeSpeedIndex(completeness_record)
+      run_metrics['speed_index'] = _ComputeSpeedIndex(completeness_record)
     except video.BoundingBoxNotFoundException:
       # Sometimes the bounding box for the web content area is not present. Skip
       # calculating Speed Index.
       run_metrics['speed_index'] = _FAILED_CSV_VALUE
   else:
     run_metrics['speed_index'] = _UNAVAILABLE_CSV_VALUE
-  for key, value in loading_trace.metadata['network_emulation'].iteritems():
+  for key, value in trace.metadata['network_emulation'].iteritems():
     run_metrics['net_emul.' + key] = value
+  assert set(run_metrics.keys()) == set(COMMON_CSV_COLUMN_NAMES)
   return run_metrics
-
-
-def ExtractMetricsFromRunnerOutputDirectory(benchmark_setup_path,
-                                            output_directory_path):
-  """Extracts all the metrics from all the traces of a sandwich runner output
-  directory.
-
-  Args:
-    benchmark_setup_path: Path of the JSON of the benchmark setup.
-    output_directory_path: The sandwich runner's output directory to extract the
-      metrics from.
-
-  Returns:
-    List of dictionaries.
-  """
-  benchmark_setup = None
-  if benchmark_setup_path:
-    benchmark_setup = json.load(open(benchmark_setup_path))
-  assert os.path.isdir(output_directory_path)
-  metrics = []
-  for node_name in os.listdir(output_directory_path):
-    if not os.path.isdir(os.path.join(output_directory_path, node_name)):
-      continue
-    try:
-      repeat_id = int(node_name)
-    except ValueError:
-      continue
-    run_directory_path = os.path.join(output_directory_path, node_name)
-    run_metrics = _ExtractMetricsFromRunDirectory(
-        benchmark_setup, run_directory_path)
-    run_metrics['repeat_id'] = repeat_id
-    # TODO(gabadie): Make common metrics extraction with benchmark type
-    #   specific CSV column.
-    # assert set(run_metrics.keys()) == set(CSV_FIELD_NAMES)
-    metrics.append(run_metrics)
-  assert len(metrics) > 0, ('Looks like \'{}\' was not a sandwich runner ' +
-                            'output directory.').format(output_directory_path)
-  return metrics
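The new "Contract" clause above means the caller loads the trace once and hands it in, rather than the helper re-reading trace.json from disk. A hedged sketch of a conforming call; the repeat directory path is hypothetical:

```python
# Sketch of a call that satisfies the documented contract; '/tmp/run/0' stands
# for a hypothetical repeat directory produced by SandwichRunner.
import os

import loading_trace
import sandwich_metrics
import sandwich_runner

repeat_dir = '/tmp/run/0'
trace = loading_trace.LoadingTrace.FromJsonFile(
    os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME))
common_metrics = sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
    repeat_dir, trace)
# Per the assert in the function above, the result keys are exactly
# sandwich_metrics.COMMON_CSV_COLUMN_NAMES.
assert set(common_metrics) == set(sandwich_metrics.COMMON_CSV_COLUMN_NAMES)
```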
@@ -232,7 +232,7 @@ class PageTrackTest(unittest.TestCase):
         point(400, 1.0),
     ]
     self.assertEqual(120 + 70 * 0.6 + 90 * 0.25,
-                     puller.ComputeSpeedIndex(completness_record))
+                     puller._ComputeSpeedIndex(completness_record))
 
     completness_record = [
         point(70, 0.0),
@@ -242,7 +242,7 @@ class PageTrackTest(unittest.TestCase):
         point(240, 1.0),
     ]
     self.assertEqual(80 + 60 * 0.7 + 10 * 0.4 + 20 * 0.1,
-                     puller.ComputeSpeedIndex(completness_record))
+                     puller._ComputeSpeedIndex(completness_record))
 
     completness_record = [
         point(90, 0.0),
@@ -251,7 +251,7 @@ class PageTrackTest(unittest.TestCase):
         point(230, 1.0),
     ]
     with self.assertRaises(ValueError):
-      puller.ComputeSpeedIndex(completness_record)
+      puller._ComputeSpeedIndex(completness_record)
 
 
 if __name__ == '__main__':
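For context on the expected values in these tests: a completeness record is a list of (timestamp, completeness) pairs, and the speed index is the time integral of the remaining incompleteness, so each interval contributes its duration weighted by one minus the completeness at its start. A hedged sketch of that computation; this follows the standard Speed Index definition the assertions above are consistent with, it is not a copy of the real _ComputeSpeedIndex, and the example record is made up:

```python
# Hedged sketch; the real sandwich_metrics._ComputeSpeedIndex also validates
# its input (the third test above expects a ValueError for a malformed
# record), which is omitted here.
def SpeedIndexSketch(completeness_record):
  """completeness_record: list of (msec, completeness in [0, 1]) pairs."""
  speed_index = 0.0
  last_time, last_completeness = completeness_record[0]
  for time, completeness in completeness_record[1:]:
    # Each interval contributes its duration weighted by how incomplete the
    # page still was at the start of the interval.
    speed_index += (time - last_time) * (1.0 - last_completeness)
    last_time, last_completeness = time, completeness
  return speed_index

# Made-up record with the same shape as the first assertion:
#   SpeedIndexSketch([(0, 0.0), (120, 0.4), (190, 0.75), (280, 1.0)])
#   == 120 * 1.0 + 70 * 0.6 + 90 * 0.25
```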
@@ -254,3 +254,24 @@ class SandwichRunner(object):
     self._PullCacheFromDevice()
     self._chrome_ctl = None
+
+
+def WalkRepeatedRuns(runner_output_dir):
+  """Yields unordered (repeat id, path of the repeat directory).
+
+  Args:
+    runner_output_dir: Same as for SandwichRunner.output_dir.
+  """
+  repeated_run_count = 0
+  for node_name in os.listdir(runner_output_dir):
+    repeat_dir = os.path.join(runner_output_dir, node_name)
+    if not os.path.isdir(repeat_dir):
+      continue
+    try:
+      repeat_id = int(node_name)
+    except ValueError:
+      continue
+    yield repeat_id, repeat_dir
+    repeated_run_count += 1
+  assert repeated_run_count > 0, ('Error: not a sandwich runner output '
+                                  'directory: {}').format(runner_output_dir)
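WalkRepeatedRuns() yields the repeats in directory-listing order, so callers that want deterministic CSV rows sort by repeat_id afterwards, as both builders below do. A small usage sketch; the output directory path is hypothetical:

```python
# Hypothetical usage; '/tmp/sandwich-output' stands for a SandwichRunner
# output_dir whose repeats live in sub-directories named 0/, 1/, 2/, ...
import sandwich_runner

runner_output_dir = '/tmp/sandwich-output'
# Non-integer directory names are skipped; the generator asserts if no repeat
# directory is found at all.
for repeat_id, repeat_dir in sorted(sandwich_runner.WalkRepeatedRuns(
    runner_output_dir)):
  print 'repeat %d -> %s' % (repeat_id, repeat_dir)
```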
@@ -101,6 +101,8 @@ class StaleWhileRevalidateBenchmarkBuilder(task_manager.Builder):
       depends on: <transformer_list_name>/{swr,worstcase}-run/
       depends on: some tasks saved by PopulateCommonPipelines()
     """
+    additional_column_names = ['url', 'repeat_id']
+
     task_prefix = os.path.join(transformer_list_name, '')
     if enable_swr:
       task_prefix += 'swr'
@@ -124,15 +126,27 @@ class StaleWhileRevalidateBenchmarkBuilder(task_manager.Builder):
     @self.RegisterTask(task_prefix + '-metrics.csv', [RunBenchmark])
     def ExtractMetrics():
-      trace_metrics_list = \
-          sandwich_metrics.ExtractMetricsFromRunnerOutputDirectory(
-              None, RunBenchmark.path)
-      trace_metrics_list.sort(key=lambda e: e['repeat_id'])
+      run_metrics_list = []
+      for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
+          RunBenchmark.path):
+        trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
+        logging.info('processing trace: %s', trace_path)
+        trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
+        run_metrics = {
+            'url': trace.url,
+            'repeat_id': repeat_id,
+        }
+        run_metrics.update(
+            sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
+                repeat_dir, trace))
+        run_metrics_list.append(run_metrics)
+      run_metrics_list.sort(key=lambda e: e['repeat_id'])
       with open(ExtractMetrics.path, 'w') as csv_file:
-        writer = csv.DictWriter(csv_file,
-                                fieldnames=sandwich_metrics.CSV_FIELD_NAMES)
+        writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
+            sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
-        for trace_metrics in trace_metrics_list:
+        for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
     self._common_builder.default_final_tasks.append(ExtractMetrics)
@@ -5,12 +5,14 @@
 import csv
-import logging
 import json
+import logging
 import os
 import shutil
 
 import chrome_cache
 import common_util
 import emulation
+import loading_trace
 import sandwich_metrics
 import sandwich_misc
 import sandwich_runner
@@ -186,6 +188,18 @@ class PrefetchBenchmarkBuilder(task_manager.Builder):
       depends on: common/<subresource_discoverer>-setup.json
       depends on: some tasks saved by PopulateCommonPipelines()
     """
+    additional_column_names = [
+        'url',
+        'repeat_id',
+        'subresource_discoverer',
+        'subresource_count',
+        # The amount of subresources detected at SetupBenchmark step.
+        'subresource_count_theoretic',
+        # Amount of subresources for caching as suggested by the subresource
+        # discoverer.
+        'cached_subresource_count_theoretic',
+        'cached_subresource_count']
+
     assert subresource_discoverer in sandwich_misc.SUBRESOURCE_DISCOVERERS
     assert 'common' not in sandwich_misc.SUBRESOURCE_DISCOVERERS
     shared_task_prefix = os.path.join('common', subresource_discoverer)
@@ -233,17 +247,42 @@ class PrefetchBenchmarkBuilder(task_manager.Builder):
     @self.RegisterTask(task_prefix + '-metrics.csv',
                        dependencies=[RunBenchmark])
     def ExtractMetrics():
+      # TODO(gabadie): Performance improvement: load each trace only once and
+      #   use it for validation and extraction of metrics later.
       sandwich_misc.VerifyBenchmarkOutputDirectory(
           SetupBenchmark.path, RunBenchmark.path)
-      trace_metrics_list = \
-          sandwich_metrics.ExtractMetricsFromRunnerOutputDirectory(
-              SetupBenchmark.path, RunBenchmark.path)
-      trace_metrics_list.sort(key=lambda e: e['repeat_id'])
+
+      benchmark_setup = json.load(open(SetupBenchmark.path))
+      run_metrics_list = []
+      for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
+          RunBenchmark.path):
+        trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
+        logging.info('processing trace: %s', trace_path)
+        trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
+        run_metrics = {
+            'url': trace.url,
+            'repeat_id': repeat_id,
+            'subresource_discoverer': benchmark_setup['subresource_discoverer'],
+            'subresource_count': len(sandwich_misc.ListUrlRequests(
+                trace, sandwich_misc.RequestOutcome.All)),
+            'subresource_count_theoretic':
+                len(benchmark_setup['url_resources']),
+            'cached_subresource_count': len(sandwich_misc.ListUrlRequests(
+                trace, sandwich_misc.RequestOutcome.ServedFromCache)),
+            'cached_subresource_count_theoretic':
+                len(benchmark_setup['cache_whitelist']),
+        }
+        run_metrics.update(
+            sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
+                repeat_dir, trace))
+        run_metrics_list.append(run_metrics)
+      run_metrics_list.sort(key=lambda e: e['repeat_id'])
       with open(ExtractMetrics.path, 'w') as csv_file:
-        writer = csv.DictWriter(csv_file,
-                                fieldnames=sandwich_metrics.CSV_FIELD_NAMES)
+        writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
+            sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
-        for trace_metrics in trace_metrics_list:
+        for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
     self._common_builder.default_final_tasks.append(ExtractMetrics)
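The "customizable" part of the CL is this column composition: each builder owns an additional_column_names list and prepends it to the shared sandwich_metrics.COMMON_CSV_COLUMN_NAMES when constructing the csv.DictWriter. A hedged sketch of the resulting header for the prefetch benchmark, written to stdout purely for illustration:

```python
import csv
import sys

import sandwich_metrics

# Same benchmark-specific columns as PrefetchBenchmarkBuilder above.
additional_column_names = [
    'url', 'repeat_id', 'subresource_discoverer', 'subresource_count',
    'subresource_count_theoretic', 'cached_subresource_count_theoretic',
    'cached_subresource_count']
writer = csv.DictWriter(sys.stdout, fieldnames=(
    additional_column_names + sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
writer.writeheader()
# csv.DictWriter raises ValueError for rows with keys outside fieldnames,
# which is presumably why ExtractCommonMetricsFromRepeatDirectory asserts its
# result keys match COMMON_CSV_COLUMN_NAMES exactly.
```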