Commit 3b36d9fa authored by Mikhail Khokhlov, committed by Commit Bot

[tools/perf] Implement metric computation in Results Processor

So far TBMv2 metrics have been computed on the Telemetry side and passed
to Results Processor as a special artifact. This CL implements metric
computation in RP. Metrics will be computed if this artifact is absent.

Bug: 981349
Change-Id: I46803784bd75269e578e951f5dd2e5bfaa5cae68
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1821527
Commit-Queue: Mikhail Khokhlov <khokhlov@google.com>
Reviewed-by: Juan Antonio Navarro Pérez <perezju@chromium.org>
Cr-Commit-Position: refs/heads/master@{#701662}
parent ac0be37f
......@@ -3,7 +3,20 @@
# found in the LICENSE file.
import json
import logging
import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool
import os
import time
from tracing.metrics import metric_runner
# Aggregated trace is saved under this name.
HTML_TRACE_NAME = 'trace.html'

# Results of metric computation are stored under this key in test_results.
HISTOGRAM_DICTS_KEY = 'histogram_dicts'

# This file is written by telemetry, it contains output of metric computation.
# This is a temporary hack to keep things working while we gradually move
# metric computation from Telemetry to Results Processor (crbug.com/981349).
HISTOGRAM_DICTS_FILE = 'histogram_dicts.json'
def _PoolWorker(test_result):
  """Compute TBMv2 metrics on a single test result's aggregated trace.

  Args:
    test_result: A test result dict from intermediate results. It must have
      an aggregated HTML trace artifact (HTML_TRACE_NAME) and 'tbmv2' tags
      naming the metrics to run.

  Returns:
    A list of histogram dicts produced by the metrics.
  """
  try:
    metrics = [tag['value'] for tag in test_result['tags']
               if tag['key'] == 'tbmv2']
    html_local_path = test_result['artifacts'][HTML_TRACE_NAME]['filePath']
    html_remote_url = test_result['artifacts'][HTML_TRACE_NAME]['remoteUrl']

    logging.info('%s: Starting to compute metrics on trace.',
                 test_result['testPath'])
    start = time.time()
    # The timeout needs to be coordinated with the Swarming IO timeout for the
    # task that runs this code. If this timeout is longer or close in length
    # to the swarming IO timeout then we risk being forcibly killed for not
    # producing any output. Note that this could be fixed by periodically
    # outputting logs while waiting for metrics to be calculated.
    TEN_MINUTES = 60 * 10
    mre_result = metric_runner.RunMetricOnSingleTrace(
        html_local_path, metrics, canonical_url=html_remote_url,
        timeout=TEN_MINUTES,
        extra_import_options={'trackDetailedModelStats': True})
    # Use lazy %-args rather than eager % formatting, consistent with the
    # other logging calls in this module.
    logging.info('%s: Computing metrics took %.3f seconds.',
                 test_result['testPath'], time.time() - start)

    if mre_result.failures:
      for f in mre_result.failures:
        logging.error('Failure recorded for test %s: %s',
                      test_result['testPath'], f)

    return mre_result.pairs.get('histograms', [])
  except Exception:  # pylint: disable=broad-except
    # logging exception here is the only way to get a stack trace since
    # multiprocessing's pool implementation does not save that data. See
    # crbug.com/953365.
    logging.exception('%s: Exception while calculating metric',
                      test_result['testPath'])
    raise
def ComputeTBMv2Metrics(intermediate_results):
  """Compute metrics on aggregated traces in parallel.

  For each test result in intermediate_results, either reads the histograms
  already computed by Telemetry (from the HISTOGRAM_DICTS_FILE artifact), or
  schedules the TBMv2 metrics named by the result's 'tbmv2' tags to be run on
  its aggregated HTML trace in a thread pool.

  Args:
    intermediate_results: A dict with a 'testResults' key holding a list of
      test result dicts.

  Returns:
    A list of histogram dicts aggregated over all test results.
  """
  histogram_dicts = []
  work_list = []
  for test_result in intermediate_results['testResults']:
    artifacts = test_result.get('artifacts', {})
    # TODO(crbug.com/981349): If metrics have already been computed in
    # Telemetry, we read it from the file. Remove this branch after Telemetry
    # does not compute metrics anymore.
    if HISTOGRAM_DICTS_FILE in artifacts:
      with open(artifacts[HISTOGRAM_DICTS_FILE]['filePath']) as f:
        histogram_dicts += json.load(f)
      del artifacts[HISTOGRAM_DICTS_FILE]
      continue

    # Nothing to compute without an aggregated trace and a tbmv2 metric tag.
    if (HTML_TRACE_NAME not in artifacts or
        not any(tag['key'] == 'tbmv2' for tag in test_result.get('tags', []))):
      continue

    trace_size_in_mib = (os.path.getsize(artifacts[HTML_TRACE_NAME]['filePath'])
                         / (2 ** 20))
    # Bails out on traces that are too big. See crbug.com/812631 for more
    # details.
    # TODO(crbug.com/1010041): Return a non-zero exit code in this case.
    if trace_size_in_mib > 400:
      logging.error('%s: Trace size is too big: %s MiB',
                    test_result['testPath'], trace_size_in_mib)
      continue

    work_list.append(test_result)

  if not work_list:
    return histogram_dicts

  try:
    # Note that this is speculatively halved as an attempt to fix
    # crbug.com/953365. Integer division keeps the result an int under
    # Python 3; ThreadPool requires an integer process count.
    cpu_count = multiprocessing.cpu_count() // 2
  except NotImplementedError:
    # Some platforms can raise a NotImplementedError from cpu_count()
    logging.warning('cpu_count() not implemented.')
    cpu_count = 4
  # Guard against a pool size of zero (e.g. a single-core machine, where the
  # halved cpu_count is 0), which would make ThreadPool raise.
  pool = ThreadPool(max(1, min(cpu_count, len(work_list))))
  try:
    for dicts in pool.imap_unordered(_PoolWorker, work_list):
      histogram_dicts += dicts
    pool.close()
    pool.join()
  finally:
    # Always terminate the pool so worker threads don't outlive this call.
    pool.terminate()
  return histogram_dicts
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import unittest
from core.results_processor import compute_metrics
from core.results_processor import testing
from tracing.mre import mre_result
from tracing.value import histogram
import mock
# Dotted import paths of the functions mocked out by the tests below.
RUN_METRICS_METHOD = 'tracing.metrics.metric_runner.RunMetricOnSingleTrace'
GETSIZE_METHOD = 'os.path.getsize'
class ComputeMetricsTest(unittest.TestCase):
  """Unit tests for compute_metrics.ComputeTBMv2Metrics."""

  def testComputeTBMv2Metrics(self):
    # Two test results, each carrying an aggregated trace artifact and a
    # tbmv2 metric tag, so both should be sent through metric computation.
    results = testing.IntermediateResults([
        testing.TestResult(
            'benchmark/story1',
            artifacts={
                compute_metrics.HTML_TRACE_NAME:
                    testing.Artifact('/trace1.html', 'gs://trace1.html')},
            tags=['tbmv2:metric1'],
        ),
        testing.TestResult(
            'benchmark/story2',
            artifacts={
                compute_metrics.HTML_TRACE_NAME:
                    testing.Artifact('/trace2.html', 'gs://trace2.html')},
            tags=['tbmv2:metric2'],
        ),
    ])

    hist_dict = histogram.Histogram('a', 'unitless').AsDict()
    fake_result = mre_result.MreResult()
    fake_result.AddPair('histograms', [hist_dict])

    with mock.patch(GETSIZE_METHOD) as getsize_mock:
      with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock:
        # Small trace size so both traces pass the size limit check.
        getsize_mock.return_value = 1000
        run_metrics_mock.return_value = fake_result
        histogram_dicts = compute_metrics.ComputeTBMv2Metrics(results)

    # One histogram per test result is expected.
    self.assertEqual(histogram_dicts, [hist_dict, hist_dict])

  def testComputeTBMv2MetricsTraceTooBig(self):
    results = testing.IntermediateResults([
        testing.TestResult(
            'benchmark/story1',
            artifacts={
                compute_metrics.HTML_TRACE_NAME:
                    testing.Artifact('/trace1.html', 'gs://trace1.html')},
            tags=['tbmv2:metric1'],
        ),
    ])

    with mock.patch(GETSIZE_METHOD) as getsize_mock:
      with mock.patch(RUN_METRICS_METHOD) as run_metrics_mock:
        # A trace reported as ~1 GB exceeds the size limit, so metric
        # computation must be skipped entirely.
        getsize_mock.return_value = 1e9
        histogram_dicts = compute_metrics.ComputeTBMv2Metrics(results)

    run_metrics_mock.assert_not_called()
    self.assertEqual(histogram_dicts, [])
......@@ -19,8 +19,6 @@ from tracing.value.diagnostics import generic_set
from tracing.value.diagnostics import reserved_infos
from tracing.value import histogram_set
HTML_TRACE_NAME = 'trace.html'
TELEMETRY_RESULTS = '_telemetry_results.jsonl'
FORMATS_WITH_METRICS = ['csv', 'histograms', 'html']
......@@ -85,7 +83,7 @@ def _AggregateTraces(intermediate_results):
# For now, the html trace is generated by Telemetry, so it should be there
# already. All we need to do is remove individual traces from the dict.
# TODO(crbug.com/981349): replace this with actual aggregation code.
assert HTML_TRACE_NAME in artifacts
assert compute_metrics.HTML_TRACE_NAME in artifacts
for trace in traces:
del artifacts[trace]
......
......@@ -232,6 +232,42 @@ class ResultsProcessorIntegrationTests(unittest.TestCase):
self.assertIn(['label1'], diag_values)
self.assertIn(['label2'], diag_values)
def testHistogramsOutputNoMetricsFromTelemetry(self):
  """Results Processor computes metrics when Telemetry supplied none."""
  # Create an empty file to act as the aggregated trace artifact.
  trace_file = os.path.join(self.output_dir, compute_metrics.HTML_TRACE_NAME)
  open(trace_file, 'w').close()

  self.SerializeIntermediateResults(
      test_results=[
          testing.TestResult(
              'benchmark/story',
              artifacts={
                  compute_metrics.HTML_TRACE_NAME:
                      testing.Artifact(trace_file, 'gs://trace.html')},
              tags=['tbmv2:sampleMetric'],
          ),
      ],
  )

  processor.main([
      '--output-format', 'histograms',
      '--output-dir', self.output_dir,
      '--intermediate-dir', self.intermediate_dir,
  ])

  output_file = os.path.join(
      self.output_dir, histograms_output.OUTPUT_FILENAME)
  with open(output_file) as f:
    results = json.load(f)

  out_histograms = histogram_set.HistogramSet()
  out_histograms.ImportDicts(results)
  self.assertEqual(len(out_histograms), 4)
  self.assertIsNotNone(out_histograms.GetHistogramNamed('foo'))

  # The trace URL should be the single shared diagnostic.
  diag_values = [list(v) for v in out_histograms.shared_diagnostics]
  self.assertEqual(len(diag_values), 1)
  self.assertIn(['gs://trace.html'], diag_values)
def testHtmlOutput(self):
hist_file = os.path.join(self.output_dir,
compute_metrics.HISTOGRAM_DICTS_FILE)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment