Re-land: Add multi results in legacy perf dashboard script.

Currently when the script reads multiple results it overwrites the current result with the last value read. It does not compute a mean or standard deviation. This change updates the script to keep a list of values. It then computes a mean and standard deviation from the list. The first attempt at fixing this returned a list of values. This was incorrect. Also includes a comment and an assert to prevent this. Bug: 900677 Change-Id: Ided17ea36478128e003e6d9317ca5fb15128415a Reviewed-on: https://chromium-review.googlesource.com/c/1341211 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Ned Nguyen <nednguyen@google.com> Cr-Commit-Position: refs/heads/master@{#609342}

Re-land: Add multi results in legacy perf dashboard script.
Currently when the script reads multiple results it overwrites the current result with the last value read. It does not compute a mean or standard deviation. This change updates the script to keep a list of values. It then computes a mean and standard deviation from the list. The first attempt at fixing this returned a list of values. This was incorrect. Also includes a comment and an assert to prevent this. Bug: 900677 Change-Id: Ided17ea36478128e003e6d9317ca5fb15128415a Reviewed-on: https://chromium-review.googlesource.com/c/1341211 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Ned Nguyen <nednguyen@google.com> Cr-Commit-Position: refs/heads/master@{#609342}
ce4bc3cb · Jamie Madill · Commit Bot · d5279079 · ce4bc3cb · ce4bc3cb
Commit ce4bc3cb authored Nov 19, 2018 by Jamie Madill Committed by Commit Bot Nov 19, 2018
3 changed files
--- a/tools/perf/generate_legacy_perf_dashboard_json.py
+++ b/tools/perf/generate_legacy_perf_dashboard_json.py
@@ -73,11 +73,12 @@ class LegacyResultsProcessor(object):
    def __init__(self):
      self.important = False
-      self.value = 0.0
+      self.values = []
+      self.mean = 0.0
      self.stddev = 0.0
    def __str__(self):
-      result = _FormatHumanReadable(self.value)
+      result = _FormatHumanReadable(self.mean)
      if self.stddev:
        result += '+/-%s' % _FormatHumanReadable(self.stddev)
      return result
@@ -100,7 +101,7 @@ class LegacyResultsProcessor(object):
      """Returns a dictionary mapping trace names to [value, stddev]."""
      traces_dict = {}
      for name, trace in self.traces.items():
-        traces_dict[name] = [str(trace.value), str(trace.stddev)]
+        traces_dict[name] = [str(trace.mean), str(trace.stddev)]
      return traces_dict
@@ -141,37 +142,43 @@ class LegacyResultsProcessor(object):
    graph = self._graphs.get(graph_name, self.Graph())
    graph.units = (match_dict['UNITS'] or '').strip()
    trace = graph.traces.get(trace_name, self.Trace())
-    trace.value = match_dict['VALUE']
+    value = match_dict['VALUE']
    trace.important = match_dict['IMPORTANT'] or False
    # Compute the mean and standard deviation for a list or a histogram,
    # or the numerical value of a scalar value.
-    if trace.value.startswith('['):
+    if value.startswith('['):
      try:
-        value_list = [float(x) for x in trace.value.strip('[],').split(',')]
+        value_list = [float(x) for x in value.strip('[],').split(',')]
      except ValueError:
        # Report, but ignore, corrupted data lines. (Lines that are so badly
        # broken that they don't even match the RESULTS_REGEX won't be
        # detected.)
-        logging.warning("Bad test output: '%s'" % trace.value.strip())
+        logging.warning("Bad test output: '%s'" % value.strip())
        return
-      trace.value, trace.stddev, filedata = self._CalculateStatistics(
+      trace.values += value_list
-          value_list, trace_name)
+      trace.mean, trace.stddev, filedata = self._CalculateStatistics(
+        trace.values, trace_name)
      assert filedata is not None
      for filename in filedata:
        self._PrependLog(filename, filedata[filename])
-    elif trace.value.startswith('{'):
+    elif value.startswith('{'):
-      stripped = trace.value.strip('{},')
+      stripped = value.strip('{},')
      try:
-        trace.value, trace.stddev = [float(x) for x in stripped.split(',')]
+        trace.mean, trace.stddev = [float(x) for x in stripped.split(',')]
      except ValueError:
-        logging.warning("Bad test output: '%s'" % trace.value.strip())
+        logging.warning("Bad test output: '%s'" % value.strip())
        return
    else:
      try:
-        trace.value = float(trace.value)
+        trace.values.append(float(value))
+        trace.mean, trace.stddev, filedata = self._CalculateStatistics(
+          trace.values, trace_name)
+        assert filedata is not None
+        for filename in filedata:
+          self._PrependLog(filename, filedata[filename])
      except ValueError:
-        logging.warning("Bad test output: '%s'" % trace.value.strip())
+        logging.warning("Bad test output: '%s'" % value.strip())
        return
    graph.traces[trace_name] = trace
@@ -183,8 +190,14 @@ class LegacyResultsProcessor(object):
    """
    charts = {}
    for graph_name, graph in self._graphs.iteritems():
+      traces = graph.BuildTracesDict()
+      # Traces should contain exactly two elements: [mean, stddev].
+      for _, trace in traces.iteritems():
+        assert len(trace) == 2
      graph_dict = collections.OrderedDict([
-        ('traces', graph.BuildTracesDict()),
+        ('traces', traces),
        ('units', str(graph.units)),
      ])

--- a/tools/perf/testdata/artificial_graph-summary.dat
+++ b/tools/perf/testdata/artificial_graph-summary.dat
-{"traces": {"trace_with_one_sample": ["177.0", "0.0"], "trace_with_one_sample_comma": ["177.0", "0.0"], "trace_with_three_samples": ["140.0", "43.2049379894"], "trace_with_three_samples_comma": ["140.0", "43.2049379894"]}, "units": "you-nits"}
+{"units": "you-nits", "traces": {"trace_with_three_samples": ["140.0", "43.2049379894"], "trace_with_multiple_samples": ["250.0", "111.803398875"], "trace_with_one_sample_comma": ["177.0", "0.0"], "trace_with_one_sample": ["177.0", "0.0"], "trace_with_three_samples_comma": ["140.0", "43.2049379894"]}}
\ No newline at end of file
--- a/tools/perf/testdata/graphing_processor.log
+++ b/tools/perf/testdata/graphing_processor.log
@@ -47,3 +47,10 @@ RESULT artificial_graph: trace_with_one_sample= [177.0] you-nits
 RESULT artificial_graph: trace_with_one_sample_comma= [177.0,] you-nits
 RESULT artificial_graph: trace_with_three_samples= [100.0,120.0,200.0] you-nits
 RESULT artificial_graph: trace_with_three_samples_comma= [100.0,120.0,200.0,] you-nits
+# Artificial log output to test multiple results with the same name
+RESULT artificial_graph: trace_with_multiple_samples= 100 you-nits
+RESULT artificial_graph: trace_with_multiple_samples= 200 you-nits
+RESULT artificial_graph: trace_with_multiple_samples= 300 you-nits
+RESULT artificial_graph: trace_with_multiple_samples= 400 you-nits