Added discrepancy and inverse RMS frame time to smoothness benchmark.

Discrepancy is a measure for the largest area of jank in a series of time stamps. Inverse RMS frame time is a generalization of frames/second that penalizes longer frame times. We can switch to the inverse of another generalized mean with a higher exponent if we want to penalize long frame times even more. R=nduca@chromium.org, tonyg@chromium.org, klobag@chromium.org, vangelis@chromium.org BUG=280627, 281544 Review URL: https://chromiumcodereview.appspot.com/23506030 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@221782 0039d316-1c4b-4281-b951-d872f2087c98

Added discrepancy and inverse RMS frame time to smoothness benchmark.
Discrepancy is a measure for the largest area of jank in a series of time stamps. Inverse RMS frame time is a generalization of frames/second that penalizes longer frame times. We can switch to the inverse of another generalized mean with a higher exponent if we want to penalize long frame times even more. R=nduca@chromium.org, tonyg@chromium.org, klobag@chromium.org, vangelis@chromium.org BUG=280627, 281544 Review URL: https://chromiumcodereview.appspot.com/23506030 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@221782 0039d316-1c4b-4281-b951-d872f2087c98
23998ad7 · ernstm@chromium.org · 9f282b48 · 23998ad7 · 23998ad7 · 23998ad7
Commit 23998ad7 authored Sep 06, 2013 by ernstm@chromium.org
6 changed files
--- a/tools/perf/measurements/smoothness.py
+++ b/tools/perf/measurements/smoothness.py
@@ -37,7 +37,7 @@ class Smoothness(page_measurement.PageMeasurement):
  def WillRunAction(self, page, tab, action):
    # TODO(ernstm): remove 'webkit' category when
    # https://codereview.chromium.org/23848006/ has landed.
-    tab.browser.StartTracing('webkit,webkit.console,cc,benchmark', 60)
+    tab.browser.StartTracing('webkit,webkit.console,benchmark', 60)
    if tab.browser.platform.IsRawDisplayFrameRateSupported():
      tab.browser.platform.StartRawDisplayFrameRateMeasurement()
    self._metrics = smoothness.SmoothnessMetrics(tab)

--- a/tools/perf/metrics/discrepancy.py
+++ b/tools/perf/metrics/discrepancy.py
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+import bisect
+import math
+def Clamp(value, low=0.0, high=1.0):
+  ''' Restrict value to the closed range [low, high]: values below low yield
+      low, values above high yield high, everything else is returned as is.
+  '''
+  # Raise to the lower bound first, then cap at the upper bound.
+  bounded_below = low if low > value else value
+  return high if high < bounded_below else bounded_below
+def NormalizeSamples(samples):
+  ''' Sort the N samples, and map them linearly to the range [0,1] such that the
+      first sample is 0.5/N and the last sample is (N-0.5)/N. Background: the
+      discrepancy of the sample set i/(N-1); i=0,...,N-1 is 2/N, twice the
+      discrepancy of the sample set (i+1/2)/N; i=0,...,N-1. In our case we
+      don't want to distinguish between these two cases, as our original domain
+      is not bounded (it is for Monte Carlo integration, where discrepancy was
+      first used).
+
+      Returns a tuple (normalized_samples, scale). normalized_samples is a new
+      sorted list; the input is not modified. scale is the factor by which
+      distances between the original samples were multiplied.
+
+      Degenerate inputs: an empty input yields ([], 1.0). If all samples are
+      equal (this includes a single sample) there is no range to stretch, so
+      every sample is mapped to 0.5 and scale is 1.0.
+  '''
+  samples = sorted(samples)
+  if not samples:
+    return samples, 1.0
+  sample_count = len(samples)
+  # The list is sorted, so its ends are the minimum and the maximum.
+  low = samples[0]
+  high = samples[-1]
+  if high == low:
+    # Avoid dividing by a zero range below.
+    return [0.5] * sample_count, 1.0
+  new_low = 0.5 / sample_count
+  new_high = (sample_count-0.5) / sample_count
+  scale = (new_high - new_low) / (high - low)
+  return [float(s - low) * scale + new_low for s in samples], scale
+def Discrepancy(samples, interval_multiplier = 10000):
+  ''' Compute the discrepancy of a set of 1D samples from the unit interval
+      [0,1]. The samples must be sorted.
+      http://en.wikipedia.org/wiki/Low-discrepancy_sequence
+      http://mathworld.wolfram.com/Discrepancy.html
+
+      The result is the largest absolute difference, over all tested
+      intervals, between the fraction of samples inside the interval and the
+      interval's length. The tested intervals are all pairs of equally spaced
+      locations on [0,1]; interval_multiplier times the number of samples is
+      the approximate number of intervals requested. Returns 0 for fewer than
+      three samples.
+  '''
+  sample_count = len(samples)
+  if sample_count < 3:
+    return 0
+  # Compute the number of locations that will roughly result in the requested
+  # number of intervals.
+  requested_intervals = sample_count * interval_multiplier
+  location_count = int(math.ceil(math.sqrt(requested_intervals*2)))
+  # Equally spaced locations covering [0,1].
+  locations = [float(k) / (location_count-1)
+               for k in xrange(0, location_count)]
+  # For each location, the number of samples less than that location.
+  samples_below = [bisect.bisect_left(samples, x) for x in locations]
+  # For each location, the number of samples less than or equal to that
+  # location.
+  samples_at_or_below = [bisect.bisect_right(samples, x) for x in locations]
+  sample_weight = 1.0 / sample_count
+  worst = 0
+  # Visit the closed interval defined by every ordered pair of locations.
+  for start in xrange(0, location_count):
+    for end in xrange(start, location_count):
+      span = locations[end] - locations[start]
+      inside = samples_at_or_below[end] - samples_below[start]
+      worst = max(abs(float(inside)*sample_weight - span), worst)
+  return worst
+def FrameDiscrepancy(frame_timestamps, absolute = True,
+                     interval_multiplier = 10000):
+  ''' A discrepancy based metric for measuring jank.
+      FrameDiscrepancy quantifies the largest area of jank observed in a series
+      of timestamps.  Note that this is different from metrics based on the
+      max_frame_time. For example, the time stamp series A = [0,1,2,3,5,6] and
+      B = [0,1,2,3,5,7] have the same max_frame_time = 2, but
+      Discrepancy(B) > Discrepancy(A).
+      Two variants of discrepancy can be computed:
+      Relative discrepancy follows the original definition of discrepancy. It
+      characterizes the largest area of jank, relative to the duration of the
+      entire time stamp series.  We normalize the raw results, because the
+      best case discrepancy for a set of N samples is 1/N (for equally spaced
+      samples), and we want our metric to report 0.0 in that case.
+      Absolute discrepancy also characterizes the largest area of jank, but its
+      value wouldn't change (except for imprecisions due to a low
+      interval_multiplier) if additional 'good' frames were added to an
+      existing list of time stamps.  Its range is [0,inf] and the unit is
+      milliseconds.
+      The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same
+      absolute discrepancy, but D has lower relative discrepancy than C.
+
+      frame_timestamps does not need to be sorted. absolute selects the
+      absolute (True) or relative (False) variant. interval_multiplier is
+      passed on to Discrepancy. Returns 0.0 for fewer than three time stamps.
+  '''
+  frame_timestamps = list(frame_timestamps)
+  # Discrepancy() reports 0 for fewer than three samples, so the result is
+  # known here. Returning early also avoids the failures on empty input and
+  # the division by zero that a single time stamp causes in the relative
+  # variant below.
+  if len(frame_timestamps) < 3:
+    return 0.0
+  samples, sample_scale = NormalizeSamples(frame_timestamps)
+  discrepancy = Discrepancy(samples, interval_multiplier)
+  inv_sample_count = 1.0 / len(samples)
+  if absolute:
+    # Compute absolute discrepancy: undo the scaling applied by
+    # NormalizeSamples to get back to the units of the time stamps.
+    discrepancy /= sample_scale
+  else:
+    # Compute relative discrepancy: map the best case 1/N to 0.0 and the
+    # worst case 1.0 to 1.0.
+    discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count))
+  return discrepancy
--- a/tools/perf/metrics/discrepancy_unittest.py
+++ b/tools/perf/metrics/discrepancy_unittest.py
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+import unittest
+import random
+from metrics import discrepancy
+def Relax(samples, iterations=10):
+  ''' Lloyd relaxation in 1D. Every interior sample is moved to the center of
+      its Voronoi cell (the span between the midpoints to its two neighbors);
+      the first and last sample keep their positions. The relaxation step is
+      applied iterations times and the resulting samples are returned.
+  '''
+  for _ in xrange(0, iterations):
+    # Cell boundaries are the midpoints between neighboring samples.
+    boundaries = [(right + left) * 0.5
+                  for left, right in zip(samples, samples[1:])]
+    # Each interior sample moves to the middle of its two cell boundaries.
+    interior = [(lower + upper) * 0.5
+                for lower, upper in zip(boundaries, boundaries[1:])]
+    samples = [samples[0]] + interior + [samples[-1]]
+  return samples
+class DiscrepancyUnitTest(unittest.TestCase):
+  ''' Unit tests for the Discrepancy, NormalizeSamples and FrameDiscrepancy
+      functions of the discrepancy module.
+  '''
+  def testRandom(self):
+    ''' Generates 10 sets of 10 random samples, computes the discrepancy,
+        relaxes the samples using Lloyd's algorithm in 1D, and computes the
+        discrepancy of the relaxed samples. Discrepancy of the relaxed samples
+        must be less than or equal to the discrepancy of the original samples.
+    '''
+    # A fixed seed keeps the generated sample sets reproducible.
+    random.seed(1234567)
+    for _ in xrange(0, 10):
+      samples = []
+      num_samples = 10
+      clock = 0.0
+      samples.append(clock)
+      # Random, strictly non-decreasing time stamps starting at 0.
+      for _ in xrange(1, num_samples):
+        clock += random.random()
+        samples.append(clock)
+      samples = discrepancy.NormalizeSamples(samples)[0]
+      d = discrepancy.Discrepancy(samples)
+      relaxed_samples = Relax(samples)
+      d_relaxed = discrepancy.Discrepancy(relaxed_samples)
+      self.assertLessEqual(d_relaxed, d)
+  def testAnalytic(self):
+    ''' Computes discrepancy for sample sets with known discrepancy. '''
+    interval_multiplier = 100000
+    # Equally spaced samples centered in their cells: discrepancy is 1/N.
+    samples = [1.0/8.0, 3.0/8.0, 5.0/8.0, 7.0/8.0]
+    d = discrepancy.Discrepancy(samples, interval_multiplier)
+    self.assertAlmostEqual(round(d, 2), 0.25)
+    # Equally spaced samples that include both end points: discrepancy is
+    # 2/N.
+    samples = [0.0, 1.0/3.0, 2.0/3.0, 1.0]
+    d = discrepancy.Discrepancy(samples, interval_multiplier)
+    self.assertAlmostEqual(round(d, 2), 0.5)
+    # Normalizing maps the second sample set onto the first one.
+    samples = discrepancy.NormalizeSamples(samples)[0]
+    d = discrepancy.Discrepancy(samples, interval_multiplier)
+    self.assertAlmostEqual(round(d, 2), 0.25)
+    # The time stamp series used as examples in the FrameDiscrepancy
+    # docstring.
+    time_stamps_a = [0, 1, 2, 3, 5, 6]
+    time_stamps_b = [0, 1, 2, 3, 5, 7]
+    time_stamps_c = [0, 2, 3, 4]
+    time_stamps_d = [0, 2, 3, 4, 5]
+    d_abs_a = discrepancy.FrameDiscrepancy(time_stamps_a, True,
+                                           interval_multiplier)
+    d_abs_b = discrepancy.FrameDiscrepancy(time_stamps_b, True,
+                                           interval_multiplier)
+    d_abs_c = discrepancy.FrameDiscrepancy(time_stamps_c, True,
+                                           interval_multiplier)
+    d_abs_d = discrepancy.FrameDiscrepancy(time_stamps_d, True,
+                                           interval_multiplier)
+    d_rel_a = discrepancy.FrameDiscrepancy(time_stamps_a, False,
+                                           interval_multiplier)
+    d_rel_b = discrepancy.FrameDiscrepancy(time_stamps_b, False,
+                                           interval_multiplier)
+    d_rel_c = discrepancy.FrameDiscrepancy(time_stamps_c, False,
+                                           interval_multiplier)
+    d_rel_d = discrepancy.FrameDiscrepancy(time_stamps_d, False,
+                                           interval_multiplier)
+    self.assertLess(d_abs_a, d_abs_b)
+    self.assertLess(d_rel_a, d_rel_b)
+    self.assertLess(d_rel_d, d_rel_c)
+    self.assertEqual(round(d_abs_d, 2), round(d_abs_c, 2))
--- a/tools/perf/metrics/gpu_rendering_stats.py
+++ b/tools/perf/metrics/gpu_rendering_stats.py
@@ -23,6 +23,7 @@ class GpuRenderingStats(object):
    self.total_time = (self.end - self.start) / 1000.0
    self.animation_frame_count = []
    self.screen_frame_count = []
+    self.screen_frame_timestamps = []
    self.paint_time = []
    self.record_time = []
    self.commit_time = []
@@ -90,10 +91,15 @@ class GpuRenderingStats(object):
      if event.start >= self.start and event.end <= self.end:
        if 'data' not in event.args:
          continue
+        if event.args['data']['screen_frame_count'] > 1:
+          raise ValueError, 'trace contains multi-frame render stats'
        self.animation_frame_count.append(
            event.args['data']['animation_frame_count'])
        self.screen_frame_count.append(
            event.args['data']['screen_frame_count'])
+        if event.args['data']['screen_frame_count'] == 1:
+          self.screen_frame_timestamps.append(
+              event.start)
        self.paint_time.append(
            event.args['data']['paint_time'])
        self.record_time.append(
@@ -117,8 +123,13 @@ class GpuRenderingStats(object):
      if event.start >= self.start and event.end <= self.end:
        if 'data' not in event.args:
          continue
+        if event.args['data']['screen_frame_count'] > 1:
+          raise ValueError, 'trace contains multi-frame render stats'
        self.screen_frame_count.append(
            event.args['data']['screen_frame_count'])
+        if event.args['data']['screen_frame_count'] == 1:
+          self.screen_frame_timestamps.append(
+              event.start)
        self.dropped_frame_count.append(
            event.args['data']['dropped_frame_count'])
        self.rasterize_time.append(

--- a/tools/perf/metrics/smoothness.py
+++ b/tools/perf/metrics/smoothness.py
@@ -4,6 +4,7 @@
 import os
 from telemetry.core import util
+from metrics import discrepancy
 TIMELINE_MARKER = 'smoothness_scroll'
@@ -88,6 +89,32 @@ def Average(numerator, denominator, scale = None, precision = None):
    avg = round(avg, precision)
  return avg
+def DivideIfPossibleOrZero(numerator, denominator):
+  ''' Returns numerator / denominator, or 0.0 if the denominator is zero or
+      otherwise falsy.
+  '''
+  return numerator / denominator if denominator else 0.0
+def GeneralizedMean(values, exponent):
+  ''' Returns the generalized mean of values for the given exponent: the
+      mean of the values raised to exponent, raised to 1/exponent. Returns
+      0.0 if values is empty.
+      http://en.wikipedia.org/wiki/Generalized_mean
+  '''
+  if not values:
+    return 0.0
+  # Start the sum at 0.0 so the accumulation is done in floating point.
+  mean_of_powers = sum((v ** exponent for v in values), 0.0) / len(values)
+  return mean_of_powers ** (1.0/exponent)
+def Median(values):
+  ''' Returns the median of values, or 0.0 if values is empty. For an even
+      number of values the result is the mean of the two middle values. The
+      input is not modified.
+  '''
+  if not values:
+    return 0.0
+  # Sort a copy so the caller's list keeps its order.
+  ordered = sorted(values)
+  n = len(ordered)
+  # Floor division keeps the index an integer under true division as well.
+  middle = n // 2
+  if n % 2:
+    return ordered[middle]
+  return 0.5 * (ordered[middle] + ordered[middle - 1])
 def CalcFirstPaintTimeResults(results, tab):
  if tab.browser.is_content_shell:
    results.Add('first_paint', 'ms', 'unsupported')
@@ -110,9 +137,25 @@ def CalcFirstPaintTimeResults(results, tab):
 def CalcResults(benchmark_stats, results):
  s = benchmark_stats
+  frame_times = []
+  for i in xrange(1, len(s.screen_frame_timestamps)):
+    frame_times.append(
+        s.screen_frame_timestamps[i] - s.screen_frame_timestamps[i-1])
  # Scroll Results
  results.Add('mean_frame_time', 'ms',
              Average(s.total_time, s.screen_frame_count, 1000, 3))
+  # Absolute discrepancy of frame time stamps (experimental)
+  results.Add('experimental_jank', '',
+              round(discrepancy.FrameDiscrepancy(s.screen_frame_timestamps,
+                                                 True), 4))
+  # Generalized mean frame time with exponent=2 (experimental)
+  results.Add('experimental_mean_frame_time', '',
+              round(GeneralizedMean(frame_times, 2.0), 2))
+  # Median frame time (experimental)
+  results.Add('experimental_median_frame_time', '',
+              round(Median(frame_times), 2))
  results.Add('dropped_percent', '%',
              Average(s.dropped_frame_count, s.screen_frame_count,
                      100, 1),

--- a/tools/perf/metrics/smoothness_unittest.py
+++ b/tools/perf/metrics/smoothness_unittest.py
@@ -4,6 +4,7 @@
 import unittest
 import random
+from metrics import discrepancy
 from metrics import smoothness
 from metrics.gpu_rendering_stats import GpuRenderingStats
 from telemetry.page import page
@@ -238,6 +239,16 @@ class SmoothnessMetricsUnitTest(unittest.TestCase):
        round(rs['totalTimeInSeconds'] / rs['numFramesSentToScreen'] * 1000.0,
              3),
        res.page_results[0]['mean_frame_time'].value, 2)
+    # We don't verify the correctness of the discrepancy computation
+    # itself, because we have a separate unit test for that purpose.
+    self.assertEquals(
+        round(discrepancy.FrameDiscrepancy(stats.screen_frame_timestamps,
+                                           True), 4),
+        res.page_results[0]['absolute_frame_discrepancy'].value)
+    self.assertEquals(
+        round(discrepancy.FrameDiscrepancy(stats.screen_frame_timestamps,
+                                           False), 4),
+        res.page_results[0]['relative_frame_discrepancy'].value)
    self.assertAlmostEquals(
        round(rs['droppedFrameCount'] / rs['numFramesSentToScreen'] * 100.0, 1),
        res.page_results[0]['dropped_percent'].value)