[Perf Waterfall] Assume that stories without known runtimes take 10 seconds to run.

This helps to create balanced shard maps. Specifically for weblayer bot, this will help us add startup.mobile without unbalancing the shard maps.

Bug: 1026327
Change-Id: I8f5ebceba13cc6f7de8450389ecf59410425e1c3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1949239
Commit-Queue: Caleb Rouleau <crouleau@chromium.org>
Reviewed-by: John Chen <johnchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#721292}

[Perf Waterfall] Assume that stories without known runtimes take 10 seconds to run.
This helps to create balanced shard maps. Specifically for weblayer bot, this will help us add startup.mobile without unbalancing the shard maps.

Bug: 1026327
Change-Id: I8f5ebceba13cc6f7de8450389ecf59410425e1c3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1949239
Commit-Queue: Caleb Rouleau <crouleau@chromium.org>
Reviewed-by: John Chen <johnchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#721292}
2ad514bb · Caleb Rouleau · Commit Bot · 4fa87cf7 · 2ad514bb · 2ad514bb
Commit 2ad514bb authored Dec 04, 2019 by Caleb Rouleau Committed by Commit Bot Dec 04, 2019
Showing 2 changed files with 20 additions and 19 deletions

tools/perf/core/sharding_map_generator.py tools/perf/core/sharding_map_generator.py +7 -4

tools/perf/core/sharding_map_generator_unittest.py tools/perf/core/sharding_map_generator_unittest.py +13 -15

No files found.
--- a/tools/perf/core/sharding_map_generator.py
+++ b/tools/perf/core/sharding_map_generator.py
@@ -144,18 +144,21 @@ def _gather_timing_data(benchmarks_to_shard, timing_data, repeat):
  for b in benchmarks_to_shard:
    story_list = b.stories
    benchmarks_data_by_name[b.name] = b
-    # Initialize the duration of all stories to be shard to 1 * repeat.
+    # Initialize the duration of all stories to be sharded to 10 seconds.
    # The reasons are:
-    # 1) Even if the stories are skipped, they still have non neligible
+    # 1) Even if the stories are skipped, they still have non negligible
    #    overhead.
    # 2) For a case of sharding a set of benchmarks with no existing data about
    #    timing, initializing the stories time within a single repeat to 1 leads
    #    to a roughly equal distribution of stories on the shards, whereas
    #    initializing them to zero will make the algorithm put all the stories
    #    into the first shard.
+    # 3) For the case of adding a new benchmark to a builder that hasn't run
+    #    it before but has run other benchmarks, 10 seconds is a reasonable
+    #    amount of time to guess that it would take the stories to run and
+    #    creates reasonably balanced shard maps.
    for story in story_list:
-      story_timing_dict[b.name + '/' + story] = b.repeat
+      story_timing_dict[b.name + '/' + story] = 10
  for run in timing_data:
    benchmark = run['name'].split('/', 1)[0]
    if run['name'] in story_timing_dict:

--- a/tools/perf/core/sharding_map_generator_unittest.py
+++ b/tools/perf/core/sharding_map_generator_unittest.py
@@ -51,35 +51,33 @@ class TestShardingMapGenerator(unittest.TestCase):
    self.assertEqual(results['2']['full_time'], 140)
  def testGenerateShardingMapsWithoutStoryTimingData(self):
-    # Two tests benchmarks are to be sharded between 3 machines. The first one
+    # 3 benchmarks are to be sharded between 3 machines. The first one
    # has 4 stories, each repeat 2 times. The second one has 4 stories
    # without repeat. Without any assumption about timing, the best sharding
-    # is to shard the first 2 stories of 'foo_benchmark' on shard 1, the next
+    # is to put each benchmark on its own device. Repeats do not necessarily
-    # two stories of 'foo_benchmark' on shard 2, and 'bar_benchmark' entirely on
+    # imply that a story will take longer than another story that is not
-    # shard 3.
+    # repeated. This is because short stories tend to be repeated, whereas long
+    # stories tend to not be repeated.
    timing_data = []
    benchmarks_data = [
        FakeBenchmarkConfig(
-            'foo_benchmark', ['foo_1', 'foo_2', 'foo_3', 'foo_4'], 2),
+            'a_benchmark', ['a_1', 'a_2', 'a_3', 'a_4'], 2),
        FakeBenchmarkConfig(
-            'bar_benchmark', ['bar_1', 'bar_2', 'bar_3', 'bar_4'], 1),
+            'b_benchmark', ['b_1', 'b_2', 'b_3', 'b_4'], 1),
+        FakeBenchmarkConfig(
+            'c_benchmark', ['c_1', 'c_2', 'c_3', 'c_4'], 1),
    ]
    sharding_map = sharding_map_generator.generate_sharding_map(
        benchmarks_data, timing_data, 3, None)
    self.assertEquals(
      sharding_map['0']['benchmarks'],
-      collections.OrderedDict([('bar_benchmark', {'abridged': False})]))
+      collections.OrderedDict([('a_benchmark', {'abridged': False})]))
    self.assertEquals(
      sharding_map['1']['benchmarks'],
-      collections.OrderedDict([('foo_benchmark',
+      collections.OrderedDict([('b_benchmark', {'abridged': False})]))
-                                {'end': 2, 'abridged': False})]))
    self.assertEquals(
      sharding_map['2']['benchmarks'],
-      collections.OrderedDict([('foo_benchmark',
+      collections.OrderedDict([('c_benchmark', {'abridged': False})]))
-                                {'begin': 2, 'abridged': False})]))
  def testGeneratePerfSharding(self):
    test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
@@ -104,4 +102,4 @@ class TestShardingMapGenerator(unittest.TestCase):
    shards_timing = []
    for shard in results:
      shards_timing.append(results[shard]['full_time'])
-    self.assertTrue(max(shards_timing) - min(shards_timing) < 300)
+    self.assertTrue(max(shards_timing) - min(shards_timing) < 600)