[benchmarking] Simplified logic on getting timing list for resharding.

Updated the logic to generate timing list. Bug: chromium:1130157 Change-Id: I285a6712ade0018bae4e1af23081827124b146ad Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2468291 Reviewed-by: John Chen <johnchen@chromium.org> Commit-Queue: Wenbin Zhang <wenbinzhang@google.com> Cr-Commit-Position: refs/heads/master@{#818827}

[benchmarking] Simplified logic on getting timing list for resharding.
Updated the logic to generate timing list. Bug: chromium:1130157 Change-Id: I285a6712ade0018bae4e1af23081827124b146ad Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2468291 Reviewed-by: John Chen <johnchen@chromium.org> Commit-Queue: Wenbin Zhang <wenbinzhang@google.com> Cr-Commit-Position: refs/heads/master@{#818827}
d3d32e3e · Wenbin Zhang · Commit Bot · 0c493ed2 · d3d32e3e
Commit d3d32e3e authored Oct 20, 2020 by Wenbin Zhang Committed by Commit Bot Oct 20, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 39 additions and 42 deletions

tools/perf/core/sharding_map_generator.py tools/perf/core/sharding_map_generator.py +39 -42

No files found.
--- a/tools/perf/core/sharding_map_generator.py
+++ b/tools/perf/core/sharding_map_generator.py
@@ -8,18 +8,33 @@ import core.path_util
 core.path_util.AddTelemetryToPath()
+# Initialize the duration of all stories to be sharded to 10 seconds.
+# The reasons are:
+# 1) Even if the stories are skipped, they still have non-negligible
+#    overhead.
+# 2) For the case of sharding a set of benchmarks with no existing data about
+#    timing, initializing the stories' time within a single repeat to a
+#    nonzero value leads to a roughly equal distribution of stories on the
+#    shards, whereas initializing them to zero will make the algorithm put
+#    all the stories into the first shard.
+# 3) For the case of adding a new benchmark to a builder that hasn't run
+#    it before but has run other benchmarks, 10 seconds is a reasonable
+#    guess for how long the stories would take to run, and it creates
+#    reasonably balanced shard maps.
+DEFAULT_STORY_DURATION = 10
 def generate_sharding_map(benchmarks_to_shard, timing_data, num_shards, debug):
  """Generate sharding map.
    Args:
-      benchmarks_to_shard is a list of bot_platforms.BenchmarkConfig and
+      benchmarks_to_shard: a list of bot_platforms.BenchmarkConfig and
      ExecutableConfig objects.
+      timing_data: The timing data in json with 'name' and 'duration'
-      The "stories" field contains a list of ordered story names. Notes that
+      num_shards: the total number of shards
-      this should match the actual order of how the benchmark stories are
+      debug: if true, print out full list of stories of each shard in shard map.
-      executed for the sharding algorithm to be effective.
+    Return:
+      The shard map.
  """
  # Sort the list of benchmarks to be sharded by benchmark's name to make the
  # execution of this algorithm deterministic.
@@ -43,7 +58,6 @@ def generate_sharding_map(benchmarks_to_shard, timing_data, num_shards, debug):
  min_shard_index = None
  max_shard_time = 0
  max_shard_index = None
-  num_stories = len(story_timing_list)
  predicted_shard_timings = []
  # The algorithm below removes all the stories from |story_timing_list| one by
@@ -148,42 +162,25 @@ def _add_benchmarks_to_shard(sharding_map, shard_index, stories_in_shard,
 def _gather_timing_data(benchmarks_to_shard, timing_data, repeat):
-  story_timing_dict = {}
+  """Generates an ordered list of story names and their durations.
-  benchmarks_data_by_name = {}
+  Return:
-  for b in benchmarks_to_shard:
+    A list of tuples of (story_name, story_duration), sorted by the order of
-    story_list = b.stories
+    benchmark name + story order within the benchmark.
-    benchmarks_data_by_name[b.name] = b
+  """
-    # Initialize the duration of all stories to be shard to 10 seconds.
+  timing_data_dict = {}
-    # The reasons are:
-    # 1) Even if the stories are skipped, they still have non negligible
-    #    overhead.
-    # 2) For a case of sharding a set of benchmarks with no existing data about
-    #    timing, initializing the stories time within a single repeat to 1 leads
-    #    to a roughly equal distribution of stories on the shards, whereas
-    #    initializing them to zero will make the algorithm put all the stories
-    #    into the first shard.
-    # 3) For the case  of adding a new benchmark to a builder that hasn't run
-    #    it before but has run other benchmarks, 10 seconds is a reasonable
-    #    amount of time to guess that it would take the stories to run and
-    #    creates reasonably balanced shard maps.
-    for story in story_list:
-      story_timing_dict[b.name + '/' + story] = 10
  for run in timing_data:
-    benchmark = run['name'].split('/', 1)[0]
+    if run['duration']:
-    if run['name'] in story_timing_dict:
+      timing_data_dict[run['name']] = float(run['duration'])
-      if run['duration']:
+  timing_data_list = []
-        if repeat:
+  for b in benchmarks_to_shard:
-          story_timing_dict[run['name']] = (float(run['duration'])
+    run_count = b.repeat if repeat else 1
-              * benchmarks_data_by_name[benchmark].repeat)
+    for s in b.stories:
-        else:
+      test_name = '%s/%s' % (b.name, s)
-          story_timing_dict[run['name']] = float(run['duration'])
+      test_duration = DEFAULT_STORY_DURATION
-  story_timing_list = []
+      if test_name in timing_data_dict:
-  for entry in benchmarks_to_shard:
+        test_duration = timing_data_dict[test_name] * run_count
-    benchmark_name = entry.name
+      timing_data_list.append((test_name, test_duration))
-    for story_name in entry.stories:
+  return timing_data_list
-      test_name = '%s/%s' % (benchmark_name, story_name)
-      story_timing_list.append((test_name, story_timing_dict[test_name]))
-  return story_timing_list
 def _generate_empty_sharding_map(num_shards):