Commit d3d32e3e authored by Wenbin Zhang, committed by Commit Bot

[benchmarking] Simplified logic on getting timing list for resharding.

Updated the logic to generate timing list.

Bug: chromium:1130157
Change-Id: I285a6712ade0018bae4e1af23081827124b146ad
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2468291
Reviewed-by: John Chen <johnchen@chromium.org>
Commit-Queue: Wenbin Zhang <wenbinzhang@google.com>
Cr-Commit-Position: refs/heads/master@{#818827}
parent 0c493ed2
...@@ -8,18 +8,33 @@ import core.path_util ...@@ -8,18 +8,33 @@ import core.path_util
core.path_util.AddTelemetryToPath() core.path_util.AddTelemetryToPath()
# Initialize the duration of all stories to be sharded to 10 seconds.
# The reasons are:
# 1) Even if the stories are skipped, they still have non negligible
# overhead.
# 2) For the case of sharding a set of benchmarks with no existing data about
# timing, initializing each story's time within a single repeat to the same
# non-zero value leads to a roughly equal distribution of stories on the
# shards, whereas initializing them to zero will make the algorithm put all
# the stories into the first shard.
# 3) For the case of adding a new benchmark to a builder that hasn't run
# it before but has run other benchmarks, 10 seconds is a reasonable
# amount of time to guess that it would take the stories to run and
# creates reasonably balanced shard maps.
DEFAULT_STORY_DURATION = 10
def generate_sharding_map(benchmarks_to_shard, timing_data, num_shards, debug): def generate_sharding_map(benchmarks_to_shard, timing_data, num_shards, debug):
"""Generate sharding map. """Generate sharding map.
Args: Args:
benchmarks_to_shard is a list of bot_platforms.BenchmarkConfig and benchmarks_to_shard: a list of bot_platforms.BenchmarkConfig and
ExecutableConfig objects. ExecutableConfig objects.
timing_data: The timing data in json with 'name' and 'duration'
The "stories" field contains a list of ordered story names. Notes that num_shards: the total number of shards
this should match the actual order of how the benchmark stories are debug: if true, print out full list of stories of each shard in shard map.
executed for the sharding algorithm to be effective. Return:
The shard map.
""" """
# Sort the list of benchmarks to be sharded by benchmark's name to make the # Sort the list of benchmarks to be sharded by benchmark's name to make the
# execution of this algorithm deterministic. # execution of this algorithm deterministic.
...@@ -43,7 +58,6 @@ def generate_sharding_map(benchmarks_to_shard, timing_data, num_shards, debug): ...@@ -43,7 +58,6 @@ def generate_sharding_map(benchmarks_to_shard, timing_data, num_shards, debug):
min_shard_index = None min_shard_index = None
max_shard_time = 0 max_shard_time = 0
max_shard_index = None max_shard_index = None
num_stories = len(story_timing_list)
predicted_shard_timings = [] predicted_shard_timings = []
# The algorithm below removes all the stories from |story_timing_list| one by # The algorithm below removes all the stories from |story_timing_list| one by
...@@ -148,42 +162,25 @@ def _add_benchmarks_to_shard(sharding_map, shard_index, stories_in_shard, ...@@ -148,42 +162,25 @@ def _add_benchmarks_to_shard(sharding_map, shard_index, stories_in_shard,
def _gather_timing_data(benchmarks_to_shard, timing_data, repeat): def _gather_timing_data(benchmarks_to_shard, timing_data, repeat):
story_timing_dict = {} """Generates a list of story and duration in order.
benchmarks_data_by_name = {} Return:
for b in benchmarks_to_shard: A list of tuples of (story_name, story_duration), sorted by the order of
story_list = b.stories benchmark name + story order within the benchmark.
benchmarks_data_by_name[b.name] = b """
# Initialize the duration of all stories to be shard to 10 seconds. timing_data_dict = {}
# The reasons are:
# 1) Even if the stories are skipped, they still have non negligible
# overhead.
# 2) For a case of sharding a set of benchmarks with no existing data about
# timing, initializing the stories time within a single repeat to 1 leads
# to a roughly equal distribution of stories on the shards, whereas
# initializing them to zero will make the algorithm put all the stories
# into the first shard.
# 3) For the case of adding a new benchmark to a builder that hasn't run
# it before but has run other benchmarks, 10 seconds is a reasonable
# amount of time to guess that it would take the stories to run and
# creates reasonably balanced shard maps.
for story in story_list:
story_timing_dict[b.name + '/' + story] = 10
for run in timing_data: for run in timing_data:
benchmark = run['name'].split('/', 1)[0] if run['duration']:
if run['name'] in story_timing_dict: timing_data_dict[run['name']] = float(run['duration'])
if run['duration']: timing_data_list = []
if repeat: for b in benchmarks_to_shard:
story_timing_dict[run['name']] = (float(run['duration']) run_count = b.repeat if repeat else 1
* benchmarks_data_by_name[benchmark].repeat) for s in b.stories:
else: test_name = '%s/%s' % (b.name, s)
story_timing_dict[run['name']] = float(run['duration']) test_duration = DEFAULT_STORY_DURATION
story_timing_list = [] if test_name in timing_data_dict:
for entry in benchmarks_to_shard: test_duration = timing_data_dict[test_name] * run_count
benchmark_name = entry.name timing_data_list.append((test_name, test_duration))
for story_name in entry.stories: return timing_data_list
test_name = '%s/%s' % (benchmark_name, story_name)
story_timing_list.append((test_name, story_timing_dict[test_name]))
return story_timing_list
def _generate_empty_sharding_map(num_shards): def _generate_empty_sharding_map(num_shards):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment