Commit 2ad514bb authored by Caleb Rouleau, committed by Commit Bot

[Perf Waterfall] Assume that stories without known runtimes take 10 seconds to run.

This helps to create balanced shard maps. Specifically for weblayer bot, this will help
us add startup.mobile without unbalancing the shard maps.

Bug: 1026327
Change-Id: I8f5ebceba13cc6f7de8450389ecf59410425e1c3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1949239
Commit-Queue: Caleb Rouleau <crouleau@chromium.org>
Reviewed-by: John Chen <johnchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#721292}
parent 4fa87cf7
......@@ -144,18 +144,21 @@ def _gather_timing_data(benchmarks_to_shard, timing_data, repeat):
for b in benchmarks_to_shard:
story_list = b.stories
benchmarks_data_by_name[b.name] = b
# Initialize the duration of all stories to be shard to 1 * repeat.
# Initialize the duration of all stories to be sharded to 10 seconds.
# The reasons are:
# 1) Even if the stories are skipped, they still have non neligible
# 1) Even if the stories are skipped, they still have non negligible
# overhead.
# 2) For a case of sharding a set of benchmarks with no existing data about
# timing, initializing each story's time to a nonzero value (10 seconds) leads
# to a roughly equal distribution of stories on the shards, whereas
# initializing them to zero will make the algorithm put all the stories
# into the first shard.
# 3) For the case of adding a new benchmark to a builder that hasn't run
# it before but has run other benchmarks, 10 seconds is a reasonable
# amount of time to guess that it would take the stories to run and
# creates reasonably balanced shard maps.
for story in story_list:
story_timing_dict[b.name + '/' + story] = b.repeat
story_timing_dict[b.name + '/' + story] = 10
for run in timing_data:
benchmark = run['name'].split('/', 1)[0]
if run['name'] in story_timing_dict:
......
......@@ -51,35 +51,33 @@ class TestShardingMapGenerator(unittest.TestCase):
self.assertEqual(results['2']['full_time'], 140)
def testGenerateShardingMapsWithoutStoryTimingData(self):
# Two tests benchmarks are to be sharded between 3 machines. The first one
# 3 benchmarks are to be sharded between 3 machines. The first one
# has 4 stories, each repeat 2 times. The second one has 4 stories
# without repeat. Without any assumption about timing, the best sharding
# is to shard the first 2 stories of 'foo_benchmark' on shard 1, the next
# two stories of 'foo_benchmark' on shard 2, and 'bar_benchmark' entirely on
# shard 3.
# is to put each benchmark on its own device. Repeats do not necessarily
# imply that a story will take longer than another story that is not
# repeated. This is because short stories tend to be repeated, whereas long
# stories tend to not be repeated.
timing_data = []
benchmarks_data = [
FakeBenchmarkConfig(
'foo_benchmark', ['foo_1', 'foo_2', 'foo_3', 'foo_4'], 2),
'a_benchmark', ['a_1', 'a_2', 'a_3', 'a_4'], 2),
FakeBenchmarkConfig(
'bar_benchmark', ['bar_1', 'bar_2', 'bar_3', 'bar_4'], 1),
'b_benchmark', ['b_1', 'b_2', 'b_3', 'b_4'], 1),
FakeBenchmarkConfig(
'c_benchmark', ['c_1', 'c_2', 'c_3', 'c_4'], 1),
]
sharding_map = sharding_map_generator.generate_sharding_map(
benchmarks_data, timing_data, 3, None)
self.assertEquals(
sharding_map['0']['benchmarks'],
collections.OrderedDict([('bar_benchmark', {'abridged': False})]))
collections.OrderedDict([('a_benchmark', {'abridged': False})]))
self.assertEquals(
sharding_map['1']['benchmarks'],
collections.OrderedDict([('foo_benchmark',
{'end': 2, 'abridged': False})]))
collections.OrderedDict([('b_benchmark', {'abridged': False})]))
self.assertEquals(
sharding_map['2']['benchmarks'],
collections.OrderedDict([('foo_benchmark',
{'begin': 2, 'abridged': False})]))
collections.OrderedDict([('c_benchmark', {'abridged': False})]))
def testGeneratePerfSharding(self):
test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
......@@ -104,4 +102,4 @@ class TestShardingMapGenerator(unittest.TestCase):
shards_timing = []
for shard in results:
shards_timing.append(results[shard]['full_time'])
self.assertTrue(max(shards_timing) - min(shards_timing) < 300)
self.assertTrue(max(shards_timing) - min(shards_timing) < 600)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment