Commit 2ad514bb authored by Caleb Rouleau, committed by Commit Bot

[Perf Waterfall] Assume that stories without known runtimes take 10 seconds to run.

This helps to create balanced shard maps. Specifically for weblayer bot, this will help
us add startup.mobile without unbalancing the shard maps.

Bug: 1026327
Change-Id: I8f5ebceba13cc6f7de8450389ecf59410425e1c3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1949239
Commit-Queue: Caleb Rouleau <crouleau@chromium.org>
Reviewed-by: John Chen <johnchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#721292}
parent 4fa87cf7
...@@ -144,18 +144,21 @@ def _gather_timing_data(benchmarks_to_shard, timing_data, repeat): ...@@ -144,18 +144,21 @@ def _gather_timing_data(benchmarks_to_shard, timing_data, repeat):
for b in benchmarks_to_shard: for b in benchmarks_to_shard:
story_list = b.stories story_list = b.stories
benchmarks_data_by_name[b.name] = b benchmarks_data_by_name[b.name] = b
# Initialize the duration of all stories to be shard to 1 * repeat. # Initialize the duration of all stories to be shard to 10 seconds.
# The reasons are: # The reasons are:
# 1) Even if the stories are skipped, they still have non neligible # 1) Even if the stories are skipped, they still have non negligible
# overhead. # overhead.
# 2) For a case of sharding a set of benchmarks with no existing data about # 2) For a case of sharding a set of benchmarks with no existing data about
# timing, initializing the stories time within a single repeat to 1 leads # timing, initializing the stories time within a single repeat to 1 leads
# to a roughly equal distribution of stories on the shards, whereas # to a roughly equal distribution of stories on the shards, whereas
# initializing them to zero will make the algorithm put all the stories # initializing them to zero will make the algorithm put all the stories
# into the first shard. # into the first shard.
# 3) For the case of adding a new benchmark to a builder that hasn't run
# it before but has run other benchmarks, 10 seconds is a reasonable
# amount of time to guess that it would take the stories to run and
# creates reasonably balanced shard maps.
for story in story_list: for story in story_list:
story_timing_dict[b.name + '/' + story] = b.repeat story_timing_dict[b.name + '/' + story] = 10
for run in timing_data: for run in timing_data:
benchmark = run['name'].split('/', 1)[0] benchmark = run['name'].split('/', 1)[0]
if run['name'] in story_timing_dict: if run['name'] in story_timing_dict:
......
...@@ -51,35 +51,33 @@ class TestShardingMapGenerator(unittest.TestCase): ...@@ -51,35 +51,33 @@ class TestShardingMapGenerator(unittest.TestCase):
self.assertEqual(results['2']['full_time'], 140) self.assertEqual(results['2']['full_time'], 140)
def testGenerateShardingMapsWithoutStoryTimingData(self): def testGenerateShardingMapsWithoutStoryTimingData(self):
# Two tests benchmarks are to be sharded between 3 machines. The first one # 3 benchmarks are to be sharded between 3 machines. The first one
# has 4 stories, each repeat 2 times. The second one has 4 stories # has 4 stories, each repeat 2 times. The second one has 4 stories
# without repeat. Without any assumption about timing, the best sharding # without repeat. Without any assumption about timing, the best sharding
# is to shard the first 2 stories of 'foo_benchmark' on shard 1, the next # is to put each benchmark on its own device. Repeats do not necessarily
# two stories of 'foo_benchmark' on shard 2, and 'bar_benchmark' entirely on # imply that a story will take longer than another story that is not
# shard 3. # repeated. This is because short stories tend to be repeated, whereas long
# stories tend to not be repeated.
timing_data = [] timing_data = []
benchmarks_data = [ benchmarks_data = [
FakeBenchmarkConfig( FakeBenchmarkConfig(
'foo_benchmark', ['foo_1', 'foo_2', 'foo_3', 'foo_4'], 2), 'a_benchmark', ['a_1', 'a_2', 'a_3', 'a_4'], 2),
FakeBenchmarkConfig( FakeBenchmarkConfig(
'bar_benchmark', ['bar_1', 'bar_2', 'bar_3', 'bar_4'], 1), 'b_benchmark', ['b_1', 'b_2', 'b_3', 'b_4'], 1),
FakeBenchmarkConfig(
'c_benchmark', ['c_1', 'c_2', 'c_3', 'c_4'], 1),
] ]
sharding_map = sharding_map_generator.generate_sharding_map( sharding_map = sharding_map_generator.generate_sharding_map(
benchmarks_data, timing_data, 3, None) benchmarks_data, timing_data, 3, None)
self.assertEquals( self.assertEquals(
sharding_map['0']['benchmarks'], sharding_map['0']['benchmarks'],
collections.OrderedDict([('bar_benchmark', {'abridged': False})])) collections.OrderedDict([('a_benchmark', {'abridged': False})]))
self.assertEquals( self.assertEquals(
sharding_map['1']['benchmarks'], sharding_map['1']['benchmarks'],
collections.OrderedDict([('foo_benchmark', collections.OrderedDict([('b_benchmark', {'abridged': False})]))
{'end': 2, 'abridged': False})]))
self.assertEquals( self.assertEquals(
sharding_map['2']['benchmarks'], sharding_map['2']['benchmarks'],
collections.OrderedDict([('foo_benchmark', collections.OrderedDict([('c_benchmark', {'abridged': False})]))
{'begin': 2, 'abridged': False})]))
def testGeneratePerfSharding(self): def testGeneratePerfSharding(self):
test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data') test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
...@@ -104,4 +102,4 @@ class TestShardingMapGenerator(unittest.TestCase): ...@@ -104,4 +102,4 @@ class TestShardingMapGenerator(unittest.TestCase):
shards_timing = [] shards_timing = []
for shard in results: for shard in results:
shards_timing.append(results[shard]['full_time']) shards_timing.append(results[shard]['full_time'])
self.assertTrue(max(shards_timing) - min(shards_timing) < 300) self.assertTrue(max(shards_timing) - min(shards_timing) < 600)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment