Commit 162b8a75 authored by Caleb Rouleau, committed by Commit Bot

[Perf Waterfall] Sharding system supports abridging benchmarks.

In order to support abridging benchmarks, switch to using BenchmarkConfig
objects instead of plain dictionaries.

Bug: 965158
Change-Id: I73cd1d3d34531a1e18d1d97debad645d65f80c5e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1935751
Commit-Queue: Caleb Rouleau <crouleau@chromium.org>
Reviewed-by: John Chen <johnchen@chromium.org>
Cr-Commit-Position: refs/heads/master@{#719323}
parent b4366518
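
Before the diff, a hedged sketch of the interface change: the sharding code previously consumed plain dicts and now consumes objects exposing the same data as attributes. The class name mirrors the FakeBenchmarkConfig stub from the unit test in this change; the block is illustrative only, not part of the commit.

# Illustrative only: the shape the sharding code now consumes. Previously it
# received plain dicts ({'name': ..., 'stories': ..., 'repeat': ...}).
class FakeBenchmarkConfig(object):
  def __init__(self, name, stories, repeat, abridged=False):
    self.name = name          # was entry['name']
    self.stories = stories    # was entry['stories'], ordered story names
    self.repeat = repeat      # was entry['repeat'] (pageset_repeat count)
    self.abridged = abridged  # new flag: run the abridged story set?

configs = [FakeBenchmarkConfig('benchmark_1', ['storyA', 'storyB'], 1)]
configs.sort(key=lambda c: c.name)  # attribute access replaces dict lookups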
@@ -5,6 +5,9 @@ import os
import urllib
from core import benchmark_finders
from core import benchmark_utils
from telemetry.story import story_filter
_SHARD_MAP_DIR = os.path.join(os.path.dirname(__file__), 'shard_maps')
@@ -109,11 +112,31 @@ class BenchmarkConfig(object):
"""
self.benchmark = benchmark
self.abridged = abridged
self._stories = None
@property
def name(self):
return self.benchmark.Name()
@property
def repeat(self):
return self.benchmark.options.get('pageset_repeat', 1)
@property
def stories(self):
if self._stories is not None:
return self._stories
else:
story_set = benchmark_utils.GetBenchmarkStorySet(self.benchmark())
abridged_story_set_tag = (
story_set.GetAbridgedStorySetTagFilter() if self.abridged else None)
story_filter_obj = story_filter.StoryFilter(
abridged_story_set_tag=abridged_story_set_tag)
stories = story_filter_obj.FilterStories(story_set)
self._stories = [story.name for story in stories]
return self._stories
# Global |benchmarks| is a convenient way to keep BenchmarkConfig objects
# unique, which allows us to use set subtraction below.
benchmarks = {b.Name(): {True: BenchmarkConfig(b, abridged=True),
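The stories property above computes the filtered story list on first access and caches it. A minimal self-contained sketch of the same lazy-caching pattern, with stub lists standing in for Telemetry's story set and abridged tag filter (assumed names, not the real API):

class LazyStories(object):
  """Mirrors the caching in BenchmarkConfig.stories, with stub inputs."""

  def __init__(self, full_names, abridged_names, abridged=False):
    self._full = full_names          # stands in for the Telemetry story set
    self._abridged = abridged_names  # stands in for the abridged tag filter
    self.abridged = abridged
    self._stories = None             # computed on first access, then cached

  @property
  def stories(self):
    if self._stories is None:
      # Apply the abridged filter only when requested, as the real property
      # does via GetAbridgedStorySetTagFilter() and story_filter.StoryFilter.
      names = self._abridged if self.abridged else self._full
      self._stories = list(names)
    return self._stories

config = LazyStories(['s1', 's2', 's3'], ['s1'], abridged=True)
assert config.stories == ['s1']  # filtered lazily, then cached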
@@ -15,20 +15,7 @@ def generate_sharding_map(
"""Generate sharding map.
Args:
benchmarks_to_shard is a list of all benchmarks to be sharded. Its
structure is as follows:
[{
"name": "benchmark_1",
"stories": [ "storyA", "storyB",...],
"repeat": <number of pageset_repeat>
},
{
"name": "benchmark_2",
"stories": [ "storyA", "storyB",...],
"repeat": <number of pageset_repeat>
},
...
]
benchmarks_to_shard is a list of bot_platforms.BenchmarkConfig objects.
The "stories" field contains a list of ordered story names. Note that
this should match the actual order of how the benchmark stories are
@@ -37,14 +24,15 @@
"""
# Sort the list of benchmarks to be sharded by benchmark name to make the
# execution of this algorithm deterministic.
benchmarks_to_shard.sort(key=lambda entry: entry['name'])
benchmarks_to_shard.sort(key=lambda entry: entry.name)
benchmark_name_to_config = {b.name: b for b in benchmarks_to_shard}
story_timing_list = _gather_timing_data(
benchmarks_to_shard, timing_data, True)
all_stories = {}
for b in benchmarks_to_shard:
all_stories[b['name']] = b['stories']
all_stories[b.name] = b.stories
total_time = sum(p[1] for p in story_timing_list)
expected_time_per_shard = total_time/num_shards
@@ -96,7 +84,8 @@ def generate_sharding_map(
stories_in_shard.append(candidate_story)
debug_map[shard_name][candidate_story] = candidate_story_duration
last_diff = abs(total_time_scheduled - expected_total_time)
_add_benchmarks_to_shard(sharding_map, i, stories_in_shard, all_stories)
_add_benchmarks_to_shard(sharding_map, i, stories_in_shard, all_stories,
benchmark_name_to_config)
else:
break
# Double time_per_shard to account for reference benchmark run.
@@ -127,7 +116,7 @@ def generate_sharding_map(
def _add_benchmarks_to_shard(sharding_map, shard_index, stories_in_shard,
all_stories):
all_stories, benchmark_name_to_config):
benchmarks = collections.OrderedDict()
for story in stories_in_shard:
(b, story) = story.split('/', 1)
@@ -145,10 +134,7 @@ def _add_benchmarks_to_shard(sharding_map, shard_index, stories_in_shard,
benchmarks_in_shard[b]['begin'] = first_story
if last_story != len(all_stories[b]):
benchmarks_in_shard[b]['end'] = last_story
# TODO(crbug.com/965158): Currently we unconditionally run the full story
# set. Instead we should allow choosing certain benchmarks to run
# the abridged story set instead.
benchmarks_in_shard[b]['abridged'] = False
benchmarks_in_shard[b]['abridged'] = benchmark_name_to_config[b].abridged
sharding_map[str(shard_index)] = {'benchmarks': benchmarks_in_shard}
@@ -156,8 +142,8 @@ def _gather_timing_data(benchmarks_to_shard, timing_data, repeat):
story_timing_dict = {}
benchmarks_data_by_name = {}
for b in benchmarks_to_shard:
story_list = b['stories']
benchmarks_data_by_name[b['name']] = b
story_list = b.stories
benchmarks_data_by_name[b.name] = b
# Initialize the duration of all stories to be sharded to 1 * repeat.
# The reasons are:
# 1) Even if the stories are skipped, they still have non-negligible
@@ -168,7 +154,7 @@ def _gather_timing_data(benchmarks_to_shard, timing_data, repeat):
# initializing them to zero will make the algorithm put all the stories
# into the first shard.
for story in story_list:
story_timing_dict[b['name'] + '/' + story] = b['repeat']
story_timing_dict[b.name + '/' + story] = b.repeat
for run in timing_data:
benchmark = run['name'].split('/', 1)[0]
@@ -176,13 +162,13 @@ def _gather_timing_data(benchmarks_to_shard, timing_data, repeat):
if run['duration']:
if repeat:
story_timing_dict[run['name']] = (float(run['duration'])
* benchmarks_data_by_name[benchmark]['repeat'])
* benchmarks_data_by_name[benchmark].repeat)
else:
story_timing_dict[run['name']] = float(run['duration'])
story_timing_list = []
for entry in benchmarks_to_shard:
benchmark_name = entry['name']
for story_name in entry['stories']:
benchmark_name = entry.name
for story_name in entry.stories:
test_name = '%s/%s' % (benchmark_name, story_name)
story_timing_list.append((test_name, story_timing_dict[test_name]))
return story_timing_list
@@ -205,7 +191,7 @@ def test_sharding_map(
results = collections.OrderedDict()
all_stories = {}
for b in benchmarks_to_shard:
all_stories[b['name']] = b['stories']
all_stories[b.name] = b.stories
sharding_map.pop('extra_infos', None)
for shard in sharding_map:
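A hedged usage sketch of the updated generate_sharding_map entry point. The stub class matches the one the unit test below defines and the inputs are made up; only the call signature comes from this diff.

from core import sharding_map_generator  # same import as the unit test below

class FakeBenchmarkConfig(object):  # matches the stub the unit test defines
  def __init__(self, name, stories, repeat):
    self.name = name
    self.stories = stories
    self.repeat = repeat
    self.abridged = False

configs = [
    FakeBenchmarkConfig('foo_benchmark', ['foo_1', 'foo_2'], 2),
    FakeBenchmarkConfig('bar_benchmark', ['bar_1', 'bar_2'], 1),
]
# timing_data pairs '<benchmark>/<story>' names with measured durations.
timing_data = [
    {'name': 'foo_benchmark/foo_1', 'duration': 30.0},
    {'name': 'bar_benchmark/bar_1', 'duration': 10.0},
]
sharding_map = sharding_map_generator.generate_sharding_map(
    configs, timing_data, 2, None)  # 2 shards, no debug output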
@@ -11,23 +11,27 @@ import unittest
from core import sharding_map_generator
class FakeBenchmarkConfig(object):
def __init__(self, name, stories, repeat):
self.name = name
self.stories = stories
self.repeat = repeat
self.abridged = False
class TestShardingMapGenerator(unittest.TestCase):
def _generate_test_data(self, times):
timing_data = []
benchmarks_data = []
for i, _ in enumerate(times):
b = {
'name': 'benchmark_' + str(i),
'stories': [],
'repeat': 1,
}
b = FakeBenchmarkConfig('benchmark_' + str(i), [], 1)
benchmarks_data.append(b)
story_times = times[i]
for j, _ in enumerate(story_times):
benchmark_name = 'benchmark_' + str(i)
story_name = 'story_' + str(j)
b['stories'].append(story_name)
b.stories.append(story_name)
timing_data.append({
'name': benchmark_name + '/' + story_name,
'duration': story_times[j]
@@ -55,15 +59,10 @@ class TestShardingMapGenerator(unittest.TestCase):
# shard 3.
timing_data = []
benchmarks_data = [
{ 'name': 'foo_benchmark',
'stories': ['foo_1', 'foo_2', 'foo_3', 'foo_4'],
'repeat': 2
},
{ 'name': 'bar_benchmark',
'stories': ['bar_1', 'bar_2', 'bar_3', 'bar_4'],
'repeat': 1
}
FakeBenchmarkConfig(
'foo_benchmark', ['foo_1', 'foo_2', 'foo_3', 'foo_4'], 2),
FakeBenchmarkConfig(
'bar_benchmark', ['bar_1', 'bar_2', 'bar_3', 'bar_4'], 1),
]
sharding_map = sharding_map_generator.generate_sharding_map(
benchmarks_data, timing_data, 3, None)
@@ -85,7 +84,9 @@ class TestShardingMapGenerator(unittest.TestCase):
def testGeneratePerfSharding(self):
test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
with open(os.path.join(test_data_dir, 'benchmarks_to_shard.json')) as f:
benchmarks_to_shard = json.load(f)
raw_configs = json.load(f)
benchmarks_to_shard = [FakeBenchmarkConfig(
c['name'], c['stories'], c['repeat']) for c in raw_configs]
with open(os.path.join(test_data_dir, 'test_timing_data.json')) as f:
timing_data = json.load(f)
@@ -104,38 +104,6 @@ def _DumpJson(data, output_path):
json.dump(data, output_file, indent=4, separators=(',', ': '))
def _GenerateBenchmarksToShardsList(benchmarks):
"""Return |benchmarks_to_shard| from given list of |benchmarks|.
benchmarks_to_shard is a list of all benchmarks to be sharded. Its
structure is as follows:
[{
"name": "benchmark_1",
"stories": [ "storyA", "storyB",...],
"repeat": <number of pageset_repeat>
},
{
"name": "benchmark_2",
"stories": [ "storyA", "storyB",...],
"repeat": <number of pageset_repeat>
},
...
]
The "stories" field contains a list of ordered story names. Note that
this should match the actual order of how the benchmark stories are
executed for the sharding algorithm to be effective.
"""
benchmarks_to_shard = []
for b in benchmarks:
benchmarks_to_shard.append({
'name': b.Name(),
'repeat': b().options.get('pageset_repeat', 1),
'stories': benchmark_utils.GetBenchmarkStoryNames(b())
})
return benchmarks_to_shard
def _LoadTimingData(args):
builder_name, timing_file_path = args
data = retrieve_story_timing.FetchAverageStortyTimingData(
@@ -150,8 +118,7 @@ def _GenerateShardMap(
if builder:
with open(builder.timing_file_path) as f:
timing_data = json.load(f)
benchmarks_to_shard = _GenerateBenchmarksToShardsList(
list(builder.benchmarks_to_run))
benchmarks_to_shard = list(builder.benchmark_configs)
sharding_map = sharding_map_generator.generate_sharding_map(
benchmarks_to_shard, timing_data, num_shards=num_of_shards,
debug=debug)
@@ -265,7 +232,7 @@ def _ValidateShardMaps(args):
# Check that bot_platforms.py matches the actual shard maps
for platform in bot_platforms.ALL_PLATFORMS:
platform_benchmark_names = set(
b.Name() for b in platform.benchmarks_to_run)
b.name for b in platform.benchmark_configs)
shard_map_benchmark_names = _ParseBenchmarks(platform.shards_map_file_path)
for benchmark in platform_benchmark_names - shard_map_benchmark_names:
errors.append(
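The check above is plain set subtraction between the benchmarks configured for a platform and those present in its shard map; a toy sketch with made-up names:

# Toy version of the validation: benchmarks configured for a platform that
# never appear in its shard map are reported as errors.
platform_benchmark_names = {'foo_benchmark', 'bar_benchmark'}
shard_map_benchmark_names = {'foo_benchmark'}
for benchmark in platform_benchmark_names - shard_map_benchmark_names:
  print('%s is configured but missing from the shard map' % benchmark)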