Commit 3e1d472b authored by Leonard Ge, committed by Commit Bot

Add functionality to generate the shard map for the given benchmark.

In this CL, I've extended the functionality of the `generate_perf_sharding`
script to generate the shard_map only for the given benchmark. The user needs
to provide the `benchmark_name` and `builder_name`, and the script will
generate the shard map. Users can also set the number of shards to use as well
as the output file path.

I've also fixed some minor style issues.

Bug: catapult:#
Change-Id: I956ebea0a8d044b1725de4a94eb17f60260a1424
Reviewed-on: https://chromium-review.googlesource.com/1216064
Reviewed-by: Ned Nguyen <nednguyen@google.com>
Commit-Queue: Leonard Ge <wangge@google.com>
Cr-Commit-Position: refs/heads/master@{#589974}
parent e4c2bbbb
......@@ -10,28 +10,51 @@ import sys
from core import benchmark_utils
from core import bot_platforms
from core import sharding_map_generator
from core import retrieve_story_timing
from core import sharding_map_generator
def get_parser():
def GetParser():
parser = argparse.ArgumentParser(
description='Generate perf test sharding map.')
parser.add_argument(
subparsers = parser.add_subparsers()
parser_update = subparsers.add_parser('update')
parser_update.add_argument(
'--regenerate-timing-data', '-r', action='store_true',
help=('Whether to regenerate timing data for all builders in '
'chromium.perf'), default=False)
parser.add_argument(
'--builder', '-b', action='store', nargs='*',
help=('The builder name to reshard. If not specified, use all '
parser_update.add_argument(
'--builders', '-b', action='store', nargs='*',
help=('The builder names to reshard. If not specified, use all '
'perf builders'),
choices=bot_platforms.ALL_PLATFORM_NAMES)
choices=bot_platforms.ALL_PLATFORM_NAMES,
default=bot_platforms.ALL_PLATFORM_NAMES)
parser_update.set_defaults(func=_UpdateShardsForBuilders)
parser_create = subparsers.add_parser('create')
parser_create.add_argument(
'--benchmark', help='The benchmark that you want to create shard for',
required=True)
parser_create.add_argument(
'--timing-data-source', '-t', choices=bot_platforms.ALL_PLATFORM_NAMES,
help='The timing data that you want to use. If not set, it will assume '
'all stories use the same amount of time to run')
parser_create.add_argument(
# pinpoint typically has 16 machines for each hardware types, so we set
# the default to use half of them to avoid starving the pool.
'--shards-num', type=int, default=8,
help="The number of shards you'd like to use, default is %(default)s")
parser_create.add_argument(
'--output-path', default='new_shard_map.json',
help='Output file path for the shard map, default is `%(default)s`')
parser_create.set_defaults(func=_CreateShardMapForBenchmark)
return parser
def _GenerateBenchmarksToShardsList(benchmarks):
""" Return |benchmarks_to_shard| from given list of |benchmarks|.
"""Return |benchmarks_to_shard| from given list of |benchmarks|.
benchmarks_to_shard is a list all benchmarks to be sharded. Its
structure is as follows:
[{
......@@ -70,11 +93,24 @@ def _LoadTimingData(args):
print 'Finish retrieve story timing data for %s' % repr(builder_name)
def _UpdateShardsForBuilders(builder_names, regenerate_timing_data):
def _GenerateShardMap(builder, num_of_shards, output_path, benchmark=None):
"""Generate a sharding map and write it to |output_path| as JSON.

Args:
builder: the platform object whose `benchmarks_to_run` are sharded and whose
`timing_file_path` supplies the timing data. If falsy, no timing file is
read and an empty list is passed as the timing data.
num_of_shards: forwarded to the sharding map generator as `num_shards`.
output_path: path of the JSON file that receives the sharding map.
benchmark: if set, only the benchmark whose Name() equals this value is
sharded; otherwise every benchmark in `builder.benchmarks_to_run` is.
"""
# Stays empty when there is no builder to read a timing file from.
timing_data = []
if builder:
with open(builder.timing_file_path) as f:
timing_data = json.load(f)
# NOTE(review): |builder| is dereferenced unconditionally below even though
# the guard above allows it to be falsy, and _CreateShardMapForBenchmark
# passes None when no timing data source is given. That path would raise
# AttributeError -- confirm where the benchmarks should come from then.
benchmarks_to_shard = _GenerateBenchmarksToShardsList(
[b for b in builder.benchmarks_to_run if not benchmark or (
b.Name() == benchmark)])
sharding_map = sharding_map_generator.generate_sharding_map(
benchmarks_to_shard, timing_data, num_shards=num_of_shards,
debug=False)
with open(output_path, 'w') as output_file:
json.dump(sharding_map, output_file, indent=4, separators=(',', ': '))
builders = {b for b in bot_platforms.ALL_PLATFORMS if b.name in builder_names}
if regenerate_timing_data:
def _UpdateShardsForBuilders(args):
builders = {b for b in bot_platforms.ALL_PLATFORMS if b.name in args.builders}
if args.regenerate_timing_data:
print 'Update shards timing data. May take a while...'
args = []
for b in builders:
......@@ -83,23 +119,33 @@ def _UpdateShardsForBuilders(builder_names, regenerate_timing_data):
p.map(_LoadTimingData, args)
for b in builders:
with open(b.timing_file_path) as f:
timing_data = json.load(f)
benchmarks_to_shard = _GenerateBenchmarksToShardsList(b.benchmarks_to_run)
sharding_map = sharding_map_generator.generate_sharding_map(
benchmarks_to_shard, timing_data, num_shards=b.num_shards,
debug=False)
with open(b.shards_map_file_path, 'w') as output_file:
json.dump(sharding_map, output_file, indent=4, separators=(',', ': '))
_GenerateShardMap(b, b.num_shards, b.shards_map_file_path)
print 'Updated sharding map for %s' % repr(b.name)
def _CreateShardMapForBenchmark(args):
"""Create the shard map for the given benchmark.

Args:
args(Namespace object): the namespace object for the subparser `create`. It
will contain the attributes:
`benchmark`: the name of the benchmark that we want the shard for
`shards_num`: the total number of shards that we want to use
`output_path`: the output file path for the shard map
`timing_data_source`: optional name of the builder whose timing data is
used. Unlike `builders` of the `update` subparser, this is a single
string rather than a list of strings.
"""
# Stays None when no timing data source was requested.
builder = None
if args.timing_data_source:
# Single-element unpacking: raises ValueError unless exactly one platform in
# ALL_PLATFORMS has the requested name.
[builder] = [b for b in bot_platforms.ALL_PLATFORMS
if b.name == args.timing_data_source]
_GenerateShardMap(builder, args.shards_num, args.output_path, args.benchmark)
def main():
parser = get_parser()
parser = GetParser()
options = parser.parse_args()
builder_names = options.builder or bot_platforms.ALL_PLATFORM_NAMES
_UpdateShardsForBuilders(
builder_names, options.regenerate_timing_data)
options.func(options)
if __name__ == '__main__':
sys.exit(main())
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment