Commit 2c707c29 authored by Andrew Grieve, committed by Chromium LUCI CQ

mojom_parser.py: Use multiprocessing to speed up runtime

On my machine, blink_public_mojom_mojom_platform with python3 goes from:

I     21 Started (mojom_parser.py)
I     33 Parsing 177 .mojom into ASTs
I   2178 Processing dependencies
I   2425 Loaded 92 modules from dependencies
I   2425 Ensuring inputs are loaded
I   2571 Serializing 177 modules
I   3083 Finished

to:

I     21 Started (mojom_parser.py)
I     33 Parsing 177 .mojom into ASTs
I    195 Processing dependencies
I    455 Loaded 92 modules from dependencies
I    455 Ensuring inputs are loaded
I    537 Serializing 177 modules
I    677 Finished

It's still the case that for some build steps, "Processing dependencies"
can account for the majority of the time spent, e.g. 2 of 2.7 seconds for
content_common_mojo_bindings.

Bug: 1143399
Change-Id: I3fc6b7003f3d222da8743bec38bb9718aabb983e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2605350
Commit-Queue: Andrew Grieve <agrieve@chromium.org>
Auto-Submit: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Ken Rockot <rockot@google.com>
Cr-Commit-Position: refs/heads/master@{#840876}
parent 12e675dd
......@@ -15,6 +15,7 @@ import codecs
import errno
import json
import logging
import multiprocessing
import os
import os.path
import sys
......@@ -26,6 +27,10 @@ from mojom.parse import parser
from mojom.parse import conditional_features
# Disable this for easier debugging.
ENABLE_MULTIPROCESSING = True
def _ResolveRelativeImportPath(path, roots):
"""Attempts to resolve a relative import path against a set of possible roots.
......@@ -157,6 +162,56 @@ def _CollectAllowedImportsFromBuildMetadata(build_metadata_filename):
return allowed_imports
# multiprocessing helper.
def _ParseAstHelper(args):
  """Parses one .mojom file into an AST with disabled definitions removed.

  Takes a single tuple so it can be used directly as a map()-style worker.

  Args:
    args: A (mojom_abspath, enabled_features) tuple. The file at
        mojom_abspath is read as UTF-8; enabled_features is forwarded to
        conditional_features.RemoveDisabledDefinitions().

  Returns:
    A (mojom_abspath, ast) tuple, so callers can match each result to its
    input file.
  """
  source_path, features = args
  with codecs.open(source_path, encoding='utf-8') as mojom_file:
    contents = mojom_file.read()
  tree = parser.Parse(contents, source_path)
  conditional_features.RemoveDisabledDefinitions(tree, features)
  return source_path, tree
# multiprocessing helper.
def _SerializeHelper(args):
  """Writes one loaded module to its output file.

  Takes a single tuple so it can be used directly as a map()-style worker.
  The output root and the table of loaded modules are not passed in; they are
  read from the function attributes _SerializeHelper.output_root_path and
  _SerializeHelper.loaded_modules, which must be assigned before the call.

  Args:
    args: A (mojom_abspath, mojom_path) tuple. mojom_abspath is the key into
        loaded_modules; mojom_path determines the output filename via
        _GetModuleFilename().
  """
  source_abspath, relative_path = args
  destination = os.path.join(_SerializeHelper.output_root_path,
                             _GetModuleFilename(relative_path))
  destination_dir = os.path.dirname(destination)
  directory_missing = not os.path.exists(destination_dir)
  if directory_missing:
    try:
      # Python 2's makedirs() has no exist_ok, so an "already exists" failure
      # is tolerated instead. It can happen when concurrent build steps
      # create the same output directory.
      os.makedirs(destination_dir)
    except OSError as error:
      already_exists = error.errno == errno.EEXIST
      if not already_exists:
        raise
  with open(destination, 'wb') as output_file:
    _SerializeHelper.loaded_modules[source_abspath].Dump(output_file)
def _Shard(target_func, args, processes=None):
args = list(args)
if processes is None:
processes = multiprocessing.cpu_count()
# Seems optimal to have each process perform at least 2 tasks.
processes = min(processes, len(args) // 2)
# Don't spin up processes unless there is enough work to merit doing so.
if not ENABLE_MULTIPROCESSING or processes < 2:
for result in map(target_func, args):
yield result
return
pool = multiprocessing.Pool(processes=processes)
try:
for result in pool.imap_unordered(target_func, args):
yield result
finally:
pool.close()
pool.join() # Needed on Windows to avoid WindowsError during terminate.
pool.terminate()
def _ParseMojoms(mojom_files,
input_root_paths,
output_root_path,
......@@ -194,13 +249,13 @@ def _ParseMojoms(mojom_files,
mojom_files_to_parse = dict((os.path.normcase(abs_path),
_RebaseAbsolutePath(abs_path, input_root_paths))
for abs_path in mojom_files)
logging.info('Parsing %d .mojom into ASTs', len(mojom_files_to_parse))
abs_paths = dict(
(path, abs_path) for abs_path, path in mojom_files_to_parse.items())
for mojom_abspath in mojom_files_to_parse:
with codecs.open(mojom_abspath, encoding='utf-8') as f:
ast = parser.Parse(''.join(f.readlines()), mojom_abspath)
conditional_features.RemoveDisabledDefinitions(ast, enabled_features)
logging.info('Parsing %d .mojom into ASTs', len(mojom_files_to_parse))
map_args = ((mojom_abspath, enabled_features)
for mojom_abspath in mojom_files_to_parse)
for mojom_abspath, ast in _Shard(_ParseAstHelper, map_args):
loaded_mojom_asts[mojom_abspath] = ast
logging.info('Processing dependencies')
......@@ -249,21 +304,18 @@ def _ParseMojoms(mojom_files,
# Now we have fully translated modules for every input and every transitive
# dependency. We can dump the modules to disk for other tools to use.
logging.info('Serializeing %d modules', len(mojom_files_to_parse))
for mojom_abspath, mojom_path in mojom_files_to_parse.items():
module_path = os.path.join(output_root_path, _GetModuleFilename(mojom_path))
module_dir = os.path.dirname(module_path)
if not os.path.exists(module_dir):
try:
# Python 2 doesn't support exist_ok on makedirs(), so we just ignore
# that failure if it happens. It's possible during build due to races
# among build steps with module outputs in the same directory.
os.makedirs(module_dir)
except OSError as e:
if e.errno != errno.EEXIST:
raise
with open(module_path, 'wb') as f:
loaded_modules[mojom_abspath].Dump(f)
logging.info('Serializing %d modules', len(mojom_files_to_parse))
# Windows does not use fork() for multiprocessing, so we'd need to pass
# loaded_module via IPC rather than via globals. Doing so is slower than not
# using multiprocessing.
_SerializeHelper.loaded_modules = loaded_modules
_SerializeHelper.output_root_path = output_root_path
# Doesn't seem to help past 4. Perhaps IO bound here?
processes = 0 if sys.platform == 'win32' else 4
map_args = mojom_files_to_parse.items()
for _ in _Shard(_SerializeHelper, map_args, processes=processes):
pass
def Run(command_line):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment