Commit 2c707c29 authored by Andrew Grieve, committed by Chromium LUCI CQ

mojom_parser.py: Use multiprocessing to speed up runtime

On my machine, blink_public_mojom_mojom_platform with python3 goes from:

I     21 Started (mojom_parser.py)
I     33 Parsing 177 .mojom into ASTs
I   2178 Processing dependencies
I   2425 Loaded 92 modules from dependencies
I   2425 Ensuring inputs are loaded
I   2571 Serializing 177 modules
I   3083 Finished

to:

I     21 Started (mojom_parser.py)
I     33 Parsing 177 .mojom into ASTs
I    195 Processing dependencies
I    455 Loaded 92 modules from dependencies
I    455 Ensuring inputs are loaded
I    537 Serializing 177 modules
I    677 Finished

It's still the case that for some steps "Processing dependencies"
can be the majority of time spent. E.g. 2 of 2.7 seconds for
content_common_mojo_bindings

Bug: 1143399
Change-Id: I3fc6b7003f3d222da8743bec38bb9718aabb983e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2605350
Commit-Queue: Andrew Grieve <agrieve@chromium.org>
Auto-Submit: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Ken Rockot <rockot@google.com>
Cr-Commit-Position: refs/heads/master@{#840876}
parent 12e675dd
...@@ -15,6 +15,7 @@ import codecs ...@@ -15,6 +15,7 @@ import codecs
import errno import errno
import json import json
import logging import logging
import multiprocessing
import os import os
import os.path import os.path
import sys import sys
...@@ -26,6 +27,10 @@ from mojom.parse import parser ...@@ -26,6 +27,10 @@ from mojom.parse import parser
from mojom.parse import conditional_features from mojom.parse import conditional_features
# When False, _Shard() runs every task serially in the current process instead
# of fanning out to a multiprocessing.Pool. Disable this for easier debugging.
ENABLE_MULTIPROCESSING = True
def _ResolveRelativeImportPath(path, roots): def _ResolveRelativeImportPath(path, roots):
"""Attempts to resolve a relative import path against a set of possible roots. """Attempts to resolve a relative import path against a set of possible roots.
...@@ -157,6 +162,56 @@ def _CollectAllowedImportsFromBuildMetadata(build_metadata_filename): ...@@ -157,6 +162,56 @@ def _CollectAllowedImportsFromBuildMetadata(build_metadata_filename):
return allowed_imports return allowed_imports
# multiprocessing helper.
def _ParseAstHelper(args):
  """Parses a single .mojom file into an AST.

  Takes one packed tuple rather than separate parameters so that it can be
  handed directly to _Shard(), which passes each work item as one argument.

  Args:
    args: A (mojom_abspath, enabled_features) tuple. mojom_abspath is the
        path of the UTF-8 encoded .mojom file to read; enabled_features is
        forwarded to conditional_features.RemoveDisabledDefinitions().

  Returns:
    A (mojom_abspath, ast) tuple, so the caller can match each result to its
    input even when results arrive out of order.
  """
  source_path, features = args
  with codecs.open(source_path, encoding='utf-8') as mojom_file:
    contents = mojom_file.read()
  tree = parser.Parse(contents, source_path)
  conditional_features.RemoveDisabledDefinitions(tree, features)
  return source_path, tree
# multiprocessing helper.
def _SerializeHelper(args):
  """Writes one loaded module to its output file.

  Reads its shared state from attributes on the function object itself:
  _SerializeHelper.output_root_path (root directory for outputs) and
  _SerializeHelper.loaded_modules (mapping of absolute mojom path to module).
  Both must be assigned by the caller before this helper runs.

  Args:
    args: A (mojom_abspath, mojom_path) tuple. mojom_abspath is the key into
        loaded_modules; mojom_path is passed to _GetModuleFilename() to
        derive the output location beneath output_root_path.
  """
  abs_path, rel_path = args
  out_path = os.path.join(_SerializeHelper.output_root_path,
                          _GetModuleFilename(rel_path))
  out_dir = os.path.dirname(out_path)
  dir_missing = not os.path.exists(out_dir)
  if dir_missing:
    try:
      os.makedirs(out_dir)
    except OSError as e:
      # Python 2's makedirs() has no exist_ok, so an "already exists" error is
      # tolerated instead. It can happen when build steps that emit modules
      # into the same directory race with each other.
      if e.errno != errno.EEXIST:
        raise
  with open(out_path, 'wb') as out_file:
    _SerializeHelper.loaded_modules[abs_path].Dump(out_file)
def _Shard(target_func, args, processes=None):
  """Applies target_func to every item of args, in parallel when worthwhile.

  This is a generator. When a process pool is used, results are produced via
  imap_unordered(), so they are not guaranteed to follow the order of args.

  Args:
    target_func: Callable taking a single work item. Handed to
        multiprocessing.Pool when the parallel path is taken.
    args: Iterable of work items; it is fully materialized into a list.
    processes: Upper bound on worker processes. None means
        multiprocessing.cpu_count().

  Yields:
    The result of target_func for each work item.
  """
  work_items = list(args)
  if processes is None:
    worker_count = multiprocessing.cpu_count()
  else:
    worker_count = processes
  # Seems optimal to have each process perform at least 2 tasks.
  worker_count = min(worker_count, len(work_items) // 2)

  if ENABLE_MULTIPROCESSING and worker_count >= 2:
    pool = multiprocessing.Pool(processes=worker_count)
    try:
      for result in pool.imap_unordered(target_func, work_items):
        yield result
    finally:
      pool.close()
      pool.join()  # Needed on Windows to avoid WindowsError during terminate.
      pool.terminate()
    return

  # Not enough work to merit spinning up processes (or multiprocessing is
  # disabled): run everything serially in this process.
  for result in map(target_func, work_items):
    yield result
def _ParseMojoms(mojom_files, def _ParseMojoms(mojom_files,
input_root_paths, input_root_paths,
output_root_path, output_root_path,
...@@ -194,14 +249,14 @@ def _ParseMojoms(mojom_files, ...@@ -194,14 +249,14 @@ def _ParseMojoms(mojom_files,
mojom_files_to_parse = dict((os.path.normcase(abs_path), mojom_files_to_parse = dict((os.path.normcase(abs_path),
_RebaseAbsolutePath(abs_path, input_root_paths)) _RebaseAbsolutePath(abs_path, input_root_paths))
for abs_path in mojom_files) for abs_path in mojom_files)
logging.info('Parsing %d .mojom into ASTs', len(mojom_files_to_parse))
abs_paths = dict( abs_paths = dict(
(path, abs_path) for abs_path, path in mojom_files_to_parse.items()) (path, abs_path) for abs_path, path in mojom_files_to_parse.items())
for mojom_abspath in mojom_files_to_parse:
with codecs.open(mojom_abspath, encoding='utf-8') as f: logging.info('Parsing %d .mojom into ASTs', len(mojom_files_to_parse))
ast = parser.Parse(''.join(f.readlines()), mojom_abspath) map_args = ((mojom_abspath, enabled_features)
conditional_features.RemoveDisabledDefinitions(ast, enabled_features) for mojom_abspath in mojom_files_to_parse)
loaded_mojom_asts[mojom_abspath] = ast for mojom_abspath, ast in _Shard(_ParseAstHelper, map_args):
loaded_mojom_asts[mojom_abspath] = ast
logging.info('Processing dependencies') logging.info('Processing dependencies')
for mojom_abspath, ast in loaded_mojom_asts.items(): for mojom_abspath, ast in loaded_mojom_asts.items():
...@@ -249,21 +304,18 @@ def _ParseMojoms(mojom_files, ...@@ -249,21 +304,18 @@ def _ParseMojoms(mojom_files,
# Now we have fully translated modules for every input and every transitive # Now we have fully translated modules for every input and every transitive
# dependency. We can dump the modules to disk for other tools to use. # dependency. We can dump the modules to disk for other tools to use.
logging.info('Serializeing %d modules', len(mojom_files_to_parse)) logging.info('Serializing %d modules', len(mojom_files_to_parse))
for mojom_abspath, mojom_path in mojom_files_to_parse.items():
module_path = os.path.join(output_root_path, _GetModuleFilename(mojom_path)) # Windows does not use fork() for multiprocessing, so we'd need to pass
module_dir = os.path.dirname(module_path) # loaded_module via IPC rather than via globals. Doing so is slower than not
if not os.path.exists(module_dir): # using multiprocessing.
try: _SerializeHelper.loaded_modules = loaded_modules
# Python 2 doesn't support exist_ok on makedirs(), so we just ignore _SerializeHelper.output_root_path = output_root_path
# that failure if it happens. It's possible during build due to races # Doesn't seem to help past 4. Perhaps IO bound here?
# among build steps with module outputs in the same directory. processes = 0 if sys.platform == 'win32' else 4
os.makedirs(module_dir) map_args = mojom_files_to_parse.items()
except OSError as e: for _ in _Shard(_SerializeHelper, map_args, processes=processes):
if e.errno != errno.EEXIST: pass
raise
with open(module_path, 'wb') as f:
loaded_modules[mojom_abspath].Dump(f)
def Run(command_line): def Run(command_line):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment