Pre-cache PLY yacc parse table and use optimized mode (45% build time improvement)

PLY yacc parse table creation is (obviously) expensive. Pre-caching the table in a separate build step substantially improves build time: user time drops by 45% on my Linux box (1m 50s => 59s).

BUG=341748
R=haraken
Review URL: https://codereview.chromium.org/184233005

git-svn-id: svn://svn.chromium.org/blink/trunk@168611 bbb929c8-8fbe-4397-9dbb-9b2b20218538

Pre-cache PLY yacc parse table and use optimized mode (45% build time improvement)
PLY yacc parse table creation is (obviously) expensive. Pre-caching the table in a separate build step substantially improves build time: user time drops by 45% on my Linux box (1m 50s => 59s).

BUG=341748
R=haraken
Review URL: https://codereview.chromium.org/184233005

git-svn-id: svn://svn.chromium.org/blink/trunk@168611 bbb929c8-8fbe-4397-9dbb-9b2b20218538
c53653f5 · nbarth@chromium.org · 9abe68fb · c53653f5 · c53653f5 · c53653f5
Commit c53653f5 authored Mar 06, 2014 by nbarth@chromium.org
4 changed files
--- a/third_party/WebKit/Source/bindings/generated_bindings.gyp
+++ b/third_party/WebKit/Source/bindings/generated_bindings.gyp
@@ -119,8 +119,7 @@
      '<(DEPTH)/third_party/jinja2/__init__.py',
      '<(DEPTH)/third_party/markupsafe/__init__.py',  # jinja2 dep
    ],
-    'idl_compiler_files': [
+    'idl_lexer_parser_files': [
-      'scripts/idl_compiler.py',
      # PLY (Python Lex-Yacc)
      '<(DEPTH)/third_party/ply/lex.py',
      '<(DEPTH)/third_party/ply/yacc.py',
@@ -131,6 +130,10 @@
      # Blink IDL lexer/parser/constructor
      'scripts/blink_idl_lexer.py',
      'scripts/blink_idl_parser.py',
+    ],
+    'idl_compiler_files': [
+      'scripts/idl_compiler.py',
+      # Blink IDL front end (excluding lexer/parser)
      'scripts/idl_definitions.py',
      'scripts/idl_reader.py',
      'scripts/idl_validator.py',
@@ -279,6 +282,31 @@
      'message': 'Generating list of Event interfaces',
      }]
  },
+################################################################################
+  {
+    # A separate pre-caching step is *not required* to use parse table caching
+    # in PLY, as the cache is concurrency-safe.
+    # However, pre-caching ensures that all compiler processes use the cached
+    # file (hence maximizing speed), instead of early processes building the
+    # table themselves (as it's not yet been written by the time they start).
+    'target_name': 'cached_yacc_tables',
+    'type': 'none',
+    'actions': [{
+      'action_name': 'cache_yacc_tables',
+      'inputs': [
+        '<@(idl_lexer_parser_files)',
+      ],
+      'outputs': [
+        '<(bindings_output_dir)/parsetab.pickle',
+      ],
+      'action': [
+        'python',
+        'scripts/blink_idl_parser.py',
+        '<(bindings_output_dir)',
+      ],
+      'message': 'Caching PLY yacc parse tables',
+    }],
+  },
 ################################################################################
  {
    # A separate pre-caching step is *required* to use bytecode caching in
@@ -314,6 +342,7 @@
    'hard_dependency': 1,
    'dependencies': [
      'interfaces_info',
+      'cached_yacc_tables',
      'cached_jinja_templates',
      '../core/core_generated.gyp:generated_testing_idls',
    ],
@@ -325,7 +354,9 @@
      'extension': 'idl',
      'msvs_external_rule': 1,
      'inputs': [
+        '<@(idl_lexer_parser_files)',  # to be explicit (covered by parsetab)
        '<@(idl_compiler_files)',
+        '<(bindings_output_dir)/parsetab.pickle',
        '<(bindings_output_dir)/cached_jinja_templates.stamp',
        'IDLExtendedAttributes.txt',
        # If the dependency structure or public interface info (e.g.,

--- a/third_party/WebKit/Source/bindings/scripts/blink_idl_parser.py
+++ b/third_party/WebKit/Source/bindings/scripts/blink_idl_parser.py
@@ -402,14 +402,47 @@ class BlinkIDLParser(IDLParser):
                keys.remove(production_name)
        return list(keys)
-    def __init__(self, lexer=None, verbose=False, debug=False, mute_error=False, outputdir=''):
+    def __init__(self,
+                 # common parameters
+                 debug=False,
+                 # idl_parser parameters
+                 lexer=None, verbose=False, mute_error=False,
+                 # yacc parameters
+                 outputdir='', optimize=True, write_tables=False,
+                 picklefile=None):
+        if debug:
+            # Turn off optimization and caching, and write out tables,
+            # to help debugging
+            optimize = False
+            outputdir = None
+            picklefile = None
+            write_tables = True
+        if outputdir:
+            picklefile = picklefile or os.path.join(outputdir, 'parsetab.pickle')
        lexer = lexer or BlinkIDLLexer()
        self.lexer = lexer
        self.tokens = lexer.KnownTokens()
        # Using SLR (instead of LALR) generates the table faster,
        # but produces the same output. This is ok b/c Web IDL (and Blink IDL)
        # is an SLR grammar (as is often the case for simple LL(1) grammars).
-        self.yaccobj = yacc.yacc(module=self, start=STARTING_SYMBOL, method='SLR', debug=debug, outputdir=outputdir)
+        #
+        # Optimized mode substantially decreases startup time (by disabling
+        # error checking), and also allows use of Python's optimized mode.
+        # See: Using Python's Optimized Mode
+        # http://www.dabeaz.com/ply/ply.html#ply_nn38
+        #
+        # |picklefile| allows simpler importing than |tabmodule| (parsetab.py),
+        # as we don't need to modify sys.path; virtually identical speed.
+        # See: CHANGES, Version 3.2
+        # http://ply.googlecode.com/svn/trunk/CHANGES
+        self.yaccobj = yacc.yacc(module=self,
+                                 start=STARTING_SYMBOL,
+                                 method='SLR',
+                                 debug=debug,
+                                 optimize=optimize,
+                                 write_tables=write_tables,
+                                 picklefile=picklefile)
        self.parse_debug = debug
        self.verbose = verbose
        self.mute_error = mute_error
@@ -420,6 +453,17 @@ class BlinkIDLParser(IDLParser):
        self._last_error_pos = 0
-# If run by itself, attempt to build the parser
+################################################################################
+def main(argv):
+    # If this file is executed directly, cache the parse table
+    try:
+        outputdir = argv[1]
+    except IndexError as err:
+        print 'Usage: %s OUTPUT_DIR' % argv[0]
+        return 1
+    parser = BlinkIDLParser(outputdir=outputdir)
 if __name__ == '__main__':
-    parser = BlinkIDLParser()
+    sys.exit(main(sys.argv))
--- a/third_party/WebKit/Source/bindings/scripts/idl_reader.py
+++ b/third_party/WebKit/Source/bindings/scripts/idl_reader.py
@@ -31,24 +31,25 @@
 import os
 import blink_idl_parser
+from blink_idl_parser import BlinkIDLParser
 from idl_definitions import IdlDefinitions
-import idl_validator
+from idl_validator import IDLInvalidExtendedAttributeError, IDLExtendedAttributeValidator
-import interface_dependency_resolver
+from interface_dependency_resolver import InterfaceDependencyResolver
 class IdlReader(object):
    def __init__(self, interfaces_info=None, idl_attributes_filename=None, outputdir=''):
        if idl_attributes_filename:
-            self.extended_attribute_validator = idl_validator.IDLExtendedAttributeValidator(idl_attributes_filename)
+            self.extended_attribute_validator = IDLExtendedAttributeValidator(idl_attributes_filename)
        else:
            self.extended_attribute_validator = None
        if interfaces_info:
-            self.interface_dependency_resolver = interface_dependency_resolver.InterfaceDependencyResolver(interfaces_info, self)
+            self.interface_dependency_resolver = InterfaceDependencyResolver(interfaces_info, self)
        else:
            self.interface_dependency_resolver = None
-        self.parser = blink_idl_parser.BlinkIDLParser(outputdir=outputdir)
+        self.parser = BlinkIDLParser(outputdir=outputdir)
    def read_idl_definitions(self, idl_filename):
        """Returns an IdlDefinitions object for an IDL file, including all dependencies."""
@@ -72,8 +73,9 @@ class IdlReader(object):
        try:
            self.extended_attribute_validator.validate_extended_attributes(definitions)
-        except idl_validator.IDLInvalidExtendedAttributeError as error:
+        except IDLInvalidExtendedAttributeError as error:
-            raise idl_validator.IDLInvalidExtendedAttributeError("""IDL ATTRIBUTE ERROR in file %s:
+            raise IDLInvalidExtendedAttributeError(
+    """IDL ATTRIBUTE ERROR in file %s:
    %s
 If you want to add a new IDL extended attribute, please add it to
    bindings/IDLExtendedAttributes.txt

--- a/third_party/WebKit/Tools/Scripts/webkitpy/bindings/main.py
+++ b/third_party/WebKit/Tools/Scripts/webkitpy/bindings/main.py
@@ -195,8 +195,9 @@ class BindingsTests(object):
                       os.path.join(self.output_directory, output_file))
                      for output_file in os.listdir(self.output_directory)
                      # Skip cache
-                      if (output_file != 'parsetab.py' and  # PLY cache
+                      if not output_file.endswith(('.pickle',  # PLY yacc
-                          not output_file.endswith('.cache'))]  # Jinja cache
+                                                   '.cache',  # Jinja
+                                                   ))]
        return all([self.identical_file(reference_filename, output_filename)
                    for (reference_filename, output_filename) in file_pairs])