Commit 4312e7df authored by nbarth@chromium.org

Pre-cache PLY lex table and use optimized mode (5% build time improvement)

PLY lex table creation is (obviously) expensive.
Pre-caching this in a separate build step improves build time.

This is much less of an improvement than the parse table,
as the lexical grammar is much simpler than the phrase grammar,
but:
* it's a quick win, and
* will allow us to use Python's optimized mode (hopefully more win).

Improves build time (user time) by 5% on my Linux box:
59s => 56s
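
For reference, the mechanism is PLY's own lex-table caching; a minimal,
self-contained sketch (toy token rules, table name, and cache directory are
illustrative, not the Blink code):

    # Minimal PLY lex-table caching sketch (illustrative; not Blink's code).
    # With optimize=1, lex skips its consistency checks and, on the first run,
    # writes the computed table to <outputdir>/lextab.py; later runs import
    # that module (outputdir must be on sys.path) instead of recomputing it.
    import os
    from ply import lex

    class ToyLexer(object):
        tokens = ('NUMBER',)
        t_NUMBER = r'\d+'
        t_ignore = ' \t'

        def t_error(self, t):
            t.lexer.skip(1)

    cache_dir = '/tmp/ply_cache'  # assumed location
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    lexer = lex.lex(object=ToyLexer(), optimize=1,
                    lextab='lextab', outputdir=cache_dir)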

Followup to:
Pre-cache PLY yacc parse table and use optimized mode (45% build time improvement)
https://codereview.chromium.org/184233005/

BUG=341748
R=haraken

Review URL: https://codereview.chromium.org/183013013

git-svn-id: svn://svn.chromium.org/blink/trunk@168630 bbb929c8-8fbe-4397-9dbb-9b2b20218538
parent fd51a7c0
@@ -284,11 +284,11 @@
     },
     ################################################################################
     {
-      # A separate pre-caching step is *not required* to use parse table caching
-      # in PLY, as the cache is concurrency-safe.
+      # A separate pre-caching step is *not required* to use lex/parse table
+      # caching in PLY, as the caches are concurrency-safe.
       # However, pre-caching ensures that all compiler processes use the cached
-      # file (hence maximizing speed), instead of early processes building the
-      # table themselves (as it's not yet been written by the time they start).
+      # files (hence maximizing speed), instead of early processes building the
+      # tables themselves (as they've not yet been written when they start).
       'target_name': 'cached_yacc_tables',
       'type': 'none',
       'actions': [{
@@ -297,6 +297,7 @@
           '<@(idl_lexer_parser_files)',
         ],
        'outputs': [
+          '<(bindings_output_dir)/lextab.py',
           '<(bindings_output_dir)/parsetab.pickle',
         ],
        'action': [
@@ -304,7 +305,7 @@
           'scripts/blink_idl_parser.py',
           '<(bindings_output_dir)',
         ],
-        'message': 'Caching PLY yacc parse tables',
+        'message': 'Caching PLY lex & yacc lex/parse tables',
       }],
     },
     ################################################################################
@@ -356,6 +357,7 @@
       'inputs': [
         '<@(idl_lexer_parser_files)',  # to be explicit (covered by parsetab)
         '<@(idl_compiler_files)',
+        '<(bindings_output_dir)/lextab.py',
         '<(bindings_output_dir)/parsetab.pickle',
         '<(bindings_output_dir)/cached_jinja_templates.stamp',
         'IDLExtendedAttributes.txt',
...
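
In effect, the pre-caching target runs the parser module once against the
output directory so both cache files exist before any compiler process starts.
Roughly (the 'python' executable and the expanded output path below are
assumptions, not the real gyp-expanded values):

    # Rough equivalent of the gyp action above (paths are illustrative).
    import subprocess

    subprocess.check_call(['python', 'scripts/blink_idl_parser.py',
                           'out/Release/gen/blink/bindings'])
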
@@ -54,11 +54,16 @@ PLY:
 import os.path
 import sys
 
-# Base lexer is in Chromium src/tools/idl_parser
+# PLY is in Chromium src/third_party/ply
 module_path, module_name = os.path.split(__file__)
-tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools')
-sys.path.append(tools_dir)
+third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir)
+# Insert at front to override system libraries, and after path[0] == script dir
+sys.path.insert(1, third_party)
+from ply import lex
+
+# Base lexer is in Chromium src/tools/idl_parser
+tools_dir = os.path.join(third_party, os.pardir, 'tools')
+sys.path.append(tools_dir)
 from idl_parser.idl_lexer import IDLLexer
 
 REMOVE_TOKENS = ['COMMENT']
@@ -80,11 +85,40 @@ class BlinkIDLLexer(IDLLexer):
         for token in tokens:
             self._RemoveToken(token)
 
-    def __init__(self):
+    def __init__(self, debug=False, optimize=True, outputdir=None):
+        if debug:
+            # Turn off optimization and caching to help debugging
+            optimize = False
+            outputdir = None
+        if outputdir:
+            # Need outputdir in path because lex imports the cached lex table
+            # as a Python module
+            sys.path.append(outputdir)
+
         IDLLexer.__init__(self)
+        # Overrides to parent class
         self._RemoveTokens(REMOVE_TOKENS)
+        # Optimized mode substantially decreases startup time (by disabling
+        # error checking), and also allows use of Python's optimized mode.
+        # See: Optimized Mode
+        # http://www.dabeaz.com/ply/ply.html#ply_nn15
+        self._lexobj = lex.lex(object=self,
+                               debug=debug,
+                               optimize=optimize,
+                               outputdir=outputdir)
+
+
+################################################################################
+
+def main(argv):
+    # If file itself executed, build and cache lex table
+    try:
+        outputdir = argv[1]
+    except IndexError as err:
+        print 'Usage: %s OUTPUT_DIR' % argv[0]
+        return 1
+    lexer = BlinkIDLLexer(outputdir=outputdir)
+
 
-# If run by itself, attempt to build the lexer
 if __name__ == '__main__':
-    lexer = BlinkIDLLexer()
+    sys.exit(main(sys.argv))
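
A hedged usage sketch of the new constructor arguments (module import name and
cache directory are assumptions, not taken from this CL):

    # Illustrative use of the new BlinkIDLLexer arguments.
    import os
    from blink_idl_lexer import BlinkIDLLexer

    cache_dir = '/tmp/blink_bindings_cache'    # made-up location
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    cold = BlinkIDLLexer(outputdir=cache_dir)  # first call builds and writes lextab.py
    warm = BlinkIDLLexer(outputdir=cache_dir)  # later calls import the cached table
    noisy = BlinkIDLLexer(debug=True)          # debug run: caching/optimization turned off
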
@@ -420,7 +420,9 @@ class BlinkIDLParser(IDLParser):
         if outputdir:
             picklefile = picklefile or os.path.join(outputdir, 'parsetab.pickle')
-        lexer = lexer or BlinkIDLLexer()
+        lexer = lexer or BlinkIDLLexer(debug=debug,
+                                       outputdir=outputdir,
+                                       optimize=optimize)
         self.lexer = lexer
         self.tokens = lexer.KnownTokens()
         # Using SLR (instead of LALR) generates the table faster,
...
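
For completeness, the parse-table side of the cache (from the earlier CL
referenced above) hinges on yacc.yacc()'s picklefile and optimize arguments;
a small self-contained sketch with a toy grammar (grammar, names, and paths
are illustrative, not the Blink grammar):

    # Toy grammar showing PLY parse-table pickling with SLR + optimized mode.
    from ply import lex, yacc

    tokens = ('NUMBER', 'PLUS')
    t_NUMBER = r'\d+'
    t_PLUS = r'\+'
    t_ignore = ' '

    def t_error(t):
        t.lexer.skip(1)

    def p_expr_plus(p):
        'expr : expr PLUS NUMBER'
        p[0] = p[1] + int(p[3])

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = int(p[1])

    def p_error(p):
        pass

    lexer = lex.lex(optimize=1, lextab='toy_lextab')
    # First run writes toy_parsetab.pickle; later runs load it instead of
    # rebuilding the table.  SLR generates the table faster than LALR.
    parser = yacc.yacc(method='SLR', optimize=1, debug=False,
                       picklefile='toy_parsetab.pickle')
    print(parser.parse('1+2', lexer=lexer))
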
@@ -194,10 +194,11 @@ class BindingsTests(object):
         file_pairs = [(os.path.join(reference_directory, output_file),
                        os.path.join(self.output_directory, output_file))
                       for output_file in os.listdir(self.output_directory)
-                      # Skip cache
-                      if not output_file.endswith(('.pickle',  # PLY yacc
-                                                   '.cache',  # Jinja
-                                                   ))]
+                      # Skip caches
+                      if not (output_file in ('lextab.py',  # PLY lex
+                                              'lextab.pyc',
+                                              'parsetab.pickle') or  # PLY yacc
+                              output_file.endswith('.cache'))]  # Jinja
         return all([self.identical_file(reference_filename, output_filename)
                     for (reference_filename, output_filename) in file_pairs])
...
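
A quick sanity check of the new skip predicate (file names here are examples):

    # Cache files are excluded from reference-output comparison.
    def is_cache_file(name):
        return (name in ('lextab.py', 'lextab.pyc', 'parsetab.pickle') or
                name.endswith('.cache'))

    assert is_cache_file('lextab.py')
    assert is_cache_file('templates.cache')
    assert not is_cache_file('V8TestObject.cpp')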