Commit 4e46f03a authored by Kenichi Ishibashi, committed by Commit Bot

IDL parser: Ignore comments

Before this CL the base lexer treated comments as tokens, and the base
parser had custom rules to handle special comments such as copyright
headers. Blink's lexer/parser had to extend the base lexer/parser to
strip comments, since otherwise comments would have to be explicitly
included in the grammar. Since the base parser is only used by Blink,
and Blink doesn't need comments (at least for now), it makes sense to
ignore comments in the base lexer/parser. This lets us remove all the
custom rules from BlinkIDLParser.

BUG=617899

Change-Id: I6ead510680457dfb13ed9797ac056c34c185e292
Reviewed-on: https://chromium-review.googlesource.com/544424
Commit-Queue: Kenichi Ishibashi <bashi@chromium.org>
Reviewed-by: Yuki Shiino <yukishiino@chromium.org>
Reviewed-by: Hitoshi Yoshida <peria@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Cr-Commit-Position: refs/heads/master@{#481863}
parent f2380ef3
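The mechanism the CL relies on is standard PLY behaviour: a t_ rule that returns nothing makes the lexer drop the matched text, so the grammar never has to mention COMMENT at all. Below is a minimal, self-contained sketch of that behaviour (toy token set, not the real IDL lexer); the regex is the one from the diff.

# Minimal sketch of comment-ignoring in PLY (toy tokens, not the Chromium lexer).
import ply.lex as lex

tokens = ('identifier', 'integer')

t_identifier = r'[A-Za-z_][A-Za-z0-9_]*'
t_integer = r'\d+'
t_ignore = ' \t'

def t_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    # Keep line numbers accurate, but return nothing: PLY silently drops the
    # match, so the parser never sees a COMMENT token.
    t.lexer.lineno += t.value.count('\n')

def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

def t_error(t):
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input('/* copyright */ interface 42  // trailing comment')
print([tok.type for tok in lexer])  # ['identifier', 'integer'] -- no COMMENT

The manual lineno bump inside t_COMMENT plays the same role as the base lexer's AddLines helper: it keeps error positions correct even though the comment itself is discarded.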
@@ -70,25 +70,9 @@ sys.path.append(tools_dir)
 from idl_parser.idl_lexer import IDLLexer

 LEXTAB = 'lextab'
-REMOVE_TOKENS = ['COMMENT']


 class BlinkIDLLexer(IDLLexer):
-    # ignore comments
-    def t_COMMENT(self, t):
-        r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
-        self.AddLines(t.value.count('\n'))
-
-    # Analogs to _AddToken/_AddTokens in base lexer
-    # Needed to remove COMMENT token, since comments ignored
-    def _RemoveToken(self, token):
-        if token in self.tokens:
-            self.tokens.remove(token)
-
-    def _RemoveTokens(self, tokens):
-        for token in tokens:
-            self._RemoveToken(token)
-
     def __init__(self, debug=False, optimize=True, outputdir=None,
                  rewrite_tables=False):
         if debug:
@@ -115,8 +99,6 @@ class BlinkIDLLexer(IDLLexer):
             lextab = None
         IDLLexer.__init__(self)
-        # Overrides to parent class
-        self._RemoveTokens(REMOVE_TOKENS)
         # Optimized mode substantially decreases startup time (by disabling
         # error checking), and also allows use of Python's optimized mode.
         # See: Optimized Mode
......
@@ -54,9 +54,6 @@ http://www.chromium.org/developers/design-documents/idl-compiler#TOC-Front-end
 # Disable attribute validation, as lint can't import parent class to check
 # pylint: disable=E1101
 #
-# Disable check for invalid name as patterns use p_ prefix and they take |p|
-# argument
-# pylint: disable=C0103

 import os.path
 import sys
@@ -71,168 +68,14 @@ from ply import yacc
 # Base parser is in Chromium src/tools/idl_parser
 tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools')
 sys.path.append(tools_dir)
-from idl_parser.idl_parser import IDLParser, ListFromConcat
+from idl_parser.idl_parser import IDLParser  # pylint: disable=import-error
 from idl_parser.idl_parser import ParseFile as parse_file

 from blink_idl_lexer import BlinkIDLLexer
 import blink_idl_lexer


-# Explicitly set starting symbol to rule defined only in base parser.
-# BEWARE that the starting symbol should NOT be defined in both the base parser
-# and the derived one, as otherwise which is used depends on which line number
-# is lower, which is fragile. Instead, either use one in base parser or
-# create a new symbol, so that this is unambiguous.
-# FIXME: unfortunately, this doesn't work in PLY 3.4, so need to duplicate the
-# rule below.
-STARTING_SYMBOL = 'Definitions'
-
-# We ignore comments (and hence don't need 'Top') but base parser preserves them
-# FIXME: Upstream: comments should be removed in base parser
-REMOVED_RULES = ['Top',  # [0]
-                 'Comments',  # [0.1]
-                 'CommentsRest',  # [0.2]
-                ]
-
-# Remove rules from base class
-# FIXME: add a class method upstream: @classmethod IDLParser._RemoveRules
-for rule in REMOVED_RULES:
-    production_name = 'p_' + rule
-    delattr(IDLParser, production_name)
-
-
 class BlinkIDLParser(IDLParser):
-    # [1]
-    # FIXME: Need to duplicate rule for starting symbol here, with line number
-    # *lower* than in the base parser (idl_parser.py).
-    # This is a bug in PLY: it determines starting symbol by lowest line number.
-    # This can be overridden by the 'start' parameter, but as of PLY 3.4 this
-    # doesn't work correctly.
-    def p_Definitions(self, p):
-        """Definitions : ExtendedAttributeList Definition Definitions
-                       | """
-        if len(p) > 1:
-            p[2].AddChildren(p[1])
-            p[0] = ListFromConcat(p[2], p[3])
-
-    # Below are grammar rules used by yacc, given by functions named p_<RULE>.
-    # * The docstring is the production rule in BNF (grammar).
-    # * The body is the yacc action (semantics).
-    #
-    # The PLY framework builds the actual low-level parser by introspecting this
-    # parser object, selecting all attributes named p_<RULE> as grammar rules.
-    # It extracts the docstrings and uses them as the production rules, building
-    # the table of a LALR parser, and uses the body of the functions as actions.
-    #
-    # Reference:
-    # http://www.dabeaz.com/ply/ply.html#ply_nn23
-    #
-    # Review of yacc:
-    #   Yacc parses a token stream, internally producing a Concrete Syntax Tree
-    #   (CST), where each node corresponds to a production rule in the grammar.
-    #   At each node, it runs an action, which is usually "produce a node in the
-    #   Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST
-    #   that aren't included in the AST, since only needed for parsing).
-    #
-    # The rules use pseudo-variables; in PLY syntax:
-    #   p[0] is the left side: assign return value to p[0] instead of returning,
-    #   p[1] ... p[n] are the right side: the values can be accessed, and they
-    #   can be modified.
-    #   (In yacc these are $$ and $1 ... $n.)
-    #
-    # The rules can look cryptic at first, but there are a few standard
-    # transforms from the CST to AST. With these in mind, the actions should
-    # be reasonably legible.
-    #
-    # * Ignore production
-    #   Discard this branch. Primarily used when one alternative is empty.
-    #
-    #   Sample code:
-    #   if len(p) > 1:
-    #       p[0] = ...
-    #   # Note no assignment if len(p) == 1
-    #
-    # * Eliminate singleton production
-    #   Discard this node in the CST, pass the next level down up the tree.
-    #   Used to ignore productions only necessary for parsing, but not needed
-    #   in the AST.
-    #
-    #   Sample code:
-    #   p[0] = p[1]
-    #
-    # * Build node
-    #   The key type of rule. In this parser, produces object of class IDLNode.
-    #   There are several helper functions:
-    #   * BuildProduction: actually builds an IDLNode, based on a production.
-    #   * BuildAttribute: builds an IDLAttribute, which is a temporary
-    #                     object to hold a name-value pair, which is then
-    #                     set as a Property of the IDLNode when the IDLNode
-    #                     is built.
-    #   * BuildNamed: Same as BuildProduction, and sets the 'NAME' property.
-    #   * BuildTrue: BuildAttribute with value True, for flags.
-    #   See base idl_parser.py for definitions and more examples of use.
-    #
-    #   Sample code:
-    #   # Build node of type NodeType, with value p[1], and children.
-    #   p[0] = self.BuildProduction('NodeType', p, 1, children)
-    #
-    #   # Build named node of type NodeType, with name and value p[1].
-    #   # (children optional)
-    #   p[0] = self.BuildNamed('NodeType', p, 1)
-    #
-    #   # Make a list
-    #   # Used if one node has several children.
-    #   children = ListFromConcat(p[2], p[3])
-    #   p[0] = self.BuildProduction('NodeType', p, 1, children)
-    #
-    #   # Also used to collapse the right-associative tree
-    #   # produced by parsing a list back into a single list.
-    #   """Foos : Foo Foos
-    #           |"""
-    #   if len(p) > 1:
-    #       p[0] = ListFromConcat(p[1], p[2])
-    #
-    #   # Add children.
-    #   # Primarily used to add attributes, produced via BuildTrue.
-    #   # p_StaticAttribute
-    #   """StaticAttribute : STATIC Attribute"""
-    #   p[2].AddChildren(self.BuildTrue('STATIC'))
-    #   p[0] = p[2]
-    #
-    # Numbering scheme for the rules is:
-    # [1] for Web IDL spec (or additions in base parser)
-    #     These should all be upstreamed to the base parser.
-    # [b1] for Blink IDL changes (overrides Web IDL)
-    # [b1.1] for Blink IDL additions, auxiliary rules for [b1]
-    # Numbers are as per Candidate Recommendation 19 April 2012:
-    # http://www.w3.org/TR/2012/CR-WebIDL-20120419/
-
-    # Extended attributes
-    # [b49] Override base parser: remove comment field, since comments stripped
-    # FIXME: Upstream
-    def p_ExtendedAttributeList(self, p):
-        """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
-                                 | """
-        if len(p) > 3:
-            items = ListFromConcat(p[2], p[3])
-            p[0] = self.BuildProduction('ExtAttributes', p, 1, items)
-
-    # Error handling for ExtendedAttributeList.
-    # We can't upstream this because we override ExtendedAttributeList.
-    def p_ExtendedAttributeListError(self, p):
-        """ExtendedAttributeList : '[' ExtendedAttribute ',' error"""
-        p[0] = self.BuildError(p, "ExtendedAttributeList")
-
-    # Historically we allowed trailing comma but now it's a syntax error.
-    def p_ExtendedAttributes(self, p):
-        """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes
-                              | ','
-                              |"""
-        if len(p) > 3:
-            p[0] = ListFromConcat(p[2], p[3])
-        elif len(p) == 2:
-            p[0] = self.BuildError(p, 'ExtendedAttributes')
-
     def __init__(self,
                  # common parameters
                  debug=False,
@@ -277,7 +120,6 @@ class BlinkIDLParser(IDLParser):
         # See: CHANGES, Version 3.2
         # http://ply.googlecode.com/svn/trunk/CHANGES
         self.yaccobj = yacc.yacc(module=self,
-                                 start=STARTING_SYMBOL,
                                  method='SLR',
                                  debug=debug,
                                  optimize=optimize,
......
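Two PLY details explain the code that could be deleted above: grammar rules are methods named p_<Rule> whose docstring is the BNF production and whose body is the action, and the start symbol defaults to the first rule PLY encounters unless it is passed explicitly via yacc.yacc(start=...), which is why the old code both pinned STARTING_SYMBOL and duplicated p_Definitions. A minimal, self-contained sketch of that pattern (hypothetical toy grammar, not Blink's):

# Minimal sketch of the p_<Rule> convention (toy grammar, not the IDL one).
import ply.lex as lex
import ply.yacc as yacc

class NameListParser(object):
    tokens = ('NAME',)
    literals = [',']

    # Lexer rules: t_<TOKEN> regexes.
    t_NAME = r'[A-Za-z_][A-Za-z0-9_]*'
    t_ignore = ' \t\n'

    def t_error(self, t):
        t.lexer.skip(1)

    # Grammar rule: the docstring is the BNF production, the body is the action.
    def p_Names(self, p):
        """Names : NAME ',' Names
                 | NAME
                 |"""
        if len(p) == 4:
            p[0] = [p[1]] + p[3]   # collapse the right-recursive list
        elif len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = []              # empty alternative

    def p_error(self, p):
        raise SyntaxError('parse error at %r' % (p,))

    def __init__(self):
        self.lexer = lex.lex(object=self)
        # 'start' is optional here: PLY falls back to the first rule it finds,
        # which is what made the removed STARTING_SYMBOL machinery necessary
        # once the derived class stopped defining its own rules.
        self.parser = yacc.yacc(module=self, start='Names',
                                debug=False, write_tables=False)

    def parse(self, text):
        return self.parser.parse(text, lexer=self.lexer)

print(NameListParser().parse('foo, bar, baz'))  # ['foo', 'bar', 'baz']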
@@ -42,7 +42,6 @@ class IDLLexer(object):
       'string',

     # Symbol and keywords types
-      'COMMENT',
       'identifier',

     # MultiChar operators
@@ -145,10 +144,10 @@ class IDLLexer(object):
     return t

   # A C or C++ style comment:  /* xxx */ or //
+  # This token is ignored.
   def t_COMMENT(self, t):
     r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
     self.AddLines(t.value.count('\n'))
-    return t

   # A symbol or keyword.
   def t_KEYWORD_OR_SYMBOL(self, t):
......
@@ -3,7 +3,7 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

-""" Parser for Web IDL """
+"""Parser for Web IDL."""

 #
 # IDL Parser
@@ -34,7 +34,8 @@ import sys
 import time

 from idl_lexer import IDLLexer
-from idl_node import IDLAttribute, IDLNode
+from idl_node import IDLAttribute
+from idl_node import IDLNode

 SRC_DIR = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
 sys.path.insert(0, os.path.join(SRC_DIR, 'third_party'))
@@ -52,7 +53,6 @@ ERROR_REMAP = {
   'Unexpected ")" after ",".' : 'Missing argument.',
   'Unexpected "}" after ",".' : 'Trailing comma in block.',
   'Unexpected "}" after "{".' : 'Unexpected empty block.',
-  'Unexpected comment after "}".' : 'Unexpected trailing comment.',
   'Unexpected "{" after keyword "enum".' : 'Enum missing name.',
   'Unexpected "{" after keyword "struct".' : 'Struct missing name.',
   'Unexpected "{" after keyword "interface".' : 'Interface missing name.',
@@ -101,8 +101,6 @@ def TokenTypeName(t):
     return 'value %s' % t.value
   if t.type == 'string' :
     return 'string "%s"' % t.value
-  if t.type == 'COMMENT' :
-    return 'comment'
   if t.type == t.value:
     return '"%s"' % t.value
   if t.type == ',':
@@ -129,51 +127,73 @@ def TokenTypeName(t):
 # and p[n] is the set of inputs for positive values of 'n'. Len(p) can be
 # used to distinguish between multiple item sets in the pattern.
 #
-# For more details on parsing refer to the PLY documentation at
-#    http://www.dabeaz.com/ply/
-#
-# The parser is based on the WebIDL standard. See:
-#    http://heycam.github.io/webidl/#idl-grammar
-#
-# The various productions are annotated so that the WHOLE number greater than
-# zero in the comment denotes the matching WebIDL grammar definition.
-#
-# Productions with a fractional component in the comment denote additions to
-# the WebIDL spec, such as comments.
-#
-
-
-class IDLParser(object):
-#
-# We force all input files to start with two comments. The first comment is a
-# Copyright notice followed by a file comment and finally by file level
-# productions.
-#
-  # [0] Insert a TOP definition for Copyright and Comments
-  def p_Top(self, p):
-    """Top : COMMENT COMMENT Definitions"""
-    Copyright = self.BuildComment('Copyright', p, 1)
-    Filedoc = self.BuildComment('Comment', p, 2)
-    p[0] = ListFromConcat(Copyright, Filedoc, p[3])
-
-  # [0.1] Add support for Multiple COMMENTS
-  def p_Comments(self, p):
-    """Comments : CommentsRest"""
-    if len(p) > 1:
-      p[0] = p[1]
-
-  # [0.2] Produce a COMMENT and aggregate sibling comments
-  def p_CommentsRest(self, p):
-    """CommentsRest : COMMENT CommentsRest
-                    | """
-    if len(p) > 1:
-      p[0] = ListFromConcat(self.BuildComment('Comment', p, 1), p[2])
-
-#
-#The parser is based on the WebIDL standard. See:
-#    http://heycam.github.io/webidl/#idl-grammar
-#
+# The rules can look cryptic at first, but there are a few standard
+# transforms from the CST to AST. With these in mind, the actions should
+# be reasonably legible.
+#
+# * Ignore production
+#   Discard this branch. Primarily used when one alternative is empty.
+#
+#   Sample code:
+#   if len(p) > 1:
+#       p[0] = ...
+#   # Note no assignment if len(p) == 1
+#
+# * Eliminate singleton production
+#   Discard this node in the CST, pass the next level down up the tree.
+#   Used to ignore productions only necessary for parsing, but not needed
+#   in the AST.
+#
+#   Sample code:
+#   p[0] = p[1]
+#
+# * Build node
+#   The key type of rule. In this parser, produces object of class IDLNode.
+#   There are several helper functions:
+#   * BuildProduction: actually builds an IDLNode, based on a production.
+#   * BuildAttribute: builds an IDLAttribute, which is a temporary
+#                     object to hold a name-value pair, which is then
+#                     set as a Property of the IDLNode when the IDLNode
+#                     is built.
+#   * BuildNamed: Same as BuildProduction, and sets the 'NAME' property.
+#   * BuildTrue: BuildAttribute with value True, for flags.
+#
+#   Sample code:
+#   # Build node of type NodeType, with value p[1], and children.
+#   p[0] = self.BuildProduction('NodeType', p, 1, children)
+#
+#   # Build named node of type NodeType, with name and value p[1].
+#   # (children optional)
+#   p[0] = self.BuildNamed('NodeType', p, 1)
+#
+#   # Make a list
+#   # Used if one node has several children.
+#   children = ListFromConcat(p[2], p[3])
+#   p[0] = self.BuildProduction('NodeType', p, 1, children)
+#
+#   # Also used to collapse the right-associative tree
+#   # produced by parsing a list back into a single list.
+#   """Foos : Foo Foos
+#           |"""
+#   if len(p) > 1:
+#       p[0] = ListFromConcat(p[1], p[2])
+#
+#   # Add children.
+#   # Primarily used to add attributes, produced via BuildTrue.
+#   # p_StaticAttribute
+#   """StaticAttribute : STATIC Attribute"""
+#   p[2].AddChildren(self.BuildTrue('STATIC'))
+#   p[0] = p[2]
+#
+# For more details on parsing refer to the PLY documentation at
+#    http://www.dabeaz.com/ply/
+#
+# The parser is based on the Web IDL standard. See:
+#    http://heycam.github.io/webidl/#idl-grammar
+#
+# Productions with a fractional component in the comment denote additions to
+# the Web IDL spec, such as allowing string list in extended attributes.
+
+
+class IDLParser(object):
   # [1]
   def p_Definitions(self, p):
     """Definitions : ExtendedAttributeList Definition Definitions
@@ -410,7 +430,7 @@ class IDLParser(object):

   # [24]
   def p_Typedef(self, p):
-    """Typedef : TYPEDEF ExtendedAttributeListNoComments Type identifier ';'"""
+    """Typedef : TYPEDEF ExtendedAttributeList Type identifier ';'"""
     p[0] = self.BuildNamed('Typedef', p, 4, ListFromConcat(p[2], p[3]))

   # [24.1] Error recovery for Typedefs
@@ -785,24 +805,18 @@ class IDLParser(object):
     """SetlikeRest : SETLIKE '<' Type '>' ';'"""
     p[0] = self.BuildProduction('Setlike', p, 2, p[3])

-  # [65] No comment version for mid statement attributes.
-  def p_ExtendedAttributeListNoComments(self, p):
-    """ExtendedAttributeListNoComments : '[' ExtendedAttribute ExtendedAttributes ']'
-                                       | """
-    if len(p) > 2:
-      items = ListFromConcat(p[2], p[3])
-      p[0] = self.BuildProduction('ExtAttributes', p, 1, items)
-
-  # [65.1] Add optional comment field for start of statements.
-  def p_ExtendedAttributeList(self, p):
-    """ExtendedAttributeList : Comments '[' ExtendedAttribute ExtendedAttributes ']'
-                             | Comments """
-    if len(p) > 2:
-      items = ListFromConcat(p[3], p[4])
-      attribs = self.BuildProduction('ExtAttributes', p, 2, items)
-      p[0] = ListFromConcat(p[1], attribs)
-    else:
-      p[0] = p[1]
+  # [65]
+  def p_ExtendedAttributeList(self, p):
+    """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
+                             | """
+    if len(p) > 3:
+      items = ListFromConcat(p[2], p[3])
+      p[0] = self.BuildProduction('ExtAttributes', p, 1, items)
+
+  # Error recovery for ExtendedAttributeList
+  def p_ExtendedAttributeListError(self, p):
+    """ExtendedAttributeList : '[' ExtendedAttribute ',' error"""
+    p[0] = self.BuildError(p, 'ExtendedAttributeList')

   # [66]
   def p_ExtendedAttributes(self, p):
@@ -1131,7 +1145,6 @@ class IDLParser(object):
     p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'DOMString'),
                           self.BuildAttribute('NAME', p[1]))

-
   # [99]
   def p_StringType(self, p):
     """StringType : BYTESTRING
@@ -1238,35 +1251,6 @@ class IDLParser(object):
       childlist.append(self.BuildAttribute('NAME', p[index]))
     return self.BuildProduction(cls, p, index, childlist)

-  def BuildComment(self, cls, p, index):
-    name = p[index]
-
-    # Remove comment markers
-    lines = []
-    if name[:2] == '//':
-      # For C++ style, remove any leading whitespace and the '//' marker from
-      # each line.
-      form = 'cc'
-      for line in name.split('\n'):
-        start = line.find('//')
-        lines.append(line[start+2:])
-    else:
-      # For C style, remove ending '*/''
-      form = 'c'
-      for line in name[:-2].split('\n'):
-        # Remove characters until start marker for this line '*' if found
-        # otherwise it should be blank.
-        offs = line.find('*')
-        if offs >= 0:
-          line = line[offs + 1:].rstrip()
-        else:
-          line = ''
-        lines.append(line)
-    name = '\n'.join(lines)
-
-    childlist = [self.BuildAttribute('NAME', name),
-                 self.BuildAttribute('FORM', form)]
-    return self.BuildProduction(cls, p, index, childlist)
-
   #
   # BuildError
   #
......
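The reworked extended-attribute rules above pair the normal production with a recovery production built on yacc's special 'error' symbol, turning a malformed attribute list into an error node instead of aborting the parse. A self-contained sketch of that pattern (toy grammar and tuple nodes; the real parser builds IDLNode objects via BuildError and remaps messages through ERROR_REMAP):

# Toy illustration of the 'error'-symbol recovery pattern (not the Web IDL grammar).
import ply.lex as lex
import ply.yacc as yacc

tokens = ('ID',)
literals = ['[', ']', ',']

t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
t_ignore = ' \t\n'

def t_error(t):
    t.lexer.skip(1)

def p_attr_list(p):
    """attr_list : '[' ID attrs ']'"""
    p[0] = ('ExtAttributes', [p[2]] + p[3])

def p_attrs(p):
    """attrs : ',' ID attrs
             |"""
    p[0] = [p[2]] + p[3] if len(p) > 1 else []

def p_attr_list_error(p):
    """attr_list : '[' ID ',' error"""
    # Mirrors p_ExtendedAttributeListError: swallow the malformed tail and
    # hand back an error node so the caller can keep going.
    p[0] = ('Error', 'ExtendedAttributeList')

def p_error(p):
    # Invoked with the offending token; recovery continues via the rule above.
    pass

lexer = lex.lex()
parser = yacc.yacc(debug=False, write_tables=False)
print(parser.parse('[Clamp, EnforceRange]', lexer=lexer))  # ('ExtAttributes', ['Clamp', 'EnforceRange'])
print(parser.parse('[Clamp, ]', lexer=lexer))              # ('Error', 'ExtendedAttributeList')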
 integer 1 integer 123 integer 12345
 identifier A123 identifier A_A
-COMMENT /*XXXX*/
-COMMENT //XXXX
-COMMENT /*MULTI LINE*/
+/*XXXX*/
+//XXXX
+/*MULTI LINE*/
 [ [
 ] ]
......