Commit 4e46f03a authored by Kenichi Ishibashi, committed by Commit Bot

IDL parser: Ignore comments

Before this CL the base lexer treated comments as tokens, and the base
parser had custom rules to handle special comments such as copyright
headers. Blink's lexer/parser had to extend the base lexer/parser to
strip comments, since otherwise comments would have to be explicitly
included in the grammar. Since the base parser is only used by Blink,
and Blink doesn't need comments (at least for now), it makes sense to
ignore comments in the base lexer/parser. This lets us remove all the
custom rules from BlinkIDLParser.

BUG=617899

Change-Id: I6ead510680457dfb13ed9797ac056c34c185e292
Reviewed-on: https://chromium-review.googlesource.com/544424
Commit-Queue: Kenichi Ishibashi <bashi@chromium.org>
Reviewed-by: Yuki Shiino <yukishiino@chromium.org>
Reviewed-by: Hitoshi Yoshida <peria@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Cr-Commit-Position: refs/heads/master@{#481863}
parent f2380ef3
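The mechanism the CL relies on is standard PLY behaviour: a t_ rule that returns nothing makes the lexer drop the matched text, so the grammar never has to mention COMMENT at all. Below is a minimal, self-contained sketch of that behaviour (toy token set, not the real IDL lexer); the regex is the one from the diff.

# Minimal sketch of comment-ignoring in PLY (toy tokens, not the Chromium lexer).
import ply.lex as lex

tokens = ('identifier', 'integer')

t_identifier = r'[A-Za-z_][A-Za-z0-9_]*'
t_integer = r'\d+'
t_ignore = ' \t'

def t_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    # Keep line numbers accurate, but return nothing: PLY silently drops the
    # match, so the parser never sees a COMMENT token.
    t.lexer.lineno += t.value.count('\n')

def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

def t_error(t):
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input('/* copyright */ interface 42  // trailing comment')
print([tok.type for tok in lexer])  # ['identifier', 'integer'] -- no COMMENT

The manual lineno bump inside t_COMMENT plays the same role as the base lexer's AddLines helper: it keeps error positions correct even though the comment itself is discarded.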
@@ -70,25 +70,9 @@ sys.path.append(tools_dir)
 from idl_parser.idl_lexer import IDLLexer

 LEXTAB = 'lextab'
-REMOVE_TOKENS = ['COMMENT']


 class BlinkIDLLexer(IDLLexer):
-    # ignore comments
-    def t_COMMENT(self, t):
-        r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
-        self.AddLines(t.value.count('\n'))
-
-    # Analogs to _AddToken/_AddTokens in base lexer
-    # Needed to remove COMMENT token, since comments ignored
-    def _RemoveToken(self, token):
-        if token in self.tokens:
-            self.tokens.remove(token)
-
-    def _RemoveTokens(self, tokens):
-        for token in tokens:
-            self._RemoveToken(token)
-
     def __init__(self, debug=False, optimize=True, outputdir=None,
                  rewrite_tables=False):
         if debug:
@@ -115,8 +99,6 @@ class BlinkIDLLexer(IDLLexer):
             lextab = None
         IDLLexer.__init__(self)
-        # Overrides to parent class
-        self._RemoveTokens(REMOVE_TOKENS)
         # Optimized mode substantially decreases startup time (by disabling
         # error checking), and also allows use of Python's optimized mode.
         # See: Optimized Mode
......
@@ -54,9 +54,6 @@ http://www.chromium.org/developers/design-documents/idl-compiler#TOC-Front-end
 # Disable attribute validation, as lint can't import parent class to check
 # pylint: disable=E1101
 #
-# Disable check for invalid name as patterns use p_ prefix and they take |p|
-# argument
-# pylint: disable=C0103

 import os.path
 import sys
@@ -71,168 +68,14 @@ from ply import yacc
 # Base parser is in Chromium src/tools/idl_parser
 tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools')
 sys.path.append(tools_dir)
-from idl_parser.idl_parser import IDLParser, ListFromConcat
+from idl_parser.idl_parser import IDLParser  # pylint: disable=import-error
 from idl_parser.idl_parser import ParseFile as parse_file

 from blink_idl_lexer import BlinkIDLLexer
 import blink_idl_lexer


-# Explicitly set starting symbol to rule defined only in base parser.
-# BEWARE that the starting symbol should NOT be defined in both the base parser
-# and the derived one, as otherwise which is used depends on which line number
-# is lower, which is fragile. Instead, either use one in base parser or
-# create a new symbol, so that this is unambiguous.
-# FIXME: unfortunately, this doesn't work in PLY 3.4, so need to duplicate the
-# rule below.
-STARTING_SYMBOL = 'Definitions'
-
-# We ignore comments (and hence don't need 'Top') but base parser preserves them
-# FIXME: Upstream: comments should be removed in base parser
-REMOVED_RULES = ['Top',  # [0]
-                 'Comments',  # [0.1]
-                 'CommentsRest',  # [0.2]
-                ]
-
-# Remove rules from base class
-# FIXME: add a class method upstream: @classmethod IDLParser._RemoveRules
-for rule in REMOVED_RULES:
-    production_name = 'p_' + rule
-    delattr(IDLParser, production_name)
-
-
 class BlinkIDLParser(IDLParser):
-    # [1]
-    # FIXME: Need to duplicate rule for starting symbol here, with line number
-    # *lower* than in the base parser (idl_parser.py).
-    # This is a bug in PLY: it determines starting symbol by lowest line number.
-    # This can be overridden by the 'start' parameter, but as of PLY 3.4 this
-    # doesn't work correctly.
-    def p_Definitions(self, p):
-        """Definitions : ExtendedAttributeList Definition Definitions
-                       | """
-        if len(p) > 1:
-            p[2].AddChildren(p[1])
-            p[0] = ListFromConcat(p[2], p[3])
-
-    # Below are grammar rules used by yacc, given by functions named p_<RULE>.
-    # * The docstring is the production rule in BNF (grammar).
-    # * The body is the yacc action (semantics).
-    #
-    # The PLY framework builds the actual low-level parser by introspecting this
-    # parser object, selecting all attributes named p_<RULE> as grammar rules.
-    # It extracts the docstrings and uses them as the production rules, building
-    # the table of a LALR parser, and uses the body of the functions as actions.
-    #
-    # Reference:
-    # http://www.dabeaz.com/ply/ply.html#ply_nn23
-    #
-    # Review of yacc:
-    #   Yacc parses a token stream, internally producing a Concrete Syntax Tree
-    #   (CST), where each node corresponds to a production rule in the grammar.
-    #   At each node, it runs an action, which is usually "produce a node in the
-    #   Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST
-    #   that aren't included in the AST, since only needed for parsing).
-    #
-    # The rules use pseudo-variables; in PLY syntax:
-    #   p[0] is the left side: assign return value to p[0] instead of returning,
-    #   p[1] ... p[n] are the right side: the values can be accessed, and they
-    #   can be modified.
-    #   (In yacc these are $$ and $1 ... $n.)
-    #
-    # The rules can look cryptic at first, but there are a few standard
-    # transforms from the CST to AST. With these in mind, the actions should
-    # be reasonably legible.
-    #
-    # * Ignore production
-    #   Discard this branch. Primarily used when one alternative is empty.
-    #
-    #   Sample code:
-    #   if len(p) > 1:
-    #       p[0] = ...
-    #   # Note no assignment if len(p) == 1
-    #
-    # * Eliminate singleton production
-    #   Discard this node in the CST, pass the next level down up the tree.
-    #   Used to ignore productions only necessary for parsing, but not needed
-    #   in the AST.
-    #
-    #   Sample code:
-    #   p[0] = p[1]
-    #
-    # * Build node
-    #   The key type of rule. In this parser, produces object of class IDLNode.
-    #   There are several helper functions:
-    #   * BuildProduction: actually builds an IDLNode, based on a production.
-    #   * BuildAttribute: builds an IDLAttribute, which is a temporary
-    #                     object to hold a name-value pair, which is then
-    #                     set as a Property of the IDLNode when the IDLNode
-    #                     is built.
-    #   * BuildNamed: Same as BuildProduction, and sets the 'NAME' property.
-    #   * BuildTrue: BuildAttribute with value True, for flags.
-    #   See base idl_parser.py for definitions and more examples of use.
-    #
-    #   Sample code:
-    #   # Build node of type NodeType, with value p[1], and children.
-    #   p[0] = self.BuildProduction('NodeType', p, 1, children)
-    #
-    #   # Build named node of type NodeType, with name and value p[1].
-    #   # (children optional)
-    #   p[0] = self.BuildNamed('NodeType', p, 1)
-    #
-    #   # Make a list
-    #   # Used if one node has several children.
-    #   children = ListFromConcat(p[2], p[3])
-    #   p[0] = self.BuildProduction('NodeType', p, 1, children)
-    #
-    #   # Also used to collapse the right-associative tree
-    #   # produced by parsing a list back into a single list.
-    #   """Foos : Foo Foos
-    #           |"""
-    #   if len(p) > 1:
-    #       p[0] = ListFromConcat(p[1], p[2])
-    #
-    #   # Add children.
-    #   # Primarily used to add attributes, produced via BuildTrue.
-    #   # p_StaticAttribute
-    #   """StaticAttribute : STATIC Attribute"""
-    #   p[2].AddChildren(self.BuildTrue('STATIC'))
-    #   p[0] = p[2]
-    #
-    # Numbering scheme for the rules is:
-    # [1] for Web IDL spec (or additions in base parser)
-    #     These should all be upstreamed to the base parser.
-    # [b1] for Blink IDL changes (overrides Web IDL)
-    # [b1.1] for Blink IDL additions, auxiliary rules for [b1]
-    # Numbers are as per Candidate Recommendation 19 April 2012:
-    # http://www.w3.org/TR/2012/CR-WebIDL-20120419/
-
-    # Extended attributes
-    # [b49] Override base parser: remove comment field, since comments stripped
-    # FIXME: Upstream
-    def p_ExtendedAttributeList(self, p):
-        """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
-                                 | """
-        if len(p) > 3:
-            items = ListFromConcat(p[2], p[3])
-            p[0] = self.BuildProduction('ExtAttributes', p, 1, items)
-
-    # Error handling for ExtendedAttributeList.
-    # We can't upstream this because we override ExtendedAttributeList.
-    def p_ExtendedAttributeListError(self, p):
-        """ExtendedAttributeList : '[' ExtendedAttribute ',' error"""
-        p[0] = self.BuildError(p, "ExtendedAttributeList")
-
-    # Historically we allowed trailing comma but now it's a syntax error.
-    def p_ExtendedAttributes(self, p):
-        """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes
-                              | ','
-                              |"""
-        if len(p) > 3:
-            p[0] = ListFromConcat(p[2], p[3])
-        elif len(p) == 2:
-            p[0] = self.BuildError(p, 'ExtendedAttributes')
-
     def __init__(self,
                  # common parameters
                  debug=False,
@@ -277,7 +120,6 @@ class BlinkIDLParser(IDLParser):
         # See: CHANGES, Version 3.2
         # http://ply.googlecode.com/svn/trunk/CHANGES
         self.yaccobj = yacc.yacc(module=self,
-                                 start=STARTING_SYMBOL,
                                  method='SLR',
                                  debug=debug,
                                  optimize=optimize,
......
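Two PLY details explain the code that could be deleted above: grammar rules are methods named p_<Rule> whose docstring is the BNF production and whose body is the action, and the start symbol defaults to the first rule PLY encounters unless it is passed explicitly via yacc.yacc(start=...), which is why the old code both pinned STARTING_SYMBOL and duplicated p_Definitions. A minimal, self-contained sketch of that pattern (hypothetical toy grammar, not Blink's):

# Minimal sketch of the p_<Rule> convention (toy grammar, not the IDL one).
import ply.lex as lex
import ply.yacc as yacc

class NameListParser(object):
    tokens = ('NAME',)
    literals = [',']

    # Lexer rules: t_<TOKEN> regexes.
    t_NAME = r'[A-Za-z_][A-Za-z0-9_]*'
    t_ignore = ' \t\n'

    def t_error(self, t):
        t.lexer.skip(1)

    # Grammar rule: the docstring is the BNF production, the body is the action.
    def p_Names(self, p):
        """Names : NAME ',' Names
                 | NAME
                 |"""
        if len(p) == 4:
            p[0] = [p[1]] + p[3]   # collapse the right-recursive list
        elif len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = []              # empty alternative

    def p_error(self, p):
        raise SyntaxError('parse error at %r' % (p,))

    def __init__(self):
        self.lexer = lex.lex(object=self)
        # 'start' is optional here: PLY falls back to the first rule it finds,
        # which is what made the removed STARTING_SYMBOL machinery necessary
        # once the derived class stopped defining its own rules.
        self.parser = yacc.yacc(module=self, start='Names',
                                debug=False, write_tables=False)

    def parse(self, text):
        return self.parser.parse(text, lexer=self.lexer)

print(NameListParser().parse('foo, bar, baz'))  # ['foo', 'bar', 'baz']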
@@ -42,7 +42,6 @@ class IDLLexer(object):
       'string',

     # Symbol and keywords types
-      'COMMENT',
       'identifier',

     # MultiChar operators
@@ -145,10 +144,10 @@ class IDLLexer(object):
     return t

   # A C or C++ style comment:  /* xxx */ or //
+  # This token is ignored.
   def t_COMMENT(self, t):
     r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
     self.AddLines(t.value.count('\n'))
-    return t

   # A symbol or keyword.
   def t_KEYWORD_OR_SYMBOL(self, t):
......
@@ -3,7 +3,7 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

-""" Parser for Web IDL """
+"""Parser for Web IDL."""

 #
 # IDL Parser
@@ -34,7 +34,8 @@ import sys
 import time

 from idl_lexer import IDLLexer
-from idl_node import IDLAttribute, IDLNode
+from idl_node import IDLAttribute
+from idl_node import IDLNode

 SRC_DIR = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
 sys.path.insert(0, os.path.join(SRC_DIR, 'third_party'))
@@ -52,7 +53,6 @@ ERROR_REMAP = {
   'Unexpected ")" after ",".' : 'Missing argument.',
   'Unexpected "}" after ",".' : 'Trailing comma in block.',
   'Unexpected "}" after "{".' : 'Unexpected empty block.',
-  'Unexpected comment after "}".' : 'Unexpected trailing comment.',
   'Unexpected "{" after keyword "enum".' : 'Enum missing name.',
   'Unexpected "{" after keyword "struct".' : 'Struct missing name.',
   'Unexpected "{" after keyword "interface".' : 'Interface missing name.',
@@ -101,8 +101,6 @@ def TokenTypeName(t):
     return 'value %s' % t.value
   if t.type == 'string' :
     return 'string "%s"' % t.value
-  if t.type == 'COMMENT' :
-    return 'comment'
   if t.type == t.value:
     return '"%s"' % t.value
   if t.type == ',':
@@ -129,51 +127,73 @@ def TokenTypeName(t):
 # and p[n] is the set of inputs for positive values of 'n'. Len(p) can be
 # used to distinguish between multiple item sets in the pattern.
 #
-# For more details on parsing refer to the PLY documentation at
-#    http://www.dabeaz.com/ply/
-#
-# The parser is based on the WebIDL standard. See:
-#    http://heycam.github.io/webidl/#idl-grammar
-#
-# The various productions are annotated so that the WHOLE number greater than
-# zero in the comment denotes the matching WebIDL grammar definition.
-#
-# Productions with a fractional component in the comment denote additions to
-# the WebIDL spec, such as comments.
-#
-
-
-class IDLParser(object):
-#
-# We force all input files to start with two comments. The first comment is a
-# Copyright notice followed by a file comment and finally by file level
-# productions.
-#
-  # [0] Insert a TOP definition for Copyright and Comments
-  def p_Top(self, p):
-    """Top : COMMENT COMMENT Definitions"""
-    Copyright = self.BuildComment('Copyright', p, 1)
-    Filedoc = self.BuildComment('Comment', p, 2)
-    p[0] = ListFromConcat(Copyright, Filedoc, p[3])
-
-  # [0.1] Add support for Multiple COMMENTS
-  def p_Comments(self, p):
-    """Comments : CommentsRest"""
-    if len(p) > 1:
-      p[0] = p[1]
-
-  # [0.2] Produce a COMMENT and aggregate sibling comments
-  def p_CommentsRest(self, p):
-    """CommentsRest : COMMENT CommentsRest
-                    | """
-    if len(p) > 1:
-      p[0] = ListFromConcat(self.BuildComment('Comment', p, 1), p[2])
-
-#
-#The parser is based on the WebIDL standard. See:
-#    http://heycam.github.io/webidl/#idl-grammar
-#
+# The rules can look cryptic at first, but there are a few standard
+# transforms from the CST to AST. With these in mind, the actions should
+# be reasonably legible.
+#
+# * Ignore production
+#   Discard this branch. Primarily used when one alternative is empty.
+#
+#   Sample code:
+#   if len(p) > 1:
+#       p[0] = ...
+#   # Note no assignment if len(p) == 1
+#
+# * Eliminate singleton production
+#   Discard this node in the CST, pass the next level down up the tree.
+#   Used to ignore productions only necessary for parsing, but not needed
+#   in the AST.
+#
+#   Sample code:
+#   p[0] = p[1]
+#
+# * Build node
+#   The key type of rule. In this parser, produces object of class IDLNode.
+#   There are several helper functions:
+#   * BuildProduction: actually builds an IDLNode, based on a production.
+#   * BuildAttribute: builds an IDLAttribute, which is a temporary
+#                     object to hold a name-value pair, which is then
+#                     set as a Property of the IDLNode when the IDLNode
+#                     is built.
+#   * BuildNamed: Same as BuildProduction, and sets the 'NAME' property.
+#   * BuildTrue: BuildAttribute with value True, for flags.
+#
+#   Sample code:
+#   # Build node of type NodeType, with value p[1], and children.
+#   p[0] = self.BuildProduction('NodeType', p, 1, children)
+#
+#   # Build named node of type NodeType, with name and value p[1].
+#   # (children optional)
+#   p[0] = self.BuildNamed('NodeType', p, 1)
+#
+#   # Make a list
+#   # Used if one node has several children.
+#   children = ListFromConcat(p[2], p[3])
+#   p[0] = self.BuildProduction('NodeType', p, 1, children)
+#
+#   # Also used to collapse the right-associative tree
+#   # produced by parsing a list back into a single list.
+#   """Foos : Foo Foos
+#           |"""
+#   if len(p) > 1:
+#       p[0] = ListFromConcat(p[1], p[2])
+#
+#   # Add children.
+#   # Primarily used to add attributes, produced via BuildTrue.
+#   # p_StaticAttribute
+#   """StaticAttribute : STATIC Attribute"""
+#   p[2].AddChildren(self.BuildTrue('STATIC'))
+#   p[0] = p[2]
+#
+# For more details on parsing refer to the PLY documentation at
+#    http://www.dabeaz.com/ply/
+#
+# The parser is based on the Web IDL standard. See:
+#    http://heycam.github.io/webidl/#idl-grammar
+#
+# Productions with a fractional component in the comment denote additions to
+# the Web IDL spec, such as allowing string list in extended attributes.
+
+
+class IDLParser(object):
   # [1]
   def p_Definitions(self, p):
     """Definitions : ExtendedAttributeList Definition Definitions
@@ -410,7 +430,7 @@ class IDLParser(object):

   # [24]
   def p_Typedef(self, p):
-    """Typedef : TYPEDEF ExtendedAttributeListNoComments Type identifier ';'"""
+    """Typedef : TYPEDEF ExtendedAttributeList Type identifier ';'"""
     p[0] = self.BuildNamed('Typedef', p, 4, ListFromConcat(p[2], p[3]))

   # [24.1] Error recovery for Typedefs
@@ -785,24 +805,18 @@ class IDLParser(object):
     """SetlikeRest : SETLIKE '<' Type '>' ';'"""
     p[0] = self.BuildProduction('Setlike', p, 2, p[3])

-  # [65] No comment version for mid statement attributes.
-  def p_ExtendedAttributeListNoComments(self, p):
-    """ExtendedAttributeListNoComments : '[' ExtendedAttribute ExtendedAttributes ']'
-                                       | """
-    if len(p) > 2:
-      items = ListFromConcat(p[2], p[3])
-      p[0] = self.BuildProduction('ExtAttributes', p, 1, items)
-
-  # [65.1] Add optional comment field for start of statements.
-  def p_ExtendedAttributeList(self, p):
-    """ExtendedAttributeList : Comments '[' ExtendedAttribute ExtendedAttributes ']'
-                             | Comments """
-    if len(p) > 2:
-      items = ListFromConcat(p[3], p[4])
-      attribs = self.BuildProduction('ExtAttributes', p, 2, items)
-      p[0] = ListFromConcat(p[1], attribs)
-    else:
-      p[0] = p[1]
+  # [65]
+  def p_ExtendedAttributeList(self, p):
+    """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
+                             | """
+    if len(p) > 3:
+      items = ListFromConcat(p[2], p[3])
+      p[0] = self.BuildProduction('ExtAttributes', p, 1, items)
+
+  # Error recovery for ExtendedAttributeList
+  def p_ExtendedAttributeListError(self, p):
+    """ExtendedAttributeList : '[' ExtendedAttribute ',' error"""
+    p[0] = self.BuildError(p, 'ExtendedAttributeList')

   # [66]
   def p_ExtendedAttributes(self, p):
@@ -1131,7 +1145,6 @@ class IDLParser(object):
     p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'DOMString'),
                           self.BuildAttribute('NAME', p[1]))

-
   # [99]
   def p_StringType(self, p):
     """StringType : BYTESTRING
@@ -1238,35 +1251,6 @@ class IDLParser(object):
       childlist.append(self.BuildAttribute('NAME', p[index]))
     return self.BuildProduction(cls, p, index, childlist)

-  def BuildComment(self, cls, p, index):
-    name = p[index]
-
-    # Remove comment markers
-    lines = []
-    if name[:2] == '//':
-      # For C++ style, remove any leading whitespace and the '//' marker from
-      # each line.
-      form = 'cc'
-      for line in name.split('\n'):
-        start = line.find('//')
-        lines.append(line[start+2:])
-    else:
-      # For C style, remove ending '*/''
-      form = 'c'
-      for line in name[:-2].split('\n'):
-        # Remove characters until start marker for this line '*' if found
-        # otherwise it should be blank.
-        offs = line.find('*')
-        if offs >= 0:
-          line = line[offs + 1:].rstrip()
-        else:
-          line = ''
-        lines.append(line)
-    name = '\n'.join(lines)
-
-    childlist = [self.BuildAttribute('NAME', name),
-                 self.BuildAttribute('FORM', form)]
-    return self.BuildProduction(cls, p, index, childlist)
-
   #
   # BuildError
   #
......
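The reworked extended-attribute rules above pair the normal production with a recovery production built on yacc's special 'error' symbol, turning a malformed attribute list into an error node instead of aborting the parse. A self-contained sketch of that pattern (toy grammar and tuple nodes; the real parser builds IDLNode objects via BuildError and remaps messages through ERROR_REMAP):

# Toy illustration of the 'error'-symbol recovery pattern (not the Web IDL grammar).
import ply.lex as lex
import ply.yacc as yacc

tokens = ('ID',)
literals = ['[', ']', ',']

t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
t_ignore = ' \t\n'

def t_error(t):
    t.lexer.skip(1)

def p_attr_list(p):
    """attr_list : '[' ID attrs ']'"""
    p[0] = ('ExtAttributes', [p[2]] + p[3])

def p_attrs(p):
    """attrs : ',' ID attrs
             |"""
    p[0] = [p[2]] + p[3] if len(p) > 1 else []

def p_attr_list_error(p):
    """attr_list : '[' ID ',' error"""
    # Mirrors p_ExtendedAttributeListError: swallow the malformed tail and
    # hand back an error node so the caller can keep going.
    p[0] = ('Error', 'ExtendedAttributeList')

def p_error(p):
    # Invoked with the offending token; recovery continues via the rule above.
    pass

lexer = lex.lex()
parser = yacc.yacc(debug=False, write_tables=False)
print(parser.parse('[Clamp, EnforceRange]', lexer=lexer))  # ('ExtAttributes', ['Clamp', 'EnforceRange'])
print(parser.parse('[Clamp, ]', lexer=lexer))              # ('Error', 'ExtendedAttributeList')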
 integer 1 integer 123 integer 12345
 identifier A123 identifier A_A
-COMMENT /*XXXX*/
-COMMENT //XXXX
-COMMENT /*MULTI LINE*/
+/*XXXX*/
+//XXXX
+/*MULTI LINE*/
 [ [
 ] ]
......