Chromium Code Reviews

Unified Diff: tools/idl_parser/idl_lexer.py

Issue 13498002: Add WebIDL compliant parser plus tests (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: 'Delay build of Lexer to handle tokens correctly.' Created 7 years, 8 months ago
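
A minimal sketch (not part of this patch) of the lazy-construction pattern the patch set description refers to, assuming PLY is importable as ply.lex; the class name, token set, and sample input are invented for illustration. PLY matches function-based rules in the order they are defined, while string rules are sorted by decreasing regular-expression length, which is likely why the ELLIPSIS, float, and integer rules become methods in this patch.

from ply import lex

class LazyLexerExample(object):
  # Token names known up front; IDLLexer instead accumulates these in __init__.
  tokens = ['integer']

  t_ignore = ' \t'

  def t_integer(self, t):
    r'[0-9]+'
    return t

  def t_error(self, t):
    t.lexer.skip(1)

  def __init__(self):
    # No lex.lex() call here, so tokens and keywords can still be registered
    # before the PLY lexer object is actually constructed.
    self._lexobj = None

  def Lexer(self):
    if not self._lexobj:
      self._lexobj = lex.lex(object=self, lextab=None, optimize=0)
    return self._lexobj

  def Tokenize(self, data):
    self.Lexer().input(data)       # first call builds the lexer on demand
    out = []
    while True:
      t = self.Lexer().token()
      if not t:
        break
      out.append(t)
    return out

if __name__ == '__main__':
  print([t.value for t in LazyLexerExample().Tokenize('12 34')])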
Index: tools/idl_parser/idl_lexer.py
diff --git a/tools/idl_parser/idl_lexer.py b/tools/idl_parser/idl_lexer.py
index 9a07ac6adc14f32760621c3e9c01669584cae80e..aa0da06f5789ac2b9f6d64d6594430384fd6c2c9 100755
--- a/tools/idl_parser/idl_lexer.py
+++ b/tools/idl_parser/idl_lexer.py
@@ -100,31 +100,24 @@ class IDLLexer(object):
'void' : 'VOID'
}
- # Add keywords
- for key in keywords:
- tokens.append(keywords[key])
-
- # 'literals' is a value expected by lex which specifies a list of valid
- # literal tokens, meaning the token type and token value are identical.
- literals = '"*.(){}[],;:=+-/~|&^?<>'
-
# Token definitions
#
# Lex assumes any value or function in the form of 't_<TYPE>' represents a
# regular expression where a match will emit a token of type <TYPE>. In the
# case of a function, the function is called when a match is made. These
# definitions come from WebIDL.
+ def t_ELLIPSIS(self, t):
+ r'\.\.\.'
+ return t
- # 't_ignore' is a special match of items to ignore
- t_ignore = ' \t'
+ def t_float(self, t):
+ r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
+ return t
- # Ellipsis operator
- t_ELLIPSIS = r'\.\.\.'
+ def t_integer(self, t):
+ r'-?(0([0-7]*|[Xx][0-9A-Fa-f]+)|[1-9][0-9]*)'
+ return t
- # Constant values
- t_integer = r'-?(0([0-7]*|[Xx][0-9A-Fa-f]+)|[1-9][0-9]*)'
- t_float = r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)'
- t_float += r'([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
# A line ending '\n', we use this to increment the line number
def t_LINE_END(self, t):
@@ -160,7 +153,7 @@ class IDLLexer(object):
def t_ANY_error(self, t):
msg = 'Unrecognized input'
- line = self.lexobj.lineno
+ line = self.Lexer().lineno
# If that line has not been accounted for, then we must have hit
# EoF, so compute the beginning of the line that caused the problem.
@@ -169,10 +162,10 @@ class IDLLexer(object):
word = t.value.split()[0]
offs = self.lines[line - 1].find(word)
# Add the computed line's starting position
- self.index.append(self.lexobj.lexpos - offs)
+ self.index.append(self.Lexer().lexpos - offs)
msg = 'Unexpected EoF reached after'
- pos = self.lexobj.lexpos - self.index[line]
+ pos = self.Lexer().lexpos - self.index[line]
out = self.ErrorMessage(line, pos, msg)
sys.stderr.write(out + '\n')
self._lex_errors += 1
@@ -183,13 +176,13 @@ class IDLLexer(object):
# of multiple lines, tokens can not exist on any of the lines except the
# last one, so the recorded value for previous lines are unused. We still
# fill the array however, to make sure the line count is correct.
- self.lexobj.lineno += count
+ self.Lexer().lineno += count
for _ in range(count):
- self.index.append(self.lexobj.lexpos)
+ self.index.append(self.Lexer().lexpos)
def FileLineMsg(self, line, msg):
# Generate a message containing the file and line number of a token.
- filename = self.lexobj.filename
+ filename = self.Lexer().filename
if filename:
return "%s(%d) : %s" % (filename, line + 1, msg)
return "<BuiltIn> : %s" % msg
@@ -213,7 +206,7 @@ class IDLLexer(object):
# against the leaf patterns.
#
def token(self):
- tok = self.lexobj.token()
+ tok = self.Lexer().token()
if tok:
self.last = tok
return tok
@@ -222,21 +215,60 @@ class IDLLexer(object):
def GetTokens(self):
outlist = []
while True:
- t = self.lexobj.token()
+ t = self.Lexer().token()
if not t:
break
outlist.append(t)
return outlist
def Tokenize(self, data, filename='__no_file__'):
- self.lexobj.filename = filename
- self.lexobj.input(data)
+ lexer = self.Lexer()
+ lexer.lineno = 1
+ lexer.filename = filename
+ lexer.input(data)
self.lines = data.split('\n')
+ def KnownTokens(self):
+ return self.tokens
+
+ def Lexer(self):
+ if not self._lexobj:
+ self._lexobj = lex.lex(object=self, lextab=None, optimize=0)
+ return self._lexobj
+
+ def _AddConstDefs(self):
+ # 'literals' is a value expected by lex which specifies a list of valid
+ # literal tokens, meaning the token type and token value are identical.
+ self.literals = r'"*.(){}[],;:=+-/~|&^?<>'
+ self.t_ignore = ' \t'
+
+ def _AddToken(self, token):
+ if token in self.tokens:
+ raise RuntimeError('Same token: ' + token)
+ self.tokens.append(token)
+
+ def _AddTokens(self, tokens):
+ for token in tokens:
+ self._AddToken(token)
+
+ def _AddKeywords(self, keywords):
+ for key in keywords:
+ value = key.upper()
+ self._AddToken(value)
+ self.keywords[key] = value
+
def __init__(self):
self.index = [0]
self._lex_errors = 0
self.linex = []
self.filename = None
- self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
-
+ self.keywords = {}
+ self.tokens = []
+ self._AddConstDefs()
+ self._AddTokens(IDLLexer.tokens)
+ self._AddKeywords(IDLLexer.keywords)
+ self._lexobj = None
+
+# If run by itself, attempt to build the lexer
+if __name__ == '__main__':
+ lexer = IDLLexer()
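
For reference, a hedged usage sketch of the reworked class, assuming tools/idl_parser/idl_lexer.py is importable as idl_lexer; the WebIDL snippet and filename are illustrative only:

from idl_lexer import IDLLexer

lexer = IDLLexer()                     # registers tokens/keywords; lex.lex() not run yet
lexer.Tokenize('interface Foo {};', filename='example.idl')  # builds the PLY lexer lazily
for tok in lexer.GetTokens():
  print('%s %r' % (tok.type, tok.value))
print(lexer.KnownTokens())             # names collected by _AddTokens()/_AddKeywords()

A side benefit visible in the diff: Tokenize() now resets lineno to 1, so one IDLLexer instance can be reused across multiple inputs without error positions drifting.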