Chromium Code Reviews

Unified diff: tools/idl_parser/idl_lexer.py

Issue 13498002: Add WebIDL compliant parser plus tests (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: 'Delay build of Lexer to handler tokens correctly.' Created 7 years, 8 months ago
 #!/usr/bin/env python
 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """ Lexer for PPAPI IDL

 The lexer uses the PLY library to build a tokenizer which understands both
 WebIDL and Pepper tokens.

(...skipping 82 matching lines...)
     'short' : 'SHORT',
     'static' : 'STATIC',
     'stringifier' : 'STRINGIFIER',
     'typedef' : 'TYPEDEF',
     'true' : 'TRUE',
     'unsigned' : 'UNSIGNED',
     'unrestricted' : 'UNRESTRICTED',
     'void' : 'VOID'
   }

-  # Add keywords
-  for key in keywords:
-    tokens.append(keywords[key])
-
-  # 'literals' is a value expected by lex which specifies a list of valid
-  # literal tokens, meaning the token type and token value are identical.
-  literals = '"*.(){}[],;:=+-/~|&^?<>'
-
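For reference, PLY's 'literals' value turns every character in that string into a single-character token whose type and value are both that character. A minimal, self-contained sketch of the behaviour (hypothetical mini-lexer, not part of this patch):

import ply.lex as lex

tokens = ['identifier']
literals = '{};'              # each character becomes its own single-char token
t_identifier = r'[A-Za-z_][A-Za-z0-9_]*'
t_ignore = ' \t'

def t_error(t):
  t.lexer.skip(1)

lexer = lex.lex()
lexer.input('interface Foo {};')
toks = [(tok.type, tok.value) for tok in iter(lexer.token, None)]
# toks == [('identifier', 'interface'), ('identifier', 'Foo'),
#          ('{', '{'), ('}', '}'), (';', ';')]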
   # Token definitions
   #
   # Lex assumes any value or function in the form of 't_<TYPE>' represents a
   # regular expression where a match will emit a token of type <TYPE>. In the
   # case of a function, the function is called when a match is made. These
   # definitions come from WebIDL.
+  def t_ELLIPSIS(self, t):
+    r'\.\.\.'
+    return t

-  # 't_ignore' is a special match of items to ignore
-  t_ignore = ' \t'
+  def t_float(self, t):
+    r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
+    return t

-  # Ellipsis operator
-  t_ELLIPSIS = r'\.\.\.'
+  def t_integer(self, t):
+    r'-?(0([0-7]*|[Xx][0-9A-Fa-f]+)|[1-9][0-9]*)'
+    return t

-  # Constant values
-  t_integer = r'-?(0([0-7]*|[Xx][0-9A-Fa-f]+)|[1-9][0-9]*)'
-  t_float = r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)'
-  t_float += r'([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
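A note on the conversion from string rules to methods: PLY adds function-based rules to the master regular expression in the order they are defined, while string-based rules are sorted by decreasing pattern length, so converting these rules to methods makes the match order explicit (here: ELLIPSIS, then float, then integer). A self-contained sketch of that ordering behaviour (hypothetical mini-lexer, not part of this patch):

import ply.lex as lex

tokens = ['float', 'integer']

def t_float(t):               # defined first, so '1.5' lexes as one float ...
  r'-?[0-9]+\.[0-9]+'
  return t

def t_integer(t):             # ... rather than integer '1' followed by an error
  r'-?[0-9]+'
  return t

t_ignore = ' \t'

def t_error(t):
  t.lexer.skip(1)

lexer = lex.lex()
lexer.input('42 1.5')
types = [tok.type for tok in iter(lexer.token, None)]
# types == ['integer', 'float']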
   # A line ending '\n', we use this to increment the line number
   def t_LINE_END(self, t):
     r'\n+'
     self.AddLines(len(t.value))

   # We do not process escapes in the IDL strings. Strings are exclusively
   # used for attributes and enums, and not used as typical 'C' constants.
   def t_string(self, t):
     r'"[^"]*"'
(...skipping 15 matching lines...)
     t.type = self.keywords.get(t.value, 'identifier')

     # We strip leading underscores so that you can specify symbols with the same
     # value as a keyword (E.g. a dictionary named 'interface').
     if t.value[0] == '_':
       t.value = t.value[1:]
     return t

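The keyword/identifier rule (its regular expression is in the elided lines above) looks each matched name up in the keyword table and falls back to the generic 'identifier' type, and a single leading underscore lets IDL authors use names that would otherwise collide with keywords. A hypothetical illustration, assuming the complete IDLLexer in this file is importable as idl_lexer:

from idl_lexer import IDLLexer

lexer = IDLLexer()
lexer.Tokenize('typedef _typedef;')
toks = [(t.type, t.value) for t in lexer.GetTokens()]
# Expected along the lines of:
#   [('TYPEDEF', 'typedef'), ('identifier', 'typedef'), (';', ';')]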
   def t_ANY_error(self, t):
     msg = 'Unrecognized input'
-    line = self.lexobj.lineno
+    line = self.Lexer().lineno

     # If that line has not been accounted for, then we must have hit
     # EoF, so compute the beginning of the line that caused the problem.
     if line >= len(self.index):
       # Find the offset in the line of the first word causing the issue
       word = t.value.split()[0]
       offs = self.lines[line - 1].find(word)
       # Add the computed line's starting position
-      self.index.append(self.lexobj.lexpos - offs)
+      self.index.append(self.Lexer().lexpos - offs)
       msg = 'Unexpected EoF reached after'

-    pos = self.lexobj.lexpos - self.index[line]
+    pos = self.Lexer().lexpos - self.index[line]
     out = self.ErrorMessage(line, pos, msg)
     sys.stderr.write(out + '\n')
     self._lex_errors += 1


   def AddLines(self, count):
     # Set the lexer position for the beginning of the next line. In the case
     # of multiple lines, tokens can not exist on any of the lines except the
     # last one, so the recorded value for previous lines are unused. We still
     # fill the array however, to make sure the line count is correct.
-    self.lexobj.lineno += count
+    self.Lexer().lineno += count
     for _ in range(count):
-      self.index.append(self.lexobj.lexpos)
+      self.index.append(self.Lexer().lexpos)

   def FileLineMsg(self, line, msg):
     # Generate a message containing the file and line number of a token.
-    filename = self.lexobj.filename
+    filename = self.Lexer().filename
     if filename:
       return "%s(%d) : %s" % (filename, line + 1, msg)
     return "<BuiltIn> : %s" % msg

   def SourceLine(self, line, pos):
     # Create a source line marker
     caret = ' ' * pos + '^'
     # We decrement the line number since the array is 0 based while the
     # line numbers are 1 based.
     return "%s\n%s" % (self.lines[line - 1], caret)

   def ErrorMessage(self, line, pos, msg):
     return "\n%s\n%s" % (
         self.FileLineMsg(line, msg),
         self.SourceLine(line, pos))

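Taken together, these helpers format a lexing diagnostic as a 'file(line) : message' header from FileLineMsg, followed by SourceLine's copy of the offending line with a caret under the failing column, roughly in the following shape (file name, line number and caret position are illustrative only):

  example.idl(3) : Unrecognized input
  dictionary $Foo {
             ^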
   #
   # Tokenizer
   #
   # The token function returns the next token provided by IDLLexer for matching
   # against the leaf patterns.
   #
   def token(self):
-    tok = self.lexobj.token()
+    tok = self.Lexer().token()
     if tok:
       self.last = tok
     return tok


   def GetTokens(self):
     outlist = []
     while True:
-      t = self.lexobj.token()
+      t = self.Lexer().token()
       if not t:
         break
       outlist.append(t)
     return outlist

   def Tokenize(self, data, filename='__no_file__'):
-    self.lexobj.filename = filename
-    self.lexobj.input(data)
+    lexer = self.Lexer()
+    lexer.lineno = 1
+    lexer.filename = filename
+    lexer.input(data)
     self.lines = data.split('\n')

+  def KnownTokens(self):
+    return self.tokens
+
+  def Lexer(self):
+    if not self._lexobj:
+      self._lexobj = lex.lex(object=self, lextab=None, optimize=0)
+    return self._lexobj
+
+  def _AddConstDefs(self):
+    # 'literals' is a value expected by lex which specifies a list of valid
+    # literal tokens, meaning the token type and token value are identical.
+    self.literals = r'"*.(){}[],;:=+-/~|&^?<>'
+    self.t_ignore = ' \t'
+
+  def _AddToken(self, token):
+    if token in self.tokens:
+      raise RuntimeError('Same token: ' + token)
+    self.tokens.append(token)
+
+  def _AddTokens(self, tokens):
+    for token in tokens:
+      self._AddToken(token)
+
+  def _AddKeywords(self, keywords):
+    for key in keywords:
+      value = key.upper()
+      self._AddToken(value)
+      self.keywords[key] = value
+
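lex.lex(object=self) builds the lexer from whatever 'tokens', 'literals' and 't_*' definitions the passed object exposes at that moment, which is why construction is now deferred to the first Lexer() call, after __init__ has filled in those tables. A self-contained sketch of the same deferred-build pattern (hypothetical LazyWordLexer class, not part of this patch):

import ply.lex as lex

class LazyWordLexer(object):
  def t_word(self, t):
    r'[A-Za-z]+'
    return t

  t_ignore = ' \t'

  def t_error(self, t):
    t.lexer.skip(1)

  def __init__(self):
    # The token table only exists once __init__ runs, so lex.lex() must not
    # be called until afterwards.
    self.tokens = ['word']
    self._lexobj = None

  def Lexer(self):
    if not self._lexobj:
      self._lexobj = lex.lex(object=self, lextab=None, optimize=0)
    return self._lexobj

wl = LazyWordLexer()
wl.Lexer().input('hello world')
words = [tok.value for tok in iter(wl.Lexer().token, None)]
# words == ['hello', 'world']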
   def __init__(self):
     self.index = [0]
     self._lex_errors = 0
     self.linex = []
     self.filename = None
-    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
+    self.keywords = {}
+    self.tokens = []
+    self._AddConstDefs()
+    self._AddTokens(IDLLexer.tokens)
+    self._AddKeywords(IDLLexer.keywords)
+    self._lexobj = None

+# If run by itself, attempt to build the lexer
+if __name__ == '__main__':
+  lexer = IDLLexer()
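Finally, a short usage sketch of the resulting interface (hypothetical input string and file name; only keywords visible in the table above are used):

from idl_lexer import IDLLexer

lexer = IDLLexer()
lexer.Tokenize('typedef unsigned short Port;', filename='example.idl')
types = [t.type for t in lexer.GetTokens()]
# Expected along the lines of:
#   ['TYPEDEF', 'UNSIGNED', 'SHORT', 'identifier', ';']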