Chromium Code Reviews

Unified diff: tools/idl_parser/idl_lexer.py

Issue 13498002: Add WebIDL compliant parser plus tests (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: 'Delay build of Lexer to handler tokens correctly.' Created 7 years, 8 months ago
 #!/usr/bin/env python
 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """ Lexer for PPAPI IDL

 The lexer uses the PLY library to build a tokenizer which understands both
 WebIDL and Pepper tokens.

(...skipping 82 matching lines...)
     'short' : 'SHORT',
     'static' : 'STATIC',
     'stringifier' : 'STRINGIFIER',
     'typedef' : 'TYPEDEF',
     'true' : 'TRUE',
     'unsigned' : 'UNSIGNED',
     'unrestricted' : 'UNRESTRICTED',
     'void' : 'VOID'
   }

-  # Add keywords
-  for key in keywords:
-    tokens.append(keywords[key])
-
-  # 'literals' is a value expected by lex which specifies a list of valid
-  # literal tokens, meaning the token type and token value are identical.
-  literals = '"*.(){}[],;:=+-/~|&^?<>'
-
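For reference, PLY's 'literals' value turns every character in that string into a single-character token whose type and value are both that character. A minimal, self-contained sketch of the behaviour (hypothetical mini-lexer, not part of this patch):

import ply.lex as lex

tokens = ['identifier']
literals = '{};'              # each character becomes its own single-char token
t_identifier = r'[A-Za-z_][A-Za-z0-9_]*'
t_ignore = ' \t'

def t_error(t):
  t.lexer.skip(1)

lexer = lex.lex()
lexer.input('interface Foo {};')
toks = [(tok.type, tok.value) for tok in iter(lexer.token, None)]
# toks == [('identifier', 'interface'), ('identifier', 'Foo'),
#          ('{', '{'), ('}', '}'), (';', ';')]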
   # Token definitions
   #
   # Lex assumes any value or function in the form of 't_<TYPE>' represents a
   # regular expression where a match will emit a token of type <TYPE>. In the
   # case of a function, the function is called when a match is made. These
   # definitions come from WebIDL.
+  def t_ELLIPSIS(self, t):
+    r'\.\.\.'
+    return t

-  # 't_ignore' is a special match of items to ignore
-  t_ignore = ' \t'
+  def t_float(self, t):
+    r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
+    return t

-  # Ellipsis operator
-  t_ELLIPSIS = r'\.\.\.'
+  def t_integer(self, t):
+    r'-?(0([0-7]*|[Xx][0-9A-Fa-f]+)|[1-9][0-9]*)'
+    return t

-  # Constant values
-  t_integer = r'-?(0([0-7]*|[Xx][0-9A-Fa-f]+)|[1-9][0-9]*)'
-  t_float = r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)'
-  t_float += r'([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
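A note on the conversion from string rules to methods: PLY adds function-based rules to the master regular expression in the order they are defined, while string-based rules are sorted by decreasing pattern length, so converting these rules to methods makes the match order explicit (here: ELLIPSIS, then float, then integer). A self-contained sketch of that ordering behaviour (hypothetical mini-lexer, not part of this patch):

import ply.lex as lex

tokens = ['float', 'integer']

def t_float(t):               # defined first, so '1.5' lexes as one float ...
  r'-?[0-9]+\.[0-9]+'
  return t

def t_integer(t):             # ... rather than integer '1' followed by an error
  r'-?[0-9]+'
  return t

t_ignore = ' \t'

def t_error(t):
  t.lexer.skip(1)

lexer = lex.lex()
lexer.input('42 1.5')
types = [tok.type for tok in iter(lexer.token, None)]
# types == ['integer', 'float']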
   # A line ending '\n', we use this to increment the line number
   def t_LINE_END(self, t):
     r'\n+'
     self.AddLines(len(t.value))

   # We do not process escapes in the IDL strings. Strings are exclusively
   # used for attributes and enums, and not used as typical 'C' constants.
   def t_string(self, t):
     r'"[^"]*"'
(...skipping 15 matching lines...)
     t.type = self.keywords.get(t.value, 'identifier')

     # We strip leading underscores so that you can specify symbols with the same
     # value as a keyword (E.g. a dictionary named 'interface').
     if t.value[0] == '_':
       t.value = t.value[1:]
     return t

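The keyword/identifier rule (its regular expression is in the elided lines above) looks each matched name up in the keyword table and falls back to the generic 'identifier' type, and a single leading underscore lets IDL authors use names that would otherwise collide with keywords. A hypothetical illustration, assuming the complete IDLLexer in this file is importable as idl_lexer:

from idl_lexer import IDLLexer

lexer = IDLLexer()
lexer.Tokenize('typedef _typedef;')
toks = [(t.type, t.value) for t in lexer.GetTokens()]
# Expected along the lines of:
#   [('TYPEDEF', 'typedef'), ('identifier', 'typedef'), (';', ';')]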
   def t_ANY_error(self, t):
     msg = 'Unrecognized input'
-    line = self.lexobj.lineno
+    line = self.Lexer().lineno

     # If that line has not been accounted for, then we must have hit
     # EoF, so compute the beginning of the line that caused the problem.
     if line >= len(self.index):
       # Find the offset in the line of the first word causing the issue
       word = t.value.split()[0]
       offs = self.lines[line - 1].find(word)
       # Add the computed line's starting position
-      self.index.append(self.lexobj.lexpos - offs)
+      self.index.append(self.Lexer().lexpos - offs)
       msg = 'Unexpected EoF reached after'

-    pos = self.lexobj.lexpos - self.index[line]
+    pos = self.Lexer().lexpos - self.index[line]
     out = self.ErrorMessage(line, pos, msg)
     sys.stderr.write(out + '\n')
     self._lex_errors += 1


   def AddLines(self, count):
     # Set the lexer position for the beginning of the next line. In the case
     # of multiple lines, tokens can not exist on any of the lines except the
     # last one, so the recorded value for previous lines are unused. We still
     # fill the array however, to make sure the line count is correct.
-    self.lexobj.lineno += count
+    self.Lexer().lineno += count
     for _ in range(count):
-      self.index.append(self.lexobj.lexpos)
+      self.index.append(self.Lexer().lexpos)

   def FileLineMsg(self, line, msg):
     # Generate a message containing the file and line number of a token.
-    filename = self.lexobj.filename
+    filename = self.Lexer().filename
     if filename:
       return "%s(%d) : %s" % (filename, line + 1, msg)
     return "<BuiltIn> : %s" % msg

   def SourceLine(self, line, pos):
     # Create a source line marker
     caret = ' ' * pos + '^'
     # We decrement the line number since the array is 0 based while the
     # line numbers are 1 based.
     return "%s\n%s" % (self.lines[line - 1], caret)

   def ErrorMessage(self, line, pos, msg):
     return "\n%s\n%s" % (
         self.FileLineMsg(line, msg),
         self.SourceLine(line, pos))

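Taken together, these helpers format a lexing diagnostic as a 'file(line) : message' header from FileLineMsg, followed by SourceLine's copy of the offending line with a caret under the failing column, roughly in the following shape (file name, line number and caret position are illustrative only):

  example.idl(3) : Unrecognized input
  dictionary $Foo {
             ^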
   #
   # Tokenizer
   #
   # The token function returns the next token provided by IDLLexer for matching
   # against the leaf patterns.
   #
   def token(self):
-    tok = self.lexobj.token()
+    tok = self.Lexer().token()
     if tok:
       self.last = tok
     return tok


   def GetTokens(self):
     outlist = []
     while True:
-      t = self.lexobj.token()
+      t = self.Lexer().token()
       if not t:
         break
       outlist.append(t)
     return outlist

   def Tokenize(self, data, filename='__no_file__'):
-    self.lexobj.filename = filename
-    self.lexobj.input(data)
+    lexer = self.Lexer()
+    lexer.lineno = 1
+    lexer.filename = filename
+    lexer.input(data)
     self.lines = data.split('\n')

+  def KnownTokens(self):
+    return self.tokens
+
+  def Lexer(self):
+    if not self._lexobj:
+      self._lexobj = lex.lex(object=self, lextab=None, optimize=0)
+    return self._lexobj
+
+  def _AddConstDefs(self):
+    # 'literals' is a value expected by lex which specifies a list of valid
+    # literal tokens, meaning the token type and token value are identical.
+    self.literals = r'"*.(){}[],;:=+-/~|&^?<>'
+    self.t_ignore = ' \t'
+
+  def _AddToken(self, token):
+    if token in self.tokens:
+      raise RuntimeError('Same token: ' + token)
+    self.tokens.append(token)
+
+  def _AddTokens(self, tokens):
+    for token in tokens:
+      self._AddToken(token)
+
+  def _AddKeywords(self, keywords):
+    for key in keywords:
+      value = key.upper()
+      self._AddToken(value)
+      self.keywords[key] = value
+
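lex.lex(object=self) builds the lexer from whatever 'tokens', 'literals' and 't_*' definitions the passed object exposes at that moment, which is why construction is now deferred to the first Lexer() call, after __init__ has filled in those tables. A self-contained sketch of the same deferred-build pattern (hypothetical LazyWordLexer class, not part of this patch):

import ply.lex as lex

class LazyWordLexer(object):
  def t_word(self, t):
    r'[A-Za-z]+'
    return t

  t_ignore = ' \t'

  def t_error(self, t):
    t.lexer.skip(1)

  def __init__(self):
    # The token table only exists once __init__ runs, so lex.lex() must not
    # be called until afterwards.
    self.tokens = ['word']
    self._lexobj = None

  def Lexer(self):
    if not self._lexobj:
      self._lexobj = lex.lex(object=self, lextab=None, optimize=0)
    return self._lexobj

wl = LazyWordLexer()
wl.Lexer().input('hello world')
words = [tok.value for tok in iter(wl.Lexer().token, None)]
# words == ['hello', 'world']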
   def __init__(self):
     self.index = [0]
     self._lex_errors = 0
     self.linex = []
     self.filename = None
-    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
+    self.keywords = {}
+    self.tokens = []
+    self._AddConstDefs()
+    self._AddTokens(IDLLexer.tokens)
+    self._AddKeywords(IDLLexer.keywords)
+    self._lexobj = None

+# If run by itself, attempt to build the lexer
+if __name__ == '__main__':
+  lexer = IDLLexer()
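Finally, a short usage sketch of the resulting interface (hypothetical input string and file name; only keywords visible in the table above are used):

from idl_lexer import IDLLexer

lexer = IDLLexer()
lexer.Tokenize('typedef unsigned short Port;', filename='example.idl')
types = [t.type for t in lexer.GetTokens()]
# Expected along the lines of:
#   ['TYPEDEF', 'UNSIGNED', 'SHORT', 'identifier', ';']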