#!/usr/bin/env python
# Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
# for details. All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.

'''Generates the Tokenizer class into tokenizer.g.dart.'''

import re
from token_info import tokens, keywords
from codegen import CodeWriter, HEADER

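# Renders a character as its integer code plus an inline comment, e.g.
# 'a' -> '97/*a*/'. Whitespace and the comment delimiters * and / are shown
# via repr() so they cannot break the generated /*...*/ comment.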
def makeSafe(ch):
  ch_s = ch
  if ch in ' \t\n\r*/': ch_s = repr(ch)
  return '%d/*%s*/' % (ord(ch), ch_s)


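# A trie over the literal text of every operator/punctuator token. Each node
# branches on the next input character, and a node holding a token is a valid
# place to stop; writeCases() lowers the trie to Dart if/switch dispatch code.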
class Case:
  def __init__(self, ch, token, includeWhitespace=False):
    self.ch = ch
    self.cases = {}
    self.token = None
    self.includeWhitespace = includeWhitespace
    if len(ch) > 0:
      self.cases[ch[0]] = Case(ch[1:], token)
    else:
      self.token = token

  def addCase(self, ch, token):
    if len(ch) == 0:
      self.token = token
    else:
      searchChar = ch[0]
      if searchChar in self.cases:
        self.cases[searchChar].addCase(ch[1:], token)
      else:
        self.cases[searchChar] = Case(ch[1:], token)

  def defaultReturn(self):
    if self.token is not None:
      return 'return %s;' % self.token.getFinishCode()
    else:
      return 'return _errorToken();'

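  # Small fan-out becomes an if/else chain of _maybeEatChar() tests; larger
  # fan-out consumes a character and switches on it. The root node
  # (includeWhitespace) also dispatches EOF, whitespace, identifiers and digits.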
  def writeCases(self, cw):
    if len(self.cases) == 0:
      cw.writeln(self.defaultReturn())
    elif len(self.cases) < 4 and not self.includeWhitespace:
      optElse = ''
      for key, case in sorted(self.cases.items()):
        cw.enterBlock('%sif (_maybeEatChar(%s)) {' % (optElse, makeSafe(key)))
        case.writeCases(cw)
        cw.exitBlock()
        optElse = '} else '
      cw.enterBlock('} else {')
      cw.writeln(self.defaultReturn())

      cw.exitBlock('}')
    else:
      cw.writeln('ch = _nextChar();')
      cw.enterBlock('switch(ch) {')
      if self.includeWhitespace:
        self.writeWhitespace(cw)
      for key, case in sorted(self.cases.items()):
        cw.enterBlock('case %s:' % makeSafe(key))

        case.writeCases(cw)
        cw.exitBlock()
      if self.includeWhitespace:
        cw.enterBlock('default:')
        cw.enterBlock('if (TokenizerHelpers.isIdentifierStart(ch)) {')
        cw.writeln('return this.finishIdentifier(ch);')
        cw.exitBlock('} else if (TokenizerHelpers.isDigit(ch)) {')
        cw.enterBlock()
        cw.writeln('return this.finishNumber();')
        cw.exitBlock('} else {')
        cw.enterBlock()
        cw.writeln(self.defaultReturn())
        cw.exitBlock('}')
      else:
        cw.writeln('default: ' + self.defaultReturn())
      cw.exitBlock('}')

  def writeWhitespace(self, cw):
    cw.writeln('case 0: return _finishToken(TokenKind.END_OF_FILE);')
    cw.enterBlock(r"case %s: case %s: case %s: case %s:" %
        tuple([makeSafe(ch) for ch in ' \t\n\r']))
    cw.writeln('return finishWhitespace();')
    cw.exitBlock()

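# Builds the dispatch trie from the literal text of every token in token_info.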
def computeCases():
  top = Case('', None, True)
  for tok in tokens:
    #print tok.text
    if tok.text != '':
      top.addCase(tok.text, tok)
  return top

cases = computeCases()

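# Hand-written scaffolding around the generated code. The %(...)s slots are
# filled in main(); any '@x' escape in the template would be rewritten to a
# character code by makeSafe1 (the current template contains none).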
TOKENIZER = '''
/** A generated file that extends the hand coded methods in TokenizerBase. */
class Tokenizer extends TokenizerBase {

  Tokenizer(SourceFile source, bool skipWhitespace, [int index = 0])
    : super(source, skipWhitespace, index);

  Token next() {
    // keep track of our starting position
    _startIndex = _index;

    if (_interpStack != null && _interpStack.depth == 0) {
      var istack = _interpStack;
      _interpStack = _interpStack.pop();
      if (istack.isMultiline) {
        return finishMultilineString(istack.quote);
      } else {
        return finishStringBody(istack.quote);
      }
    }

    int ch;
%(cases)s
  }

%(extraMethods)s
}

/** Static helper methods. */
class TokenizerHelpers {
%(helperMethods)s
}
'''


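# Like makeSafe, but always uses repr() in the inline comment. (Unreferenced
# in this file; makeSafe is the variant the generators below actually use.)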
def charAsInt(ch):
  return '%d/*%r*/' % (ord(ch), ch)

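# The test classes below build Dart boolean expressions over an int character
# code 'c'; writeClass() wraps one such expression in a static predicate.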
class CharTest:
  def __init__(self, fromChar, toChar=None):
    self.fromChar = fromChar
    self.toChar = toChar

  def toCode(self):
    if self.toChar is None:
      return 'c == %s' % makeSafe(self.fromChar)
    else:
      return '(c >= %s && c <= %s)' % (
          makeSafe(self.fromChar), makeSafe(self.toChar))

class OrTest:
  def __init__(self, *args):
    self.tests = args

  def toCode(self):
    return '(' + ' || '.join([test.toCode() for test in self.tests]) + ')'

class ExplicitTest:
  def __init__(self, text):
    self.text = text

  def toCode(self):
    return self.text


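# Emits one 'static bool is<Name>(int c)' helper on TokenizerHelpers.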
def writeClass(cw, name, test):
  cw.enterBlock('static bool is%s(int c) {' % name)
  cw.writeln('return %s;' % test.toCode())
  cw.exitBlock('}')
  cw.writeln()

# TODO(jimhug): if (_restMatches(_text, i0+1, 'ase')) would be good!
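# Groups all keywords of a given length; getIdentifierKind() switches on the
# identifier's length, then matches the remaining characters one at a time.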
class LengthGroup:
  def __init__(self, length):
    self.length = length
    self.kws = []

  def add(self, kw):
    self.kws.append(kw)

  def writeCode(self, cw):
    cw.enterBlock('case %d:' % self.length)
    self.writeTests(cw, self.kws)
    cw.writeln('return TokenKind.IDENTIFIER;')
    cw.exitBlock()

  def writeTests(self, cw, kws, index=0):
    if len(kws) == 1:
      kw = kws[0].text
      if index == len(kw):
        cw.writeln('return TokenKind.%s;' % (kws[0].name))
      else:
        clauses = [
            "_text.charCodeAt(%s) == %s" % (
                makeIndex('i0', i), makeSafe(kw[i]))
            for i in range(index, len(kw))]
        test = 'if (%s) return TokenKind.%s;' % (
            ' && '.join(clauses), kws[0].name)
        cw.writeln(test)
    else:
      starts = {}
      for kw in kws:
        c0 = kw.text[index]
        if c0 not in starts:
          starts[c0] = []
        starts[c0].append(kw)

      cw.writeln('ch = _text.charCodeAt(%s);' % makeIndex('i0', index))
      prefix = ''
      for key, value in sorted(starts.items()):
        cw.enterBlock('%sif (ch == %s) {' % (prefix, makeSafe(key)))
        #cw.writeln(repr(value))
        self.writeTests(cw, value, index+1)
        cw.exitBlock()
        prefix = '} else '
      cw.writeln('}')
      #cw.writeln(repr(kws))

  def __str__(self):
    return '%d: %r' % (self.length, self.kws)

def makeIndex(index, offset):
  if offset == 0:
    return index
  else:
    return '%s+%d' % (index, offset)

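# Generates the TokenizerHelpers predicates (isIdentifierStart, isDigit, ...).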
def writeHelperMethods(cw):
  cw.enterBlock()
  cw.writeln()
  writeClass(cw, 'IdentifierStart', OrTest(
      CharTest('a', 'z'), CharTest('A', 'Z'), CharTest('_'))) #TODO: CharTest('$')
  writeClass(cw, 'Digit', CharTest('0', '9'))
  writeClass(cw, 'HexDigit', OrTest(
      ExplicitTest('isDigit(c)'), CharTest('a', 'f'), CharTest('A', 'F')))
  writeClass(cw, 'Whitespace', OrTest(
      CharTest(' '), CharTest('\t'), CharTest('\n'), CharTest('\r')))
  writeClass(cw, 'IdentifierPart', OrTest(
      ExplicitTest('isIdentifierStart(c)'),
      ExplicitTest('isDigit(c)'),
      CharTest('$')))
  # This is like IdentifierPart, but without $
  writeClass(cw, 'InterpIdentifierPart', OrTest(
      ExplicitTest('isIdentifierStart(c)'),
      ExplicitTest('isDigit(c)')))

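# Generates getIdentifierKind(): a switch on identifier length whose cases
# compare the source characters directly against the known keywords.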
def writeExtraMethods(cw):
  lengths = {}
  for kw in keywords:
    l = len(kw.text)
    if l not in lengths:
      lengths[l] = LengthGroup(l)
    lengths[l].add(kw)

  # TODO(jimhug): Consider merging this with the finishIdentifier code.
  cw.enterBlock()
  cw.enterBlock('int getIdentifierKind() {')
  cw.writeln('final i0 = _startIndex;')
  cw.writeln('int ch;')
  cw.enterBlock('switch (_index - i0) {')
  for key, value in sorted(lengths.items()):
    value.writeCode(cw)
  cw.writeln('default: return TokenKind.IDENTIFIER;')
  cw.exitBlock('}')
  cw.exitBlock('}')

def makeSafe1(match):
  return makeSafe(match.group(1))

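# Renders the three generated sections with separate CodeWriters, then fills
# the TOKENIZER template and writes tokenizer.g.dart to the current directory.
# Run from a directory where token_info.py and codegen.py are importable.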
def main():
  cw = CodeWriter(__file__)
  cw._indent += 2
  cases.writeCases(cw)
  casesCode = str(cw)

  cw = CodeWriter(__file__)
  writeExtraMethods(cw)
  extraMethods = str(cw)

  cw = CodeWriter(__file__)
  writeHelperMethods(cw)
  helperMethods = str(cw)

  out = open('tokenizer.g.dart', 'w')
  out.write(HEADER % __file__)
  pat = re.compile('@(.)', re.DOTALL)
  text = pat.sub(makeSafe1, TOKENIZER)
  out.write(text % {
      'cases': casesCode,
      'extraMethods': extraMethods,
      'helperMethods': helperMethods })
  out.close()


if __name__ == '__main__': main()