Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(188)

Side by Side Diff: frog/scripts/tokenizer_gen.py

Issue 10548047: Remove frog from the repository. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Move test and update apidoc.gyp. Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « frog/scripts/token_kind_gen.py ('k') | frog/scripts/tree_gen.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
3 # for details. All rights reserved. Use of this source code is governed by a
4 # BSD-style license that can be found in the LICENSE file.
5
6 '''Generates the Tokenizer class into tokenenizer.g.dart.'''
7
8 import re
9 from token_info import tokens, keywords
10 from codegen import CodeWriter, HEADER
11
def makeSafe(ch):
  """Render a character as its code point plus a human-readable comment.

  Characters that would corrupt the generated ``/*...*/`` comment
  (whitespace and the '*' and '/' delimiters) are shown via repr()
  instead of literally.
  """
  if ch in ' \t\n\r*/':
    display = repr(ch)
  else:
    display = ch
  return '%d/*%s*/' % (ord(ch), display)
16
17
class Case:
  """A trie node used to generate the tokenizer's character-dispatch code.

  Each node maps a possible next character to a child Case; a node with a
  non-None ``token`` marks the end of a complete token text.  The root
  node (created with ``includeWhitespace=True``) additionally emits the
  EOF, whitespace, identifier and digit default cases.
  """

  def __init__(self, ch, token, includeWhitespace=False):
    self.ch = ch
    self.cases = {}  # next character -> child Case
    self.token = None
    self.includeWhitespace = includeWhitespace
    if len(ch) > 0:
      # More characters remain: chain them as descendant nodes.
      self.cases[ch[0]] = Case(ch[1:], token)
    else:
      self.token = token

  def addCase(self, ch, token):
    """Insert ``token``, reachable from here via character sequence ``ch``."""
    if len(ch) == 0:
      self.token = token
    else:
      searchChar = ch[0]
      # ``in`` replaces the Python-2-only dict.has_key().
      if searchChar in self.cases:
        self.cases[searchChar].addCase(ch[1:], token)
      else:
        self.cases[searchChar] = Case(ch[1:], token)

  def defaultReturn(self):
    """Dart statement emitted when no further character matches."""
    if self.token is not None:
      return 'return %s;' % self.token.getFinishCode()
    else:
      return 'return _errorToken();'

  def writeCases(self, cw):
    """Emit this node's dispatch code into the CodeWriter ``cw``.

    Small fan-outs (fewer than 4 children, non-root) become
    ``_maybeEatChar`` if/else chains; larger ones become a ``switch``
    on the next character.
    """
    if len(self.cases) == 0:
      cw.writeln(self.defaultReturn())
    elif len(self.cases) < 4 and not self.includeWhitespace:
      optElse = ''
      for key, case in sorted(self.cases.items()):
        cw.enterBlock('%sif (_maybeEatChar(%s)) {' % (optElse, makeSafe(key)))
        case.writeCases(cw)
        cw.exitBlock()
        optElse = '} else '
      cw.enterBlock('} else {')
      cw.writeln(self.defaultReturn())

      cw.exitBlock('}')
    else:
      cw.writeln('ch = _nextChar();')
      cw.enterBlock('switch(ch) {')
      if self.includeWhitespace:
        # Root switch: EOF and whitespace come first.
        self.writeWhitespace(cw)
      for key, case in sorted(self.cases.items()):
        cw.enterBlock('case %s:' % makeSafe(key))

        case.writeCases(cw)
        cw.exitBlock()
      if self.includeWhitespace:
        # Root default: identifiers, numbers, then the error token.
        cw.enterBlock('default:')
        cw.enterBlock('if (TokenizerHelpers.isIdentifierStart(ch)) {')
        cw.writeln('return this.finishIdentifier(ch);')
        cw.exitBlock('} else if (TokenizerHelpers.isDigit(ch)) {')
        cw.enterBlock()
        cw.writeln('return this.finishNumber();')
        cw.exitBlock('} else {')
        cw.enterBlock()
        cw.writeln(self.defaultReturn())
        cw.exitBlock('}')
      else:
        cw.writeln('default: ' + self.defaultReturn())
      cw.exitBlock('}')

  def writeWhitespace(self, cw):
    """Emit the EOF and whitespace cases for the root switch."""
    cw.writeln('case 0: return _finishToken(TokenKind.END_OF_FILE);')
    cw.enterBlock(r"case %s: case %s: case %s: case %s:" %
        tuple([makeSafe(ch) for ch in ' \t\n\r']))
    cw.writeln('return finishWhitespace();')
    cw.exitBlock()
91
def computeCases():
  """Build and return the dispatch trie covering every non-empty token text."""
  root = Case('', None, True)
  for token in tokens:
    if token.text != '':
      root.addCase(token.text, token)
  return root
99
# Dispatch trie built from the global token table at import time; consumed
# by main() when filling the template below.
cases = computeCases()

# Template for the generated Dart file.  The %(...)s slots are filled with
# generated code sections in main(); main() also rewrites any '@<char>'
# escapes via makeSafe1 before the substitution.
TOKENIZER = '''
/** A generated file that extends the hand coded methods in TokenizerBase. */
class Tokenizer extends TokenizerBase {

  Tokenizer(SourceFile source, bool skipWhitespace, [int index = 0])
    : super(source, skipWhitespace, index);

  Token next() {
    // keep track of our starting position
    _startIndex = _index;

    if (_interpStack != null && _interpStack.depth == 0) {
      var istack = _interpStack;
      _interpStack = _interpStack.pop();
      if (istack.isMultiline) {
        return finishMultilineString(istack.quote);
      } else {
        return finishStringBody(istack.quote);
      }
    }

    int ch;
%(cases)s
  }

%(extraMethods)s
}

/** Static helper methods. */
class TokenizerHelpers {
%(helperMethods)s
}
'''
135
136
137
def charAsInt(ch):
  """Return the character's code point annotated with its repr, e.g. "97/*'a'*/"."""
  code = ord(ch)
  return '{0}/*{1!r}*/'.format(code, ch)
140
class CharTest:
  """Generates a Dart test for a single character or an inclusive range."""

  def __init__(self, fromChar, toChar=None):
    self.fromChar = fromChar
    # None means "test equality with fromChar" rather than a range.
    self.toChar = toChar

  def toCode(self):
    """Return a Dart boolean expression over the variable ``c``."""
    if self.toChar is not None:
      return '(c >= %s && c <= %s)' % (
          makeSafe(self.fromChar), makeSafe(self.toChar))
    return 'c == %s' % makeSafe(self.fromChar)
152
class OrTest:
  """Combines child tests into a single parenthesized '||' disjunction."""

  def __init__(self, *args):
    self.tests = args

  def toCode(self):
    """Return a Dart boolean expression ORing every child test."""
    joined = ' || '.join(t.toCode() for t in self.tests)
    return '(%s)' % joined
159
class ExplicitTest:
  """Wraps a literal Dart expression so it composes with the other tests."""

  def __init__(self, text):
    self.text = text

  def toCode(self):
    """Return the wrapped expression unchanged."""
    return self.text
166
167
def writeClass(cw, name, test):
  """Emit a static Dart predicate ``is<name>(int c)`` built from ``test``."""
  header = 'static bool is%s(int c) {' % name
  cw.enterBlock(header)
  cw.writeln('return %s;' % test.toCode())
  cw.exitBlock('}')
  cw.writeln()  # blank line separating successive helpers
173
174 # TODO(jimhug): if (_restMatches(_text, i0+1, 'ase')) would be good!
class LengthGroup:
  """All keywords of one text length, emitted as a single switch case.

  ``writeCode`` emits ``case <length>:`` code that compares the current
  identifier's characters against each keyword of that length, returning
  the keyword's TokenKind on a match and IDENTIFIER otherwise.
  """

  def __init__(self, length):
    self.length = length
    self.kws = []  # keyword token objects (each with .text and .name)

  def add(self, kw):
    """Register a keyword token whose text has exactly ``self.length`` chars."""
    self.kws.append(kw)

  def writeCode(self, cw):
    """Emit the complete ``case`` block for this length group."""
    cw.enterBlock('case %d:' % self.length)
    self.writeTests(cw, self.kws)
    cw.writeln('return TokenKind.IDENTIFIER;')
    cw.exitBlock()

  def writeTests(self, cw, kws, index=0):
    """Emit nested comparisons distinguishing ``kws`` from position ``index``.

    With a single candidate left, its remaining characters are checked in
    one combined ``if``; otherwise the keywords are grouped by the
    character at ``index`` and we recurse into each group.
    """
    if len(kws) == 1:
      kw = kws[0].text
      if index == len(kw):
        cw.writeln('return TokenKind.%s;' % (kws[0].name))
      else:
        clauses = [
            "_text.charCodeAt(%s) == %s" % (
                makeIndex('i0', i), makeSafe(kw[i]))
            for i in range(index, len(kw))]
        test = 'if (%s) return TokenKind.%s;' % (
            ' && '.join(clauses), kws[0].name)
        cw.writeln(test)
    else:
      # Group the remaining keywords by their character at ``index``.
      # (setdefault replaces the Python-2-only dict.has_key() check.)
      starts = {}
      for kw in kws:
        starts.setdefault(kw.text[index], []).append(kw)

      cw.writeln('ch = _text.charCodeAt(%s);' % makeIndex('i0', index))
      prefix = ''
      for key, value in sorted(starts.items()):
        cw.enterBlock('%sif (ch == %s) {' % (prefix, makeSafe(key)))
        self.writeTests(cw, value, index + 1)
        cw.exitBlock()
        prefix = '} else '
      cw.writeln('}')

  def __str__(self):
    return '%d: %r' % (self.length, self.kws)
224
def makeIndex(index, offset):
  """Return Dart source for ``index`` advanced by ``offset`` characters."""
  return index if offset == 0 else '%s+%d' % (index, offset)
230
def writeHelperMethods(cw):
  # Emits the static is*() character-class predicates that make up the
  # body of the generated TokenizerHelpers class.  Each writeClass call
  # produces one `static bool isX(int c)` method; the order here is the
  # order they appear in the generated file.
  cw.enterBlock()
  cw.writeln()
  writeClass(cw, 'IdentifierStart', OrTest(
      CharTest('a', 'z'), CharTest('A', 'Z'), CharTest('_'))) #TODO: CharTest('$')
  writeClass(cw, 'Digit', CharTest('0', '9'))
  writeClass(cw, 'HexDigit', OrTest(
      ExplicitTest('isDigit(c)'), CharTest('a', 'f'), CharTest('A', 'F')))
  writeClass(cw, 'Whitespace', OrTest(
      CharTest(' '), CharTest('\t'), CharTest('\n'), CharTest('\r')))
  writeClass(cw, 'IdentifierPart', OrTest(
      ExplicitTest('isIdentifierStart(c)'),
      ExplicitTest('isDigit(c)'),
      CharTest('$')))
  # This is like IdentifierPart, but without $
  writeClass(cw, 'InterpIdentifierPart', OrTest(
      ExplicitTest('isIdentifierStart(c)'),
      ExplicitTest('isDigit(c)')))
249
def writeExtraMethods(cw):
  """Emit getIdentifierKind(): keyword lookup switched on identifier length."""
  # Bucket the keywords by text length; each bucket becomes one switch case.
  # (``not in`` replaces the Python-2-only dict.has_key().)
  lengths = {}
  for kw in keywords:
    l = len(kw.text)
    if l not in lengths:
      lengths[l] = LengthGroup(l)
    lengths[l].add(kw)

  # TODO(jimhug): Consider merging this with the finishIdentifier code.
  cw.enterBlock()
  cw.enterBlock('int getIdentifierKind() {')
  cw.writeln('final i0 = _startIndex;')
  cw.writeln('int ch;')
  cw.enterBlock('switch (_index - i0) {')
  for key, value in sorted(lengths.items()):
    value.writeCode(cw)
  cw.writeln('default: return TokenKind.IDENTIFIER;')
  cw.exitBlock('}')
  cw.exitBlock('}')
269
def makeSafe1(match):
  """re.sub callback: render the '@<char>' match's character via makeSafe."""
  ch = match.group(1)
  return makeSafe(ch)
272
def main():
  """Generate tokenizer.g.dart in the current directory from the token tables."""
  # Body of next(): the nested character-dispatch code from the trie.
  cw = CodeWriter(__file__)
  cw._indent += 2  # generated code sits inside the next() method body
  cases.writeCases(cw)
  casesCode = str(cw)

  # getIdentifierKind() keyword-recognition method.
  cw = CodeWriter(__file__)
  writeExtraMethods(cw)
  extraMethods = str(cw)

  # Static TokenizerHelpers character-class predicates.
  cw = CodeWriter(__file__)
  writeHelperMethods(cw)
  helperMethods = str(cw)

  # Rewrite any '@<char>' escapes to the character's integer code, then
  # fill in the generated sections.
  pat = re.compile('@(.)', re.DOTALL)
  text = pat.sub(makeSafe1, TOKENIZER)
  # ``with`` guarantees the output file is closed even if formatting fails.
  with open('tokenizer.g.dart', 'w') as out:
    out.write(HEADER % __file__)
    out.write(text % {
        'cases': casesCode,
        'extraMethods': extraMethods,
        'helperMethods': helperMethods })
296
297
298 if __name__ == '__main__': main()
OLDNEW
« no previous file with comments | « frog/scripts/token_kind_gen.py ('k') | frog/scripts/tree_gen.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698