| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 3 # for details. All rights reserved. Use of this source code is governed by a | |
| 4 # BSD-style license that can be found in the LICENSE file. | |
| 5 | |
'''Generates the Tokenizer class into tokenizer.g.dart.'''
| 7 | |
| 8 import re | |
| 9 from token_info import tokens, keywords | |
| 10 from codegen import CodeWriter, HEADER | |
| 11 | |
def makeSafe(ch):
    """Return ch's code point annotated with a Dart /*...*/ comment.

    Characters that would break the generated comment (whitespace and the
    comment delimiters * and /) are rendered via repr() instead of raw.
    """
    label = repr(ch) if ch in ' \t\n\r*/' else ch
    return '%d/*%s*/' % (ord(ch), label)
| 16 | |
| 17 | |
class Case:
    """A node in the prefix trie of token literal strings.

    Each node maps a next character to the Case for the remaining suffix;
    a node whose token is set marks a complete token text.  writeCases()
    walks the trie and emits the Dart dispatch code: nested
    _maybeEatChar() if-chains for small fan-out, switch statements for
    large fan-out.
    """

    def __init__(self, ch, token, includeWhitespace=False):
        self.ch = ch
        self.cases = {}
        self.token = None
        self.includeWhitespace = includeWhitespace
        if len(ch) > 0:
            self.cases[ch[0]] = Case(ch[1:], token)
        else:
            self.token = token

    def addCase(self, ch, token):
        """Insert token at the path spelled by the remaining characters ch."""
        if len(ch) == 0:
            self.token = token
        else:
            searchChar = ch[0]
            # 'in' instead of dict.has_key(), which was removed in Python 3.
            if searchChar in self.cases:
                self.cases[searchChar].addCase(ch[1:], token)
            else:
                self.cases[searchChar] = Case(ch[1:], token)

    def defaultReturn(self):
        """Code emitted when no further character matches at this node."""
        if self.token is not None:
            return 'return %s;' % self.token.getFinishCode()
        else:
            return 'return _errorToken();'

    def writeCases(self, cw):
        """Emit the Dart dispatch code for this subtree into cw."""
        if len(self.cases) == 0:
            cw.writeln(self.defaultReturn())
        elif len(self.cases) < 4 and not self.includeWhitespace:
            # Small fan-out: chain of _maybeEatChar if / else-if tests.
            optElse = ''
            for key, case in sorted(self.cases.items()):
                cw.enterBlock('%sif (_maybeEatChar(%s)) {' %
                              (optElse, makeSafe(key)))
                case.writeCases(cw)
                cw.exitBlock()
                optElse = '} else '
            cw.enterBlock('} else {')
            cw.writeln(self.defaultReturn())

            cw.exitBlock('}')
        else:
            # Large fan-out: switch on the next character.
            cw.writeln('ch = _nextChar();')
            cw.enterBlock('switch(ch) {')
            if self.includeWhitespace:
                self.writeWhitespace(cw)
            for key, case in sorted(self.cases.items()):
                cw.enterBlock('case %s:' % makeSafe(key))

                case.writeCases(cw)
                cw.exitBlock()
            if self.includeWhitespace:
                # Root node: fall back to identifier / number scanning.
                cw.enterBlock('default:')
                cw.enterBlock('if (TokenizerHelpers.isIdentifierStart(ch)) {')
                cw.writeln('return this.finishIdentifier(ch);')
                cw.exitBlock('} else if (TokenizerHelpers.isDigit(ch)) {')
                cw.enterBlock()
                cw.writeln('return this.finishNumber();')
                cw.exitBlock('} else {')
                cw.enterBlock()
                cw.writeln(self.defaultReturn())
                cw.exitBlock('}')
            else:
                cw.writeln('default: ' + self.defaultReturn())
            cw.exitBlock('}')

    def writeWhitespace(self, cw):
        """Emit the EOF and whitespace cases for the root switch."""
        cw.writeln('case 0: return _finishToken(TokenKind.END_OF_FILE);')
        cw.enterBlock(r"case %s: case %s: case %s: case %s:" %
                      tuple([makeSafe(ch) for ch in ' \t\n\r']))
        cw.writeln('return finishWhitespace();')
        cw.exitBlock()
| 91 | |
def computeCases():
    """Build the dispatch trie from every non-empty token literal."""
    root = Case('', None, True)
    for tok in tokens:
        if tok.text != '':
            root.addCase(tok.text, tok)
    return root
| 99 | |
# Module-level: the complete dispatch trie, consumed by main().
cases = computeCases()
| 101 | |
# Template for the generated Dart file.  %(cases)s, %(extraMethods)s and
# %(helperMethods)s are filled in by main() with generated code.
TOKENIZER = '''
/** A generated file that extends the hand coded methods in TokenizerBase. */
class Tokenizer extends TokenizerBase {

  Tokenizer(SourceFile source, bool skipWhitespace, [int index = 0])
    : super(source, skipWhitespace, index);

  Token next() {
    // keep track of our starting position
    _startIndex = _index;

    if (_interpStack != null && _interpStack.depth == 0) {
      var istack = _interpStack;
      _interpStack = _interpStack.pop();
      if (istack.isMultiline) {
        return finishMultilineString(istack.quote);
      } else {
        return finishStringBody(istack.quote);
      }
    }

    int ch;
%(cases)s
  }

%(extraMethods)s
}

/** Static helper methods. */
class TokenizerHelpers {
%(helperMethods)s
}
'''
| 135 | |
| 136 | |
| 137 | |
def charAsInt(ch):
    """Return ch's code point followed by its repr() in a Dart comment."""
    return '%d/*%r*/' % (ord(ch), ch)
| 140 | |
class CharTest:
    """A Dart test that code unit c equals one char or lies in a range."""

    def __init__(self, fromChar, toChar=None):
        # toChar is None for a single-character equality test.
        self.fromChar = fromChar
        self.toChar = toChar

    def toCode(self):
        """Return the Dart boolean expression for this test."""
        if self.toChar is None:
            return 'c == %s' % makeSafe(self.fromChar)
        return '(c >= %s && c <= %s)' % (
            makeSafe(self.fromChar), makeSafe(self.toChar))
| 152 | |
class OrTest:
    """Disjunction of several character tests."""

    def __init__(self, *tests):
        self.tests = tests

    def toCode(self):
        """Join the sub-tests' Dart code with || inside parentheses."""
        parts = [t.toCode() for t in self.tests]
        return '(%s)' % ' || '.join(parts)
| 159 | |
class ExplicitTest:
    """Wraps a literal Dart expression so it can be used as a test."""

    def __init__(self, text):
        self.text = text

    def toCode(self):
        return self.text
| 166 | |
| 167 | |
def writeClass(cw, name, test):
    """Emit a static bool is<name>(int c) predicate built from test.toCode()."""
    header = 'static bool is%s(int c) {' % name
    cw.enterBlock(header)
    cw.writeln('return %s;' % test.toCode())
    cw.exitBlock('}')
    cw.writeln()
| 173 | |
# TODO(jimhug): if (_restMatches(_text, i0+1, 'ase')) would be good!
class LengthGroup:
    """All keywords of one text length; emits one switch case per length."""

    def __init__(self, length):
        self.length = length
        self.kws = []

    def add(self, kw):
        self.kws.append(kw)

    def writeCode(self, cw):
        """Emit 'case <length>:' with tests for every keyword of this length."""
        cw.enterBlock('case %d:' % self.length)
        self.writeTests(cw, self.kws)
        cw.writeln('return TokenKind.IDENTIFIER;')
        cw.exitBlock()

    def writeTests(self, cw, kws, index=0):
        """Emit character tests discriminating kws from position index on."""
        if len(kws) == 1:
            kw = kws[0].text
            if index == len(kw):
                cw.writeln('return TokenKind.%s;' % (kws[0].name))
            else:
                # Only one candidate left: test all remaining chars at once.
                clauses = [
                    "_text.charCodeAt(%s) == %s" % (
                        makeIndex('i0', i), makeSafe(kw[i]))
                    for i in range(index, len(kw))]
                test = 'if (%s) return TokenKind.%s;' % (
                    ' && '.join(clauses), kws[0].name)
                cw.writeln(test)
        else:
            # Split the candidates by their character at this position.
            starts = {}
            for kw in kws:
                c0 = kw.text[index]
                # 'in' instead of dict.has_key(), removed in Python 3.
                if c0 not in starts:
                    starts[c0] = []
                starts[c0].append(kw)

            cw.writeln('ch = _text.charCodeAt(%s);' % makeIndex('i0', index))
            prefix = ''
            for key, value in sorted(starts.items()):
                cw.enterBlock('%sif (ch == %s) {' % (prefix, makeSafe(key)))
                self.writeTests(cw, value, index + 1)
                cw.exitBlock()
                prefix = '} else '
            cw.writeln('}')

    def __str__(self):
        return '%d: %r' % (self.length, self.kws)
| 224 | |
def makeIndex(index, offset):
    """Return a Dart index expression: the index itself, or index+offset."""
    return index if offset == 0 else '%s+%d' % (index, offset)
| 230 | |
def writeHelperMethods(cw):
    """Emit the TokenizerHelpers character-class predicate methods."""
    cw.enterBlock()
    cw.writeln()
    # (name, test) pairs, emitted in order as static bool is<Name>(int c).
    specs = [
        ('IdentifierStart', OrTest(
            CharTest('a', 'z'), CharTest('A', 'Z'),
            CharTest('_'))),  # TODO: CharTest('$')
        ('Digit', CharTest('0', '9')),
        ('HexDigit', OrTest(
            ExplicitTest('isDigit(c)'), CharTest('a', 'f'),
            CharTest('A', 'F'))),
        ('Whitespace', OrTest(
            CharTest(' '), CharTest('\t'), CharTest('\n'), CharTest('\r'))),
        ('IdentifierPart', OrTest(
            ExplicitTest('isIdentifierStart(c)'),
            ExplicitTest('isDigit(c)'),
            CharTest('$'))),
        # Like IdentifierPart, but without $.
        ('InterpIdentifierPart', OrTest(
            ExplicitTest('isIdentifierStart(c)'),
            ExplicitTest('isDigit(c)'))),
    ]
    for name, test in specs:
        writeClass(cw, name, test)
| 249 | |
def writeExtraMethods(cw):
    """Emit getIdentifierKind(): keyword lookup switched on identifier length."""
    # Group the keywords by the length of their text.
    lengths = {}
    for kw in keywords:
        l = len(kw.text)
        # 'in' instead of dict.has_key(), which was removed in Python 3.
        if l not in lengths:
            lengths[l] = LengthGroup(l)
        lengths[l].add(kw)

    # TODO(jimhug): Consider merging this with the finishIdentifier code.
    cw.enterBlock()
    cw.enterBlock('int getIdentifierKind() {')
    cw.writeln('final i0 = _startIndex;')
    cw.writeln('int ch;')
    cw.enterBlock('switch (_index - i0) {')
    for key, value in sorted(lengths.items()):
        value.writeCode(cw)
    cw.writeln('default: return TokenKind.IDENTIFIER;')
    cw.exitBlock('}')
    cw.exitBlock('}')
| 269 | |
def makeSafe1(match):
    """re.sub callback: apply makeSafe() to the first capture group."""
    return makeSafe(match.group(1))
| 272 | |
def main():
    """Generate tokenizer.g.dart from the token tables."""
    cw = CodeWriter(__file__)
    cw._indent += 2  # generated cases sit inside a class and method body
    cases.writeCases(cw)
    casesCode = str(cw)

    cw = CodeWriter(__file__)
    writeExtraMethods(cw)
    extraMethods = str(cw)

    cw = CodeWriter(__file__)
    writeHelperMethods(cw)
    helperMethods = str(cw)

    # '@c' in the template expands to the safe code-point form of c.
    pat = re.compile('@(.)', re.DOTALL)
    text = pat.sub(makeSafe1, TOKENIZER)
    # 'with' guarantees the file is closed even if formatting raises.
    with open('tokenizer.g.dart', 'w') as out:
        out.write(HEADER % __file__)
        out.write(text % {
            'cases': casesCode,
            'extraMethods': extraMethods,
            'helperMethods': helperMethods})
| 296 | |
| 297 | |
if __name__ == '__main__':
    main()
| OLD | NEW |