OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 // Generated by scripts/tokenizer_gen.py. | |
5 | |
6 | |
/** Anything that can hand out [Token]s one at a time. */
interface TokenSource {
  /** Produces the next token from this source. */
  Token next();
}
10 | |
/**
 * One frame of string-interpolation state. Frames are chained to the frame
 * they were pushed on top of through [previous].
 */
class InterpStack {
  /**
   * Neighbouring frames. [previous] is the frame this one was pushed onto
   * (null for the outermost frame); [next] is never assigned by this class.
   */
  InterpStack next, previous;

  /** Character code of the quote delimiting the string being scanned. */
  final int quote;

  /** Whether the string being scanned is a multiline (triple-quoted) one. */
  final bool isMultiline;

  /**
   * Starts at -1. The tokenizer later replaces this with a brace nesting
   * count (or with 0 when the interpolation is a bare identifier).
   */
  int depth;

  InterpStack(this.previous, this.quote, this.isMultiline): depth = -1;

  /** Returns the frame beneath this one, or null if this is the outermost. */
  InterpStack pop() {
    return this.previous;
  }

  /**
   * Creates a new frame on top of [stack] (which may be null) and returns it.
   * The constructor already records [stack] as the new frame's [previous], so
   * no further linking is needed here.
   */
  static InterpStack push(InterpStack stack, int quote, bool isMultiline) {
    return new InterpStack(stack, quote, isMultiline);
  }
}
29 | |
/**
 * The base class for our tokenizer. The hand coded parts are in this file, with
 * the generated parts in the subclass Tokenizer.
 *
 * The scanner works on character codes: [_index] is the position of the next
 * unread character in [_text] and [_startIndex] is where the token currently
 * being scanned began. A character code of 0 is used to signal end of input.
 */
class TokenizerBase extends TokenizerHelpers implements TokenSource {
  final SourceFile _source;

  /** When true, whitespace and comments are skipped rather than returned. */
  final bool _skipWhitespace;

  /** Cached copy of `_source.text`. */
  String _text;

  /** Position of the next unread character in [_text]. */
  int _index;

  /**
   * Start position of the token being scanned. Not assigned in this class;
   * presumably maintained by the generated subclass -- TODO confirm.
   */
  int _startIndex;

  /** Keeps track of string interpolation state. */
  InterpStack _interpStack;

  TokenizerBase(this._source, this._skipWhitespace, [index = 0])
      : this._index = index {
    _text = _source.text;
  }

  abstract Token next();
  abstract int getIdentifierKind();

  /** Consumes and returns the next character code, or 0 at end of input. */
  int _nextChar() {
    if (_index < _text.length) {
      return _text.charCodeAt(_index++);
    }
    return 0;
  }

  /** Returns the next character code without consuming it, or 0 at end. */
  int _peekChar() {
    if (_index < _text.length) {
      return _text.charCodeAt(_index);
    }
    return 0;
  }

  /** Consumes the next character only if it equals [ch]; reports whether it did. */
  bool _maybeEatChar(int ch) {
    if (_index < _text.length && _text.charCodeAt(_index) == ch) {
      _index++;
      return true;
    }
    return false;
  }

  /** Returns the source text of the current token, clamped to the text end. */
  String _tokenText() {
    final end = _index < _text.length ? _index : _text.length;
    return _text.substring(_startIndex, end);
  }

  /** Builds a token of [kind] spanning [_startIndex] to [_index]. */
  Token _finishToken(int kind) {
    return new Token(kind, _source, _startIndex, _index);
  }

  /** Builds an error token for the current span with an optional [message]. */
  Token _errorToken([String message = null]) {
    return new ErrorToken(
        TokenKind.ERROR, _source, _startIndex, _index, message);
  }

  /**
   * Scans a run of whitespace. When [_skipWhitespace] is set the run is
   * dropped and the following token is returned instead; otherwise a
   * WHITESPACE token is returned at each newline or at the end of the run.
   * Reaching the end of the text yields an END_OF_FILE token.
   */
  Token finishWhitespace() {
    // Step back one character; presumably the caller already consumed the
    // first whitespace character -- TODO confirm against the generated code.
    _index--;
    while (_index < _text.length) {
      final ch = _text.charCodeAt(_index++);
      if (ch == 32/*' '*/ || ch == 9/*'\t'*/ || ch == 13/*'\r'*/) {
        // do nothing
      } else if (ch == 10/*'\n'*/) {
        if (!_skipWhitespace) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
      } else {
        // Not whitespace: un-read it and end the run.
        _index--;
        if (_skipWhitespace) {
          return next();
        } else {
          return _finishToken(TokenKind.WHITESPACE);
        }
      }
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

  /** Consumes through the end of the line (or input) and returns HASHBANG. */
  Token finishHashBang() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0 || ch == 10/*'\n'*/ || ch == 13/*'\r'*/) {
        return _finishToken(TokenKind.HASHBANG);
      }
    }
  }

  /**
   * Consumes through the end of the line (or input). Returns a COMMENT token,
   * or the following token when [_skipWhitespace] is set.
   */
  Token finishSingleLineComment() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0 || ch == 10/*'\n'*/ || ch == 13/*'\r'*/) {
        if (_skipWhitespace) {
          return next();
        } else {
          return _finishToken(TokenKind.COMMENT);
        }
      }
    }
  }

  /**
   * Consumes a block comment, honouring nested `/* ... */` pairs. Returns an
   * error token if the input ends first; otherwise a COMMENT token, or the
   * following token when [_skipWhitespace] is set.
   */
  Token finishMultiLineComment() {
    // Starts at 1, i.e. one comment is already open on entry.
    int nesting = 1;
    do {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == 42/*'*'*/) {
        if (_maybeEatChar(47/*'/'*/)) {
          nesting--;
        }
      } else if (ch == 47/*'/'*/) {
        if (_maybeEatChar(42/*'*'*/)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    if (_skipWhitespace) {
      return next();
    } else {
      return _finishToken(TokenKind.COMMENT);
    }
  }

  /** Consumes a (possibly empty) run of decimal digits. */
  void eatDigits() {
    while (_index < _text.length &&
        TokenizerHelpers.isDigit(_text.charCodeAt(_index))) {
      _index++;
    }
  }

  /** Returns the value (0..15) of hex digit code [c], or -1 if not a hex digit. */
  static int _hexDigit(int c) {
    if (c >= 48/*0*/ && c <= 57/*9*/) {
      return c - 48;
    } else if (c >= 97/*a*/ && c <= 102/*f*/) {
      return c - 87;
    } else if (c >= 65/*A*/ && c <= 70/*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

  /**
   * Reads hex digits starting at [_index] and returns their value.
   *
   * With no [hexLength], reads as many hex digits as are available (possibly
   * none, giving 0) and stops at the first non-hex character or at the end of
   * the text. With a [hexLength], exactly that many hex digits must be
   * present; otherwise -1 is returned.
   */
  int readHex([int hexLength]) {
    int maxIndex;
    if (hexLength === null) {
      // The limit is the end of the text, so that a hex run ending at the
      // very last character is read in full.
      maxIndex = _text.length;
    } else {
      // TODO(jimhug): What if this is too long?
      maxIndex = _index + hexLength;
      // Only fail when the requested digits would run past the end of text.
      if (maxIndex > _text.length) return -1;
    }
    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.charCodeAt(_index));
      if (digit == -1) {
        if (hexLength === null) {
          return result;
        } else {
          return -1;
        }
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }

    return result;
  }

  /** Reads the digits of a hex literal and returns a HEX_INTEGER token. */
  Token finishHex() {
    final value = readHex();
    return new LiteralToken(TokenKind.HEX_INTEGER, _source, _startIndex,
        _index, value);
  }

  /**
   * Scans the rest of a number: digits, then an optional fractional part and
   * exponent. A '.' not followed by a digit is left unconsumed.
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46/*.*/) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      } else {
        // Give the '.' back; it is not part of this number.
        _index--;
      }
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

  /**
   * Scans an optional exponent (which forces the kind to DOUBLE) and returns
   * the number token, or an error token if an identifier-start character
   * immediately follows the number.
   */
  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
      kind = TokenKind.DOUBLE;
      // NOTE(review): this accepts both signs in sequence (e.g. "1e-+5") and
      // an exponent with no digits; left as is to avoid changing accepted
      // input -- confirm whether that is intended.
      _maybeEatChar(45/*-*/);
      _maybeEatChar(43/*+*/);
      eatDigits();
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken("illegal character in number");
    }

    return _finishToken(kind);
  }

  /**
   * Builds a string literal token whose value is the characters in [buf].
   * The kind is STRING_PART when [isPart] is true, STRING otherwise.
   */
  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = new String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return new LiteralToken(kind, _source, _startIndex, _index, s);
  }

  /**
   * Builds a STRING token for a raw string by slicing the source text between
   * the opening and closing delimiters.
   */
  Token _makeRawStringToken(bool isMultiline) {
    String s;
    if (isMultiline) {
      // Skip the 4-character opener, then an initial newline if present.
      int start = _startIndex + 4;
      if (_source.text[start] == '\n') start++;
      // The closing delimiter is 3 characters long.
      s = _source.text.substring(start, _index - 3);
    } else {
      // 2-character opener, 1-character closing quote.
      s = _source.text.substring(_startIndex + 2, _index - 1);
    }
    return new LiteralToken(TokenKind.STRING, _source, _startIndex, _index, s);
  }

  /**
   * Scans the body of a multiline string delimited by three [quote]
   * characters. Returns a STRING token at the closing delimiter, a
   * STRING_PART token when a '$' starts an interpolation, or an error token
   * on end of input or an invalid escape.
   */
  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            return _makeStringToken(buf, false);
          }
          // Two quotes that were not a terminator: keep both.
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == 36/*$*/) {
        // start of string interp
        _interpStack = InterpStack.push(_interpStack, quote, true);
        return _makeStringToken(buf, true);
      } else if (ch == 92/*\*/) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Returns an LBRACE token and, while an interpolation is active, bumps the
   * brace depth of the current frame (an unset depth of -1 becomes 1).
   */
  Token _finishOpenBrace() {
    if (_interpStack != null) {
      if (_interpStack.depth == -1) {
        _interpStack.depth = 1;
      } else {
        assert(_interpStack.depth >= 0);
        _interpStack.depth += 1;
      }
    }
    return _finishToken(TokenKind.LBRACE);
  }

  /**
   * Returns an RBRACE token and, while an interpolation is active, lowers the
   * brace depth of the current frame.
   */
  Token _finishCloseBrace() {
    if (_interpStack != null) {
      _interpStack.depth -= 1;
      assert(_interpStack.depth >= 0);
    }
    return _finishToken(TokenKind.RBRACE);
  }

  /**
   * Scans a string whose opening [quote] has been consumed: an empty string,
   * a multiline string (three quotes), or a single-line string body.
   */
  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(10/*'\n'*/);
        return finishMultilineString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    return finishStringBody(quote);
  }

  /**
   * Scans a raw string whose opening [quote] has been consumed. No escapes or
   * interpolation are processed. Returns an error token on end of input.
   */
  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  /**
   * Scans a multiline raw string up to the closing run of three [quote]
   * characters. Returns an error token on end of input.
   */
  Token finishMultilineRawString(int quote) {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  /**
   * Scans the body of a single-line string up to the closing [quote].
   * Returns a STRING token at the closing quote, a STRING_PART token when a
   * '$' starts an interpolation, or an error token on end of input or an
   * invalid escape.
   */
  Token finishStringBody(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 36/*$*/) {
        // start of string interp
        _interpStack = InterpStack.push(_interpStack, quote, false);
        return _makeStringToken(buf, true);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == 92/*\*/) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Reads the escape sequence following a backslash and returns the character
   * code it denotes, or -1 if the sequence is invalid. Characters with no
   * special meaning stand for themselves.
   */
  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110/*n*/:
        return 0x0a/*'\n'*/;
      case 114/*r*/:
        return 0x0d/*'\r'*/;
      case 102/*f*/:
        return 0x0c/*'\f'*/;
      case 98/*b*/:
        return 0x08/*'\b'*/;
      case 116/*t*/:
        return 0x09/*'\t'*/;
      case 118/*v*/:
        return 0x0b/*'\v'*/;
      case 120/*x*/:
        // \xHH: exactly two hex digits.
        hexValue = readHex(2);
        break;
      case 117/*u*/:
        if (_maybeEatChar(123/*{*/)) {
          // \u{H...}: any number of hex digits, closed by '}'.
          hexValue = readHex();
          if (!_maybeEatChar(125/*}*/)) {
            return -1;
          }
        } else {
          // \uHHHH: exactly four hex digits.
          hexValue = readHex(4);
        }
        break;
      default: return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF) {
      world.fatal('unicode values greater than 2 bytes not implemented yet');
      return -1;
    } else {
      return -1;
    }
  }

  /**
   * Scans what follows a '.': a DOUBLE if a digit comes next, otherwise a
   * plain DOT token.
   */
  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }

  /**
   * Scans the rest of an identifier whose first character code is [ch] and
   * returns a token of the kind reported by [getIdentifierKind].
   *
   * When the identifier is the first token after a '$' in a string
   * interpolation (frame depth still -1), the frame depth is set to 0, the
   * interpolation-specific identifier character test is used, and a leading
   * '$' yields an error token.
   */
  Token finishIdentifier(int ch) {
    if (_interpStack != null && _interpStack.depth == -1) {
      _interpStack.depth = 0;
      if (ch == 36/*$*/) {
        return _errorToken(
            @"illegal character after $ in string interpolation");
      }
      while (_index < _text.length &&
          TokenizerHelpers.isInterpIdentifierPart(_text.charCodeAt(_index))) {
        _index++;
      }
    } else {
      while (_index < _text.length &&
          TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index))) {
        _index++;
      }
    }
    return _finishToken(getIdentifierKind());
  }
}
OLD | NEW |