Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Side by Side Diff: frog/leg/scanner/scanner.dart

Issue 9873021: Move frog/leg to lib/compiler/implementation. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « frog/leg/scanner/partial_parser.dart ('k') | frog/leg/scanner/scanner_bench.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
/**
 * Minimal scanner contract: a single [tokenize] operation that yields a
 * [Token].
 */
5 interface Scanner {
/** Runs the scanner and returns a [Token]. */
6 Token tokenize();
7 }
8
9 /**
10 * Common base class for a Dart scanner.
11 */
12 class AbstractScanner<T> implements Scanner {
// Primitive operations that concrete scanners must supply. Their
// implementations are not part of this class; the notes below describe only
// how the tokenize* methods of this class use them.

// Character access. The value returned by advance() is the character the
// tokenizer examines next; peek() is consulted before deciding whether to
// call advance().
// NOTE(review): nextByte() is only used while skipping bytes > 127 in
// tokenizeIdentifier; how it differs from advance() is not visible here.
13 abstract int advance();
14 abstract int nextByte();
15 abstract int peek();
// Called after the first character(s) of an operator to pick one of two
// tokens; its result is returned as the next character to examine.
// NOTE(review): presumably appends [yes] when the upcoming character equals
// [choice] and [no] otherwise -- confirm against the implementation.
16 abstract int select(int choice, PrecedenceInfo yes, PrecedenceInfo no);
// Token emission.
// NOTE(review): "Precence" looks like a misspelling of "Precedence"; renaming
// would require updating every implementer and caller.
17 abstract void appendPrecenceToken(PrecedenceInfo info);
18 abstract void appendStringToken(PrecedenceInfo info, String value);
19 abstract void appendByteStringToken(PrecedenceInfo info, T value);
20 abstract void appendKeywordToken(Keyword keyword);
21 abstract void appendWhiteSpace(int next);
22 abstract void appendEofToken();
// Extract the text of the token being scanned. Callers pass the token's
// start and an offset of 0, -1 or -2.
// NOTE(review): the offset is presumably relative to the current byteOffset
// -- confirm.
23 abstract T asciiString(int start, int offset);
24 abstract T utf8String(int start, int offset);
// firstToken() supplies the result of tokenize(); beginToken() is called at
// the start of bigSwitch and of each string interpolation.
25 abstract Token firstToken();
26 abstract void beginToken();
// Position bookkeeping. charOffset is the position reported in
// MalformedInputException; byteOffset is used to remember token starts.
27 abstract void addToCharOffset(int offset);
28 abstract int get charOffset();
29 abstract int get byteOffset();
// Grouping tokens. appendEndGroup's result is returned directly by bigSwitch
// as the next character to examine.
30 abstract void appendBeginGroup(PrecedenceInfo info, String value);
31 abstract int appendEndGroup(PrecedenceInfo info, String value, int openKind);
32 abstract void appendGt(PrecedenceInfo info, String value);
33 abstract void appendGtGt(PrecedenceInfo info, String value);
34 abstract void appendGtGtGt(PrecedenceInfo info, String value);
// Invoked right after a ';' token is appended.
35 abstract void discardOpenLt();
36
37 // TODO(ahe): Move this class to implementation.
38
/**
 * Scans the whole input.
 *
 * Feeds one character at a time to [bigSwitch] until it reports [$EOF],
 * appends the EOF token and returns the result of [firstToken].
 */
Token tokenize() {
  for (int current = advance(); current !== $EOF;) {
    current = bigSwitch(current);
  }
  appendEofToken();
  return firstToken();
}
47
/**
 * Scans a single token whose first character is [next] and returns the
 * character the caller should examine afterwards.
 *
 * Dispatches on [next] to the specialised tokenize* methods, or appends
 * simple one-character tokens directly.
 *
 * Throws [MalformedInputException] for ASCII control characters that are
 * not white space. This includes 0x1F and DEL (0x7F): they would otherwise
 * reach [tokenizeIdentifier], which returns them again without advancing,
 * so [tokenize] would never terminate.
 */
int bigSwitch(int next) {
  beginToken();
  if (next === $TAB || next === $LF || next === $CR || next === $SPACE) {
    appendWhiteSpace(next);
    return advance();
  }

  // Identifiers and keywords. Only lower-case words can be keywords.
  if ($a <= next && next <= $z) {
    return tokenizeKeywordOrIdentifier(next, true);
  }
  if (($A <= next && next <= $Z) || next === $_ || next === $$) {
    return tokenizeIdentifier(next, byteOffset, true);
  }

  // Operators that may span several characters.
  if (next === $LT) return tokenizeLessThan(next);
  if (next === $GT) return tokenizeGreaterThan(next);
  if (next === $EQ) return tokenizeEquals(next);
  if (next === $BANG) return tokenizeExclamation(next);
  if (next === $PLUS) return tokenizePlus(next);
  if (next === $MINUS) return tokenizeMinus(next);
  if (next === $STAR) return tokenizeMultiply(next);
  if (next === $PERCENT) return tokenizePercent(next);
  if (next === $AMPERSAND) return tokenizeAmpersand(next);
  if (next === $BAR) return tokenizeBar(next);
  if (next === $CARET) return tokenizeCaret(next);
  if (next === $OPEN_SQUARE_BRACKET) return tokenizeOpenSquareBracket(next);
  if (next === $TILDE) return tokenizeTilde(next);

  if (next === $BACKSLASH) {
    appendPrecenceToken(BACKSLASH_INFO);
    return advance();
  }

  if (next === $HASH) return tokenizeTag(next);

  if (next === $OPEN_PAREN) {
    appendBeginGroup(OPEN_PAREN_INFO, "(");
    return advance();
  }

  if (next === $CLOSE_PAREN) {
    return appendEndGroup(CLOSE_PAREN_INFO, ")", OPEN_PAREN_TOKEN);
  }

  if (next === $COMMA) {
    appendPrecenceToken(COMMA_INFO);
    return advance();
  }

  if (next === $COLON) {
    appendPrecenceToken(COLON_INFO);
    return advance();
  }

  if (next === $SEMICOLON) {
    appendPrecenceToken(SEMICOLON_INFO);
    discardOpenLt();
    return advance();
  }

  if (next === $QUESTION) {
    appendPrecenceToken(QUESTION_INFO);
    return advance();
  }

  if (next === $CLOSE_SQUARE_BRACKET) {
    return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO, "]",
                          OPEN_SQUARE_BRACKET_TOKEN);
  }

  if (next === $BACKPING) {
    appendPrecenceToken(BACKPING_INFO);
    return advance();
  }

  if (next === $OPEN_CURLY_BRACKET) {
    appendBeginGroup(OPEN_CURLY_BRACKET_INFO, "{");
    return advance();
  }

  if (next === $CLOSE_CURLY_BRACKET) {
    return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, "}",
                          OPEN_CURLY_BRACKET_TOKEN);
  }

  if (next === $SLASH) return tokenizeSlashOrComment(next);
  if (next === $AT) return tokenizeRawString(next);

  if (next === $DQ || next === $SQ) {
    return tokenizeString(next, byteOffset, false);
  }

  if (next === $PERIOD) return tokenizeDotOrNumber(next);
  if (next === $0) return tokenizeHexOrNumber(next);

  // TODO(ahe): Would a range check be faster?
  if (next === $1 || next === $2 || next === $3 || next === $4 || next === $5
      || next === $6 || next === $7 || next === $8 || next === $9) {
    return tokenizeNumber(next);
  }

  if (next === $EOF) {
    return $EOF;
  }

  // Every remaining ASCII value is a control character: 0x00-0x1F
  // (inclusive) or DEL (0x7F). Rejecting them here keeps them away from
  // tokenizeIdentifier, which does not consume ASCII non-identifier
  // characters.
  if (next <= 0x1f || next === 0x7f) {
    throw new MalformedInputException("illegal character $next", charOffset);
  }

  // The following are non-ASCII characters.

  if (next === $NBSP) {
    appendWhiteSpace(next);
    return advance();
  }

  return tokenizeIdentifier(next, byteOffset, true);
}
216
/**
 * Handles '#'.
 *
 * When the '#' sits at byte offset 0 and is followed by '!', the rest of
 * the line is skipped without appending a token and the line terminator
 * (or [$EOF]) is returned. Otherwise a hash token is appended.
 */
int tokenizeTag(int next) {
  // # or #!.*[\n\r]
  if (byteOffset === 0 && peek() === $BANG) {
    next = advance();
    while (!(next === $LF || next === $CR || next === $EOF)) {
      next = advance();
    }
    return next;
  }
  appendPrecenceToken(HASH_INFO);
  return advance();
}
230
/**
 * Handles '~', '~/' and '~/='. Returns the next character to examine.
 */
int tokenizeTilde(int next) {
  int following = advance();
  if (following !== $SLASH) {
    appendPrecenceToken(TILDE_INFO);
    return following;
  }
  // '~/' seen; [select] decides between '~/=' and '~/'.
  return select($EQ, TILDE_SLASH_EQ_INFO, TILDE_SLASH_INFO);
}
241
/**
 * Handles '[', '[]' and '[]='. Returns the next character to examine.
 */
int tokenizeOpenSquareBracket(int next) {
  int following = advance();
  if (following !== $CLOSE_SQUARE_BRACKET) {
    appendBeginGroup(OPEN_SQUARE_BRACKET_INFO, "[");
    return following;
  }
  // '[]' seen; [select] decides between '[]=' and '[]'.
  return select($EQ, INDEX_EQ_INFO, INDEX_INFO);
}
252
/** Handles '^' and '^=' by delegating the choice to [select]. */
int tokenizeCaret(int next) => select($EQ, CARET_EQ_INFO, CARET_INFO);
257
/**
 * Handles '|', '||' and '|='. Returns the next character to examine.
 */
int tokenizeBar(int next) {
  next = advance();
  if (next !== $BAR && next !== $EQ) {
    // Plain '|': the character just read belongs to the next token.
    appendPrecenceToken(BAR_INFO);
    return next;
  }
  appendPrecenceToken(next === $BAR ? BAR_BAR_INFO : BAR_EQ_INFO);
  return advance();
}
272
/**
 * Handles '&', '&&' and '&='. Returns the next character to examine.
 */
int tokenizeAmpersand(int next) {
  next = advance();
  if (next !== $AMPERSAND && next !== $EQ) {
    // Plain '&': the character just read belongs to the next token.
    appendPrecenceToken(AMPERSAND_INFO);
    return next;
  }
  appendPrecenceToken(
      next === $AMPERSAND ? AMPERSAND_AMPERSAND_INFO : AMPERSAND_EQ_INFO);
  return advance();
}
287
/** Handles '%' and '%=' by delegating the choice to [select]. */
int tokenizePercent(int next) => select($EQ, PERCENT_EQ_INFO, PERCENT_INFO);
292
/** Handles '*' and '*=' by delegating the choice to [select]. */
int tokenizeMultiply(int next) => select($EQ, STAR_EQ_INFO, STAR_INFO);
297
/**
 * Handles '-', '--' and '-='. Returns the next character to examine.
 */
int tokenizeMinus(int next) {
  next = advance();
  if (next !== $MINUS && next !== $EQ) {
    // Plain '-': the character just read belongs to the next token.
    appendPrecenceToken(MINUS_INFO);
    return next;
  }
  appendPrecenceToken(next === $MINUS ? MINUS_MINUS_INFO : MINUS_EQ_INFO);
  return advance();
}
312
313
/**
 * Handles '+', '++' and '+='. Returns the next character to examine.
 */
int tokenizePlus(int next) {
  next = advance();
  if (next !== $PLUS && next !== $EQ) {
    // Plain '+': the character just read belongs to the next token.
    appendPrecenceToken(PLUS_INFO);
    return next;
  }
  appendPrecenceToken(next === $PLUS ? PLUS_PLUS_INFO : PLUS_EQ_INFO);
  return advance();
}
328
/**
 * Handles '!', '!=' and '!=='. Returns the next character to examine.
 */
int tokenizeExclamation(int next) {
  int following = advance();
  if (following !== $EQ) {
    appendPrecenceToken(BANG_INFO);
    return following;
  }
  // '!=' seen; [select] decides between '!==' and '!='.
  return select($EQ, BANG_EQ_EQ_INFO, BANG_EQ_INFO);
}
338
/**
 * Handles '=', '==', '===' and '=>'. Returns the next character to examine.
 */
int tokenizeEquals(int next) {
  int following = advance();
  if (following === $GT) {
    // '=>'
    appendPrecenceToken(FUNCTION_INFO);
    return advance();
  }
  if (following === $EQ) {
    // '==' seen; [select] decides between '===' and '=='.
    return select($EQ, EQ_EQ_EQ_INFO, EQ_EQ_INFO);
  }
  appendPrecenceToken(EQ_INFO);
  return following;
}
351
/**
 * Handles '>', '>=', '>>', '>>=', '>>>' and '>>>='.
 *
 * Returns the next character to examine. The plain '>' forms go through
 * [appendGt], [appendGtGt] and [appendGtGtGt]; the '=' forms are appended
 * as ordinary precedence tokens.
 */
int tokenizeGreaterThan(int next) {
  // One '>' consumed so far.
  next = advance();
  if ($EQ === next) {
    appendPrecenceToken(GT_EQ_INFO);
    return advance();
  }
  if ($GT !== next) {
    appendGt(GT_INFO, ">");
    return next;
  }

  // '>>' consumed so far.
  next = advance();
  if ($EQ === next) {
    appendPrecenceToken(GT_GT_EQ_INFO);
    return advance();
  }
  if ($GT !== next) {
    appendGtGt(GT_GT_INFO, ">>");
    return next;
  }

  // '>>>' consumed so far.
  next = advance();
  if ($EQ === next) {
    appendPrecenceToken(GT_GT_GT_EQ_INFO);
    return advance();
  }
  appendGtGtGt(GT_GT_GT_INFO, ">>>");
  return next;
}
381
/**
 * Handles '<', '<=', '<<' and '<<='. Returns the next character to examine.
 *
 * A plain '<' is appended with [appendBeginGroup].
 */
int tokenizeLessThan(int next) {
  next = advance();
  if ($LT === next) {
    // '<<' seen; [select] decides between '<<=' and '<<'.
    return select($EQ, LT_LT_EQ_INFO, LT_LT_INFO);
  }
  if ($EQ === next) {
    appendPrecenceToken(LT_EQ_INFO);
    return advance();
  }
  appendBeginGroup(LT_INFO, "<");
  return next;
}
395
/**
 * Scans a number whose first digit is [next] and returns the first
 * character after it.
 *
 * Digits are consumed until one of the following is seen:
 *  * '.': the rest is handled by [tokenizeFractionPart];
 *  * 'e' / 'E': the exponent is handled by [tokenizeFractionPart];
 *  * 'd' / 'D': the suffix is consumed and a double token is appended;
 *  * anything else: an integer token is appended.
 *
 * The 'd'/'D' suffix is handled here rather than in [tokenizeFractionPart].
 * That method treats "no digit seen" as "integer followed by a period", so
 * for input such as `1d` it would emit an integer token with offset -1 and
 * a period token that is not in the input.
 */
int tokenizeNumber(int next) {
  int start = byteOffset;
  while (true) {
    next = advance();
    if ($0 <= next && next <= $9) {
      continue;
    }
    if (next === $PERIOD) {
      return tokenizeFractionPart(advance(), start);
    }
    if (next === $e || next === $E) {
      return tokenizeFractionPart(next, start);
    }
    if (next === $d || next === $D) {
      // Same suffix handling as at the end of tokenizeFractionPart.
      next = advance();
      appendByteStringToken(DOUBLE_INFO, asciiString(start, 0));
      return next;
    }
    appendByteStringToken(INT_INFO, asciiString(start, 0));
    return next;
  }
}
412
/**
 * Handles a token starting with '0': a hexadecimal literal when the
 * following character is 'x' or 'X', otherwise an ordinary number.
 */
int tokenizeHexOrNumber(int next) {
  int lookahead = peek();
  if (lookahead !== $x && lookahead !== $X) {
    return tokenizeNumber(next);
  }
  advance();  // Consume the 'x' / 'X' that was peeked.
  return tokenizeHex(lookahead);
}
421
/**
 * Scans the digits of a hexadecimal literal; called with the 'x'/'X'
 * already consumed. Appends a hexadecimal token and returns the first
 * character that is not a hex digit.
 *
 * Throws [MalformedInputException] when no hex digit follows the prefix.
 */
int tokenizeHex(int next) {
  // The token starts one byte before the current offset.
  int start = byteOffset - 1;
  bool sawDigit = false;
  for (next = advance();
       ($0 <= next && next <= $9)
           || ($A <= next && next <= $F)
           || ($a <= next && next <= $f);
       next = advance()) {
    sawDigit = true;
  }
  if (!sawDigit) {
    throw new MalformedInputException("hex digit expected", charOffset);
  }
  appendByteStringToken(HEXADECIMAL_INFO, asciiString(start, 0));
  return next;
}
440
/**
 * Handles a token starting with '.': a fraction such as `.5`, the '..' and
 * '...' tokens, or a plain period. Returns the next character to examine.
 */
int tokenizeDotOrNumber(int next) {
  int start = byteOffset;
  next = advance();
  if ($PERIOD === next) {
    // '..' seen; [select] decides between '...' and '..'.
    return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);
  }
  if ($0 <= next && next <= $9) {
    return tokenizeFractionPart(next, start);
  }
  appendPrecenceToken(PERIOD_INFO);
  return next;
}
453
/**
 * Scans what follows the integer part of a number that began at [start];
 * [next] is the first character of that tail.
 *
 * Consumes digits and, if present, an exponent (via [tokenizeExponent]).
 * If neither was found, an integer token and a period token are appended
 * and scanning continues with [bigSwitch]. Otherwise an optional 'd'/'D'
 * suffix is consumed and a double token is appended.
 *
 * Returns the next character to examine.
 */
int tokenizeFractionPart(int next, int start) {
  bool hasDigit = false;
  while ($0 <= next && next <= $9) {
    hasDigit = true;
    next = advance();
  }
  if ($e === next || $E === next) {
    // An exponent counts as "digit seen" for the check below.
    hasDigit = true;
    next = tokenizeExponent(advance());
  }
  if (!hasDigit) {
    appendByteStringToken(INT_INFO, asciiString(start, -1));
    // TODO(ahe): Wrong offset for the period.
    appendPrecenceToken(PERIOD_INFO);
    return bigSwitch(next);
  }
  if (next === $d || next === $D) {
    next = advance();
  }
  appendByteStringToken(DOUBLE_INFO, asciiString(start, 0));
  return next;
}
483
/**
 * Scans an exponent: an optional sign followed by one or more digits.
 * [next] is the first character after the 'e'/'E'.
 *
 * Returns the first character after the digits. Throws
 * [MalformedInputException] when no digit is present.
 */
int tokenizeExponent(int next) {
  if (next === $PLUS || next === $MINUS) {
    next = advance();
  }
  if (!($0 <= next && next <= $9)) {
    throw new MalformedInputException("digit expected", charOffset);
  }
  do {
    next = advance();
  } while ($0 <= next && next <= $9);
  return next;
}
501
/**
 * Handles '/', '/=', single-line comments and multi-line comments.
 * Returns the next character to examine.
 */
int tokenizeSlashOrComment(int next) {
  next = advance();
  if ($SLASH === next) {
    return tokenizeSingleLineComment(next);
  }
  if ($STAR === next) {
    return tokenizeMultiLineComment(next);
  }
  if ($EQ !== next) {
    appendPrecenceToken(SLASH_INFO);
    return next;
  }
  appendPrecenceToken(SLASH_EQ_INFO);
  return advance();
}
516
/**
 * Skips the rest of a single-line comment without appending a token and
 * returns the line terminator, or [$EOF], that ended it.
 */
int tokenizeSingleLineComment(int next) {
  do {
    next = advance();
  } while ($LF !== next && $CR !== next && $EOF !== next);
  return next;
}
525
/**
 * Skips a multi-line comment, which may contain nested comments, without
 * appending a token.
 *
 * Returns the character after the closing delimiter of the outermost
 * comment, or [$EOF] if the input ends first.
 */
int tokenizeMultiLineComment(int next) {
  int depth = 1;
  next = advance();
  while ($EOF !== next) {
    int previous = next;
    next = advance();
    if ($STAR === previous && $SLASH === next) {
      // A closing delimiter ends one nesting level.
      depth--;
      if (0 === depth) {
        return advance();
      }
      next = advance();
    } else if ($SLASH === previous && $STAR === next) {
      // An opening delimiter starts a nested comment.
      next = advance();
      depth++;
    }
  }
  // TODO(ahe): Report error.
  return next;
}
554
/**
 * Scans a word that starts with a lower-case letter and appends either a
 * keyword token or, via [tokenizeIdentifier], an identifier token.
 *
 * Lower-case letters are run through the [KeywordState] machine. The word
 * is a keyword only if the machine ends in a state with a keyword and the
 * following character cannot continue an identifier.
 *
 * Returns the next character to examine.
 */
int tokenizeKeywordOrIdentifier(int next, bool allowDollar) {
  int start = byteOffset;
  KeywordState state = KeywordState.KEYWORD_STATE;
  while (state !== null && $a <= next && next <= $z) {
    state = state.next(next);
    next = advance();
  }
  if (state === null || state.keyword === null) {
    return tokenizeIdentifier(next, start, allowDollar);
  }
  bool continuesIdentifier =
      ($A <= next && next <= $Z) ||
      ($0 <= next && next <= $9) ||
      next === $_ ||
      next === $$ ||
      next >= 128;
  if (continuesIdentifier) {
    return tokenizeIdentifier(next, start, allowDollar);
  }
  appendKeywordToken(state.keyword);
  return next;
}
577
/**
 * Scans the remainder of an identifier that began at [start] and appends
 * an identifier token; [next] is the first character not yet consumed.
 *
 * ASCII letters, digits, '_' and (when [allowDollar] is true) '$' are
 * consumed with [advance]. Runs of bytes above 127 are consumed with
 * [nextByte] and the character offset is adjusted through
 * [addToCharOffset]. The identifier ends at the first other ASCII value,
 * which is returned without being consumed.
 */
int tokenizeIdentifier(int next, int start, bool allowDollar) {
  bool sawNonAscii = false;
  while (true) {
    bool isAsciiIdentifierPart =
        ($a <= next && next <= $z) ||
        ($A <= next && next <= $Z) ||
        ($0 <= next && next <= $9) ||
        next === $_ ||
        (next === $$ && allowDollar);
    if (isAsciiIdentifierPart) {
      next = advance();
      continue;
    }
    if (next >= 128) {
      int nonAsciiStart = byteOffset;
      do {
        next = nextByte();
      } while (next > 127);
      String string = utf8String(nonAsciiStart, -1).slowToString();
      sawNonAscii = true;
      // NOTE(review): this is start minus current offset, which looks like
      // it yields a non-positive value -- confirm the intended sign against
      // the byteOffset/addToCharOffset implementations.
      int byteLength = nonAsciiStart - byteOffset;
      addToCharOffset(string.length - byteLength);
      continue;
    }
    // Any other ASCII value ends the identifier.
    appendByteStringToken(
        IDENTIFIER_INFO,
        sawNonAscii ? utf8String(start, -1) : asciiString(start, 0));
    return next;
  }
}
606
/**
 * Handles '@', which must be followed by a quote that opens a raw string.
 *
 * Throws [MalformedInputException] when the next character is neither a
 * single nor a double quote.
 */
int tokenizeRawString(int next) {
  int start = byteOffset;
  next = advance();
  if (next !== $DQ && next !== $SQ) {
    throw new MalformedInputException("expected ' or \"", charOffset);
  }
  return tokenizeString(next, start, true);
}
616
/**
 * Scans a string literal whose opening quote is [next] and whose token
 * began at [start]; [raw] selects the raw-string scanners.
 *
 * Two quotes in a row form an empty string; three open a multi-line
 * string. Returns the next character to examine.
 */
int tokenizeString(int next, int start, bool raw) {
  int quoteChar = next;
  next = advance();
  if (quoteChar !== next) {
    return raw
        ? tokenizeSingleLineRawString(next, quoteChar, start)
        : tokenizeSingleLineString(next, quoteChar, start);
  }
  next = advance();
  if (quoteChar === next) {
    // Multiline string.
    return tokenizeMultiLineString(quoteChar, start, raw);
  }
  // Empty string.
  appendByteStringToken(STRING_INFO, utf8String(start, -1));
  return next;
}
637
/**
 * Returns true if [character] is one of 0-9, a-f or A-F.
 */
static bool isHexDigit(int character) {
  if ($0 <= character && character <= $9) return true;
  // Setting bit 0x20 maps 'A'-'F' onto 'a'-'f'.
  int lowered = character | 0x20;
  return $a <= lowered && lowered <= $f;
}
643
/**
 * Scans a non-raw single-line string up to its closing [quoteChar].
 * [next] is the first character after the opening quote.
 *
 * A backslash makes the following character skip the quote and '$'
 * checks. '$' starts an interpolation, after which a new string segment
 * begins at the current byte offset. Returns the character after the
 * closing quote.
 *
 * Throws [MalformedInputException] when a line terminator or the end of
 * input is reached before the closing quote.
 */
int tokenizeSingleLineString(int next, int quoteChar, int start) {
  while (next !== quoteChar) {
    if (next === $$) {
      next = tokenizeStringInterpolation(start);
      start = byteOffset;
      continue;
    }
    if (next === $BACKSLASH) {
      next = advance();
    }
    // The range test comes first so that most characters need only one
    // comparison.
    bool endsLineOrInput =
        next <= $CR && (next === $LF || next === $CR || next === $EOF);
    if (endsLineOrInput) {
      throw new MalformedInputException("unterminated string literal",
                                        charOffset);
    }
    next = advance();
  }
  appendByteStringToken(STRING_INFO, utf8String(start, 0));
  return advance();
}
662
/**
 * Handles a '$' inside a string: `${expression}` when the next character
 * is '{', otherwise `$identifier`. [start] is where the current string
 * segment began.
 */
int tokenizeStringInterpolation(int start) {
  beginToken();
  int next = advance();
  return next === $OPEN_CURLY_BRACKET
      ? tokenizeInterpolatedExpression(next, start)
      : tokenizeInterpolatedIdentifier(next, start);
}
672
/**
 * Scans a `${...}` interpolation.
 *
 * Appends the string segment scanned so far and an interpolation
 * begin-group token, then scans tokens with [bigSwitch] until it yields
 * [$STX] or [$EOF]. Returns [$EOF] unchanged; otherwise returns the result
 * of one more [advance].
 */
int tokenizeInterpolatedExpression(int next, int start) {
  appendByteStringToken(STRING_INFO, utf8String(start, -2));
  appendBeginGroup(STRING_INTERPOLATION_INFO, "\${");
  for (next = advance(); !(next === $EOF || next === $STX);) {
    next = bigSwitch(next);
  }
  return next === $EOF ? next : advance();
}
683
/**
 * Scans a `$identifier` interpolation.
 *
 * Appends the string segment scanned so far, then wraps the identifier in
 * an interpolation begin-group token and a closing-brace end-group token.
 * '$' is not allowed inside the identifier. Returns the character after
 * the identifier.
 */
int tokenizeInterpolatedIdentifier(int next, int start) {
  T precedingText = utf8String(start, -2);
  appendByteStringToken(STRING_INFO, precedingText);
  appendBeginGroup(STRING_INTERPOLATION_INFO, "\${");
  int afterIdentifier = tokenizeKeywordOrIdentifier(next, false);
  appendEndGroup(CLOSE_CURLY_BRACKET_INFO, "}", OPEN_CURLY_BRACKET_TOKEN);
  return afterIdentifier;
}
691
/**
 * Scans a raw single-line string up to its closing [quoteChar].
 *
 * [next] is the first character after the opening quote (already read by
 * [tokenizeString]) and [start] is where the token began. Returns the
 * character after the closing quote.
 *
 * Throws [MalformedInputException] when a line terminator or the end of
 * input is reached before the closing quote.
 */
int tokenizeSingleLineRawString(int next, int quoteChar, int start) {
  // [next] is inspected before advancing, so a line terminator or end of
  // input directly after the opening quote is reported instead of skipped.
  while (next !== $EOF) {
    if (next === quoteChar) {
      appendByteStringToken(STRING_INFO, utf8String(start, 0));
      return advance();
    }
    if (next === $LF || next === $CR) {
      throw new MalformedInputException("unterminated string literal",
                                        charOffset);
    }
    next = advance();
  }
  throw new MalformedInputException("unterminated string literal",
                                    charOffset);
}
707
/**
 * Scans a raw multi-line string up to three consecutive [quoteChar]
 * characters. Returns the character after the closing delimiter.
 *
 * Throws [MalformedInputException] when the input ends first.
 */
int tokenizeMultiLineRawString(int quoteChar, int start) {
  int consecutiveQuotes = 0;
  int next = advance();
  while (next !== $EOF) {
    if (next === quoteChar) {
      consecutiveQuotes++;
      if (consecutiveQuotes === 3) {
        appendByteStringToken(STRING_INFO, utf8String(start, 0));
        return advance();
      }
    } else {
      // Any other character breaks the run of quotes.
      consecutiveQuotes = 0;
    }
    next = advance();
  }
  throw new MalformedInputException("unterminated string literal",
                                    charOffset);
}
727
/**
 * Scans a multi-line string up to three consecutive [quoteChar]
 * characters; raw strings are handed to [tokenizeMultiLineRawString].
 *
 * '$' starts an interpolation, after which a new string segment begins at
 * the current byte offset. A backslash causes the following character to
 * be skipped. Returns the character after the closing delimiter.
 *
 * Throws [MalformedInputException] when the input ends first.
 */
int tokenizeMultiLineString(int quoteChar, int start, bool raw) {
  if (raw) return tokenizeMultiLineRawString(quoteChar, start);
  int consecutiveQuotes = 0;
  int next = advance();
  while (next !== $EOF) {
    if (next === quoteChar) {
      consecutiveQuotes++;
      if (consecutiveQuotes === 3) {
        appendByteStringToken(STRING_INFO, utf8String(start, 0));
        return advance();
      }
      next = advance();
      continue;
    }
    // Any other character breaks the run of quotes.
    consecutiveQuotes = 0;
    if (next === $$) {
      next = tokenizeStringInterpolation(start);
      start = byteOffset;
      continue;
    }
    if (next === $BACKSLASH) {
      next = advance();
      if (next === $EOF) break;
    }
    next = advance();
  }
  throw new MalformedInputException("unterminated string literal",
                                    charOffset);
}
757 }
758
/**
 * Thrown by the scanner when the input cannot be tokenized.
 *
 * Carries a description of the problem and the position it was detected
 * at; the scanner passes its `charOffset` as the position.
 */
class MalformedInputException {
  final String message;
  final position;

  MalformedInputException(String message, position)
      : this.message = message,
        this.position = position;

  /** Returns [message] only; the position is not included. */
  toString() {
    return message;
  }
}
OLDNEW
« no previous file with comments | « frog/leg/scanner/partial_parser.dart ('k') | frog/leg/scanner/scanner_bench.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698