Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(138)

Unified Diff: frog/leg/scanner/scanner.dart

Issue 9873021: Move frog/leg to lib/compiler/implementation. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « frog/leg/scanner/partial_parser.dart ('k') | frog/leg/scanner/scanner_bench.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: frog/leg/scanner/scanner.dart
===================================================================
--- frog/leg/scanner/scanner.dart (revision 5925)
+++ frog/leg/scanner/scanner.dart (working copy)
@@ -1,764 +0,0 @@
-// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-
-interface Scanner {
- Token tokenize();
-}
-
-/**
- * Common base class for a Dart scanner.
- */
-class AbstractScanner<T> implements Scanner {
- abstract int advance();
- abstract int nextByte();
- abstract int peek();
- abstract int select(int choice, PrecedenceInfo yes, PrecedenceInfo no);
- abstract void appendPrecenceToken(PrecedenceInfo info);
- abstract void appendStringToken(PrecedenceInfo info, String value);
- abstract void appendByteStringToken(PrecedenceInfo info, T value);
- abstract void appendKeywordToken(Keyword keyword);
- abstract void appendWhiteSpace(int next);
- abstract void appendEofToken();
- abstract T asciiString(int start, int offset);
- abstract T utf8String(int start, int offset);
- abstract Token firstToken();
- abstract void beginToken();
- abstract void addToCharOffset(int offset);
- abstract int get charOffset();
- abstract int get byteOffset();
- abstract void appendBeginGroup(PrecedenceInfo info, String value);
- abstract int appendEndGroup(PrecedenceInfo info, String value, int openKind);
- abstract void appendGt(PrecedenceInfo info, String value);
- abstract void appendGtGt(PrecedenceInfo info, String value);
- abstract void appendGtGtGt(PrecedenceInfo info, String value);
- abstract void discardOpenLt();
-
- // TODO(ahe): Move this class to implementation.
-
- Token tokenize() {
- int next = advance();
- while (next !== $EOF) {
- next = bigSwitch(next);
- }
- appendEofToken();
- return firstToken();
- }
-
- int bigSwitch(int next) {
- beginToken();
- if (next === $TAB || next === $LF || next === $CR || next === $SPACE) {
- appendWhiteSpace(next);
- return advance();
- }
-
- if ($a <= next && next <= $z) {
- return tokenizeKeywordOrIdentifier(next, true);
- }
-
- if (($A <= next && next <= $Z) || next === $_ || next === $$) {
- return tokenizeIdentifier(next, byteOffset, true);
- }
-
- if (next === $LT) {
- return tokenizeLessThan(next);
- }
-
- if (next === $GT) {
- return tokenizeGreaterThan(next);
- }
-
- if (next === $EQ) {
- return tokenizeEquals(next);
- }
-
- if (next === $BANG) {
- return tokenizeExclamation(next);
- }
-
- if (next === $PLUS) {
- return tokenizePlus(next);
- }
-
- if (next === $MINUS) {
- return tokenizeMinus(next);
- }
-
- if (next === $STAR) {
- return tokenizeMultiply(next);
- }
-
- if (next === $PERCENT) {
- return tokenizePercent(next);
- }
-
- if (next === $AMPERSAND) {
- return tokenizeAmpersand(next);
- }
-
- if (next === $BAR) {
- return tokenizeBar(next);
- }
-
- if (next === $CARET) {
- return tokenizeCaret(next);
- }
-
- if (next === $OPEN_SQUARE_BRACKET) {
- return tokenizeOpenSquareBracket(next);
- }
-
- if (next === $TILDE) {
- return tokenizeTilde(next);
- }
-
- if (next === $BACKSLASH) {
- appendPrecenceToken(BACKSLASH_INFO);
- return advance();
- }
-
- if (next === $HASH) {
- return tokenizeTag(next);
- }
-
- if (next === $OPEN_PAREN) {
- appendBeginGroup(OPEN_PAREN_INFO, "(");
- return advance();
- }
-
- if (next === $CLOSE_PAREN) {
- return appendEndGroup(CLOSE_PAREN_INFO, ")", OPEN_PAREN_TOKEN);
- }
-
- if (next === $COMMA) {
- appendPrecenceToken(COMMA_INFO);
- return advance();
- }
-
- if (next === $COLON) {
- appendPrecenceToken(COLON_INFO);
- return advance();
- }
-
- if (next === $SEMICOLON) {
- appendPrecenceToken(SEMICOLON_INFO);
- discardOpenLt();
- return advance();
- }
-
- if (next === $QUESTION) {
- appendPrecenceToken(QUESTION_INFO);
- return advance();
- }
-
- if (next === $CLOSE_SQUARE_BRACKET) {
- return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO, "]",
- OPEN_SQUARE_BRACKET_TOKEN);
- }
-
- if (next === $BACKPING) {
- appendPrecenceToken(BACKPING_INFO);
- return advance();
- }
-
- if (next === $OPEN_CURLY_BRACKET) {
- appendBeginGroup(OPEN_CURLY_BRACKET_INFO, "{");
- return advance();
- }
-
- if (next === $CLOSE_CURLY_BRACKET) {
- return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, "}",
- OPEN_CURLY_BRACKET_TOKEN);
- }
-
- if (next === $SLASH) {
- return tokenizeSlashOrComment(next);
- }
-
- if (next === $AT) {
- return tokenizeRawString(next);
- }
-
- if (next === $DQ || next === $SQ) {
- return tokenizeString(next, byteOffset, false);
- }
-
- if (next === $PERIOD) {
- return tokenizeDotOrNumber(next);
- }
-
- if (next === $0) {
- return tokenizeHexOrNumber(next);
- }
-
- // TODO(ahe): Would a range check be faster?
- if (next === $1 || next === $2 || next === $3 || next === $4 || next === $5
- || next === $6 || next === $7 || next === $8 || next === $9) {
- return tokenizeNumber(next);
- }
-
- if (next === $EOF) {
- return $EOF;
- }
- if (next < 0x1f) {
- throw new MalformedInputException("illegal character $next", charOffset);
- }
-
- // The following are non-ASCII characters.
-
- if (next === $NBSP) {
- appendWhiteSpace(next);
- return advance();
- }
-
- return tokenizeIdentifier(next, byteOffset, true);
- }
-
- int tokenizeTag(int next) {
- // # or #!.*[\n\r]
- if (byteOffset === 0) {
- if (peek() === $BANG) {
- do {
- next = advance();
- } while (next !== $LF && next !== $CR && next !== $EOF);
- return next;
- }
- }
- appendPrecenceToken(HASH_INFO);
- return advance();
- }
-
- int tokenizeTilde(int next) {
- // ~ ~/ ~/=
- next = advance();
- if (next === $SLASH) {
- return select($EQ, TILDE_SLASH_EQ_INFO, TILDE_SLASH_INFO);
- } else {
- appendPrecenceToken(TILDE_INFO);
- return next;
- }
- }
-
- int tokenizeOpenSquareBracket(int next) {
- // [ [] []=
- next = advance();
- if (next === $CLOSE_SQUARE_BRACKET) {
- return select($EQ, INDEX_EQ_INFO, INDEX_INFO);
- } else {
- appendBeginGroup(OPEN_SQUARE_BRACKET_INFO, "[");
- return next;
- }
- }
-
- int tokenizeCaret(int next) {
- // ^ ^=
- return select($EQ, CARET_EQ_INFO, CARET_INFO);
- }
-
- int tokenizeBar(int next) {
- // | || |=
- next = advance();
- if (next === $BAR) {
- appendPrecenceToken(BAR_BAR_INFO);
- return advance();
- } else if (next === $EQ) {
- appendPrecenceToken(BAR_EQ_INFO);
- return advance();
- } else {
- appendPrecenceToken(BAR_INFO);
- return next;
- }
- }
-
- int tokenizeAmpersand(int next) {
- // && &= &
- next = advance();
- if (next === $AMPERSAND) {
- appendPrecenceToken(AMPERSAND_AMPERSAND_INFO);
- return advance();
- } else if (next === $EQ) {
- appendPrecenceToken(AMPERSAND_EQ_INFO);
- return advance();
- } else {
- appendPrecenceToken(AMPERSAND_INFO);
- return next;
- }
- }
-
- int tokenizePercent(int next) {
- // % %=
- return select($EQ, PERCENT_EQ_INFO, PERCENT_INFO);
- }
-
- int tokenizeMultiply(int next) {
- // * *=
- return select($EQ, STAR_EQ_INFO, STAR_INFO);
- }
-
- int tokenizeMinus(int next) {
- // - -- -=
- next = advance();
- if (next === $MINUS) {
- appendPrecenceToken(MINUS_MINUS_INFO);
- return advance();
- } else if (next === $EQ) {
- appendPrecenceToken(MINUS_EQ_INFO);
- return advance();
- } else {
- appendPrecenceToken(MINUS_INFO);
- return next;
- }
- }
-
-
- int tokenizePlus(int next) {
- // + ++ +=
- next = advance();
- if ($PLUS === next) {
- appendPrecenceToken(PLUS_PLUS_INFO);
- return advance();
- } else if ($EQ === next) {
- appendPrecenceToken(PLUS_EQ_INFO);
- return advance();
- } else {
- appendPrecenceToken(PLUS_INFO);
- return next;
- }
- }
-
- int tokenizeExclamation(int next) {
- // ! != !==
- next = advance();
- if (next === $EQ) {
- return select($EQ, BANG_EQ_EQ_INFO, BANG_EQ_INFO);
- }
- appendPrecenceToken(BANG_INFO);
- return next;
- }
-
- int tokenizeEquals(int next) {
- // = == ===
- next = advance();
- if (next === $EQ) {
- return select($EQ, EQ_EQ_EQ_INFO, EQ_EQ_INFO);
- } else if (next === $GT) {
- appendPrecenceToken(FUNCTION_INFO);
- return advance();
- }
- appendPrecenceToken(EQ_INFO);
- return next;
- }
-
- int tokenizeGreaterThan(int next) {
- // > >= >> >>= >>> >>>=
- next = advance();
- if ($EQ === next) {
- appendPrecenceToken(GT_EQ_INFO);
- return advance();
- } else if ($GT === next) {
- next = advance();
- if ($EQ === next) {
- appendPrecenceToken(GT_GT_EQ_INFO);
- return advance();
- } else if ($GT === next) {
- next = advance();
- if (next === $EQ) {
- appendPrecenceToken(GT_GT_GT_EQ_INFO);
- return advance();
- } else {
- appendGtGtGt(GT_GT_GT_INFO, ">>>");
- return next;
- }
- } else {
- appendGtGt(GT_GT_INFO, ">>");
- return next;
- }
- } else {
- appendGt(GT_INFO, ">");
- return next;
- }
- }
-
- int tokenizeLessThan(int next) {
- // < <= << <<=
- next = advance();
- if ($EQ === next) {
- appendPrecenceToken(LT_EQ_INFO);
- return advance();
- } else if ($LT === next) {
- return select($EQ, LT_LT_EQ_INFO, LT_LT_INFO);
- } else {
- appendBeginGroup(LT_INFO, "<");
- return next;
- }
- }
-
- int tokenizeNumber(int next) {
- int start = byteOffset;
- while (true) {
- next = advance();
- if ($0 <= next && next <= $9) {
- continue;
- } else if (next === $PERIOD) {
- return tokenizeFractionPart(advance(), start);
- } else if (next === $e || next === $E || next === $d || next === $D) {
- return tokenizeFractionPart(next, start);
- } else {
- appendByteStringToken(INT_INFO, asciiString(start, 0));
- return next;
- }
- }
- }
-
- int tokenizeHexOrNumber(int next) {
- int x = peek();
- if (x === $x || x === $X) {
- advance();
- return tokenizeHex(x);
- }
- return tokenizeNumber(next);
- }
-
- int tokenizeHex(int next) {
- int start = byteOffset - 1;
- bool hasDigits = false;
- while (true) {
- next = advance();
- if (($0 <= next && next <= $9)
- || ($A <= next && next <= $F)
- || ($a <= next && next <= $f)) {
- hasDigits = true;
- } else {
- if (!hasDigits) {
- throw new MalformedInputException("hex digit expected", charOffset);
- }
- appendByteStringToken(HEXADECIMAL_INFO, asciiString(start, 0));
- return next;
- }
- }
- }
-
- int tokenizeDotOrNumber(int next) {
- int start = byteOffset;
- next = advance();
- if (($0 <= next && next <= $9)) {
- return tokenizeFractionPart(next, start);
- } else if ($PERIOD === next) {
- return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);
- } else {
- appendPrecenceToken(PERIOD_INFO);
- return next;
- }
- }
-
- int tokenizeFractionPart(int next, int start) {
- bool done = false;
- bool hasDigit = false;
- LOOP: while (!done) {
- if ($0 <= next && next <= $9) {
- hasDigit = true;
- } else if ($e === next || $E === next) {
- hasDigit = true;
- next = tokenizeExponent(advance());
- done = true;
- continue LOOP;
- } else {
- done = true;
- continue LOOP;
- }
- next = advance();
- }
- if (!hasDigit) {
- appendByteStringToken(INT_INFO, asciiString(start, -1));
- // TODO(ahe): Wrong offset for the period.
- appendPrecenceToken(PERIOD_INFO);
- return bigSwitch(next);
- }
- if (next === $d || next === $D) {
- next = advance();
- }
- appendByteStringToken(DOUBLE_INFO, asciiString(start, 0));
- return next;
- }
-
- int tokenizeExponent(int next) {
- if (next === $PLUS || next === $MINUS) {
- next = advance();
- }
- bool hasDigits = false;
- while (true) {
- if ($0 <= next && next <= $9) {
- hasDigits = true;
- } else {
- if (!hasDigits) {
- throw new MalformedInputException("digit expected", charOffset);
- }
- return next;
- }
- next = advance();
- }
- }
-
- int tokenizeSlashOrComment(int next) {
- next = advance();
- if ($STAR === next) {
- return tokenizeMultiLineComment(next);
- } else if ($SLASH === next) {
- return tokenizeSingleLineComment(next);
- } else if ($EQ === next) {
- appendPrecenceToken(SLASH_EQ_INFO);
- return advance();
- } else {
- appendPrecenceToken(SLASH_INFO);
- return next;
- }
- }
-
- int tokenizeSingleLineComment(int next) {
- while (true) {
- next = advance();
- if ($LF === next || $CR === next || $EOF === next) {
- return next;
- }
- }
- }
-
- int tokenizeMultiLineComment(int next) {
- int nesting = 1;
- next = advance();
- while (true) {
- if ($EOF === next) {
- // TODO(ahe): Report error.
- return next;
- } else if ($STAR === next) {
- next = advance();
- if ($SLASH === next) {
- --nesting;
- if (0 === nesting) {
- return advance();
- } else {
- next = advance();
- }
- }
- } else if ($SLASH === next) {
- next = advance();
- if ($STAR === next) {
- next = advance();
- ++nesting;
- }
- } else {
- next = advance();
- }
- }
- }
-
- int tokenizeKeywordOrIdentifier(int next, bool allowDollar) {
- KeywordState state = KeywordState.KEYWORD_STATE;
- int start = byteOffset;
- while (state !== null && $a <= next && next <= $z) {
- state = state.next(next);
- next = advance();
- }
- if (state === null || state.keyword === null) {
- return tokenizeIdentifier(next, start, allowDollar);
- }
- if (($A <= next && next <= $Z) ||
- ($0 <= next && next <= $9) ||
- next === $_ ||
- next === $$) {
- return tokenizeIdentifier(next, start, allowDollar);
- } else if (next < 128) {
- appendKeywordToken(state.keyword);
- return next;
- } else {
- return tokenizeIdentifier(next, start, allowDollar);
- }
- }
-
- int tokenizeIdentifier(int next, int start, bool allowDollar) {
- bool isAscii = true;
- while (true) {
- if (($a <= next && next <= $z) ||
- ($A <= next && next <= $Z) ||
- ($0 <= next && next <= $9) ||
- next === $_ ||
- (next === $$ && allowDollar)) {
- next = advance();
- } else if (next < 128) {
- if (isAscii) {
- appendByteStringToken(IDENTIFIER_INFO, asciiString(start, 0));
- } else {
- appendByteStringToken(IDENTIFIER_INFO, utf8String(start, -1));
- }
- return next;
- } else {
- int nonAsciiStart = byteOffset;
- do {
- next = nextByte();
- } while (next > 127);
- String string = utf8String(nonAsciiStart, -1).slowToString();
- isAscii = false;
- int byteLength = nonAsciiStart - byteOffset;
- addToCharOffset(string.length - byteLength);
- }
- }
- }
-
- int tokenizeRawString(int next) {
- int start = byteOffset;
- next = advance();
- if (next === $DQ || next === $SQ) {
- return tokenizeString(next, start, true);
- } else {
- throw new MalformedInputException("expected ' or \"", charOffset);
- }
- }
-
- int tokenizeString(int next, int start, bool raw) {
- int quoteChar = next;
- next = advance();
- if (quoteChar === next) {
- next = advance();
- if (quoteChar === next) {
- // Multiline string.
- return tokenizeMultiLineString(quoteChar, start, raw);
- } else {
- // Empty string.
- appendByteStringToken(STRING_INFO, utf8String(start, -1));
- return next;
- }
- }
- if (raw) {
- return tokenizeSingleLineRawString(next, quoteChar, start);
- } else {
- return tokenizeSingleLineString(next, quoteChar, start);
- }
- }
-
- static bool isHexDigit(int character) {
- if ($0 <= character && character <= $9) return true;
- character |= 0x20;
- return ($a <= character && character <= $f);
- }
-
- int tokenizeSingleLineString(int next, int quoteChar, int start) {
- while (next !== quoteChar) {
- if (next === $BACKSLASH) {
- next = advance();
- } else if (next === $$) {
- next = tokenizeStringInterpolation(start);
- start = byteOffset;
- continue;
- }
- if (next <= $CR && (next === $LF || next === $CR || next === $EOF)) {
- throw new MalformedInputException("unterminated string literal",
- charOffset);
- }
- next = advance();
- }
- appendByteStringToken(STRING_INFO, utf8String(start, 0));
- return advance();
- }
-
- int tokenizeStringInterpolation(int start) {
- beginToken();
- int next = advance();
- if (next === $OPEN_CURLY_BRACKET) {
- return tokenizeInterpolatedExpression(next, start);
- } else {
- return tokenizeInterpolatedIdentifier(next, start);
- }
- }
-
- int tokenizeInterpolatedExpression(int next, int start) {
- appendByteStringToken(STRING_INFO, utf8String(start, -2));
- appendBeginGroup(STRING_INTERPOLATION_INFO, "\${");
- next = advance();
- while (next !== $EOF && next !== $STX) {
- next = bigSwitch(next);
- }
- if (next === $EOF) return next;
- return advance();
- }
-
- int tokenizeInterpolatedIdentifier(int next, int start) {
- appendByteStringToken(STRING_INFO, utf8String(start, -2));
- appendBeginGroup(STRING_INTERPOLATION_INFO, "\${");
- next = tokenizeKeywordOrIdentifier(next, false);
- appendEndGroup(CLOSE_CURLY_BRACKET_INFO, "}", OPEN_CURLY_BRACKET_TOKEN);
- return next;
- }
-
- int tokenizeSingleLineRawString(int next, int quoteChar, int start) {
- next = advance();
- while (next != $EOF) {
- if (next === quoteChar) {
- appendByteStringToken(STRING_INFO, utf8String(start, 0));
- return advance();
- } else if (next === $LF || next === $CR) {
- throw new MalformedInputException("unterminated string literal",
- charOffset);
- }
- next = advance();
- }
- throw new MalformedInputException("unterminated string literal",
- charOffset);
- }
-
- int tokenizeMultiLineRawString(int quoteChar, int start) {
- int next = advance();
- outer: while (next !== $EOF) {
- while (next !== quoteChar) {
- next = advance();
- if (next === $EOF) break outer;
- }
- next = advance();
- if (next === quoteChar) {
- next = advance();
- if (next === quoteChar) {
- appendByteStringToken(STRING_INFO, utf8String(start, 0));
- return advance();
- }
- }
- }
- throw new MalformedInputException("unterminated string literal",
- charOffset);
- }
-
- int tokenizeMultiLineString(int quoteChar, int start, bool raw) {
- if (raw) return tokenizeMultiLineRawString(quoteChar, start);
- int next = advance();
- while (next !== $EOF) {
- if (next === $$) {
- next = tokenizeStringInterpolation(start);
- start = byteOffset;
- continue;
- }
- if (next === quoteChar) {
- next = advance();
- if (next === quoteChar) {
- next = advance();
- if (next === quoteChar) {
- appendByteStringToken(STRING_INFO, utf8String(start, 0));
- return advance();
- }
- }
- continue;
- }
- if (next === $BACKSLASH) {
- next = advance();
- if (next === $EOF) break;
- }
- next = advance();
- }
- throw new MalformedInputException("unterminated string literal",
- charOffset);
- }
-}
-
-class MalformedInputException {
- final String message;
- final position;
- MalformedInputException(this.message, this.position);
- toString() => message;
-}
« no previous file with comments | « frog/leg/scanner/partial_parser.dart ('k') | frog/leg/scanner/scanner_bench.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698