| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 // Generated by scripts/tokenizer_gen.py. | |
| 5 | |
| 6 | |
/**
 * A source of tokens that are handed out one at a time.
 */
interface TokenSource {
  /** Produces the next [Token] from this source. */
  Token next();
}
| 10 | |
/**
 * One entry in a linked stack describing a string interpolation that is
 * currently open. Entries are chained through [previous]; a new entry is
 * created with [push] and removed with [pop].
 */
class InterpStack {
  // [next] is declared but never assigned in this class; only [previous] is
  // wired up by the constructor.
  InterpStack next, previous;

  /** Char code of the quote character of the enclosing string literal. */
  final int quote;

  /** Whether the enclosing string literal is a multiline string. */
  final bool isMultiline;

  /** Brace nesting counter; every new entry starts at -1. */
  int depth;

  InterpStack(this.previous, this.quote, this.isMultiline): depth = -1;

  /** Returns the entry below this one (null when this is the bottom). */
  InterpStack pop() => previous;

  /**
   * Creates a new entry on top of [stack] (which may be null) and returns it.
   * The constructor already links the new entry back to [stack].
   */
  static InterpStack push(InterpStack stack, int quote, bool isMultiline) {
    return new InterpStack(stack, quote, isMultiline);
  }
}
| 29 | |
/**
 * The base class for our tokenizer. The hand coded parts are in this file, with
 * the generated parts in the subclass Tokenizer.
 *
 * The tokenizer walks [_text] with a cursor ([_index]); [_startIndex] is the
 * offset used as the start of every token built by the helpers below. A char
 * code of 0 is used throughout as the "end of input" sentinel.
 */
class TokenizerBase extends TokenizerHelpers implements TokenSource {
  final SourceFile _source;
  final bool _skipWhitespace;
  String _text;

  int _index;
  int _startIndex;

  /** Keeps track of string interpolation state. */
  InterpStack _interpStack;

  TokenizerBase(this._source, this._skipWhitespace, [index = 0])
      : this._index = index {
    _text = _source.text;
  }

  abstract Token next();
  abstract int getIdentifierKind();

  /** Consumes and returns the next char code, or 0 at end of input. */
  int _nextChar() {
    if (_index < _text.length) {
      return _text.charCodeAt(_index++);
    } else {
      return 0;
    }
  }

  /** Returns the next char code without consuming it, or 0 at end of input. */
  int _peekChar() {
    if (_index < _text.length) {
      return _text.charCodeAt(_index);
    } else {
      return 0;
    }
  }

  /** Consumes the next char only if it equals [ch]; reports whether it did. */
  bool _maybeEatChar(int ch) {
    if (_index < _text.length && _text.charCodeAt(_index) == ch) {
      _index++;
      return true;
    }
    return false;
  }

  /** Returns the text from [_startIndex] to the cursor, clamped to the end. */
  String _tokenText() {
    if (_index < _text.length) {
      return _text.substring(_startIndex, _index);
    } else {
      return _text.substring(_startIndex, _text.length);
    }
  }

  /** Builds a token of [kind] spanning [_startIndex] to the cursor. */
  Token _finishToken(int kind) {
    return new Token(kind, _source, _startIndex, _index);
  }

  /** Builds an error token for the current span with an optional [message]. */
  Token _errorToken([String message = null]) {
    return new ErrorToken(
        TokenKind.ERROR, _source, _startIndex, _index, message);
  }

  /**
   * Scans a run of whitespace. When whitespace is skipped this hands over to
   * [next] at the first non-whitespace char; otherwise it returns a WHITESPACE
   * token that ends right after the first newline or before the first
   * non-whitespace char. Reaching the end of input yields END_OF_FILE.
   */
  Token finishWhitespace() {
    // Step back one char so the loop re-examines it (presumably the whitespace
    // char the caller consumed before dispatching here).
    _index--;
    while (_index < _text.length) {
      final ch = _text.charCodeAt(_index++);
      if (ch == 32/*' '*/ || ch == 9/*'\t'*/ || ch == 13/*'\r'*/) {
        // do nothing
      } else if (ch == 10/*'\n'*/) {
        if (!_skipWhitespace) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
      } else {
        // Not whitespace: un-read it and finish.
        _index--;
        if (_skipWhitespace) {
          return next();
        } else {
          return _finishToken(TokenKind.WHITESPACE);
        }
      }
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

  /** Consumes through the end of the line (or input) as a HASHBANG token. */
  Token finishHashBang() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0 || ch == 10/*'\n'*/ || ch == 13/*'\r'*/) {
        return _finishToken(TokenKind.HASHBANG);
      }
    }
  }

  /**
   * Consumes through the end of the line (or input). Returns a COMMENT token,
   * or the following token when whitespace is being skipped.
   */
  Token finishSingleLineComment() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0 || ch == 10/*'\n'*/ || ch == 13/*'\r'*/) {
        if (_skipWhitespace) {
          return next();
        } else {
          return _finishToken(TokenKind.COMMENT);
        }
      }
    }
  }

  /**
   * Consumes a block comment, honoring nested comment openers and closers.
   * Returns an error token if the input ends before the comment is closed;
   * otherwise a COMMENT token, or the following token when whitespace is
   * being skipped.
   */
  Token finishMultiLineComment() {
    int nesting = 1;
    do {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == 42/*'*'*/) {
        if (_maybeEatChar(47/*'/'*/)) {
          nesting--;
        }
      } else if (ch == 47/*'/'*/) {
        if (_maybeEatChar(42/*'*'*/)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    if (_skipWhitespace) {
      return next();
    } else {
      return _finishToken(TokenKind.COMMENT);
    }
  }

  /** Advances the cursor past any run of decimal digits. */
  void eatDigits() {
    while (_index < _text.length) {
      if (TokenizerHelpers.isDigit(_text.charCodeAt(_index))) {
        _index++;
      } else {
        return;
      }
    }
  }

  /** Returns the value (0-15) of hex digit char code [c], or -1 if not hex. */
  static int _hexDigit(int c) {
    if(c >= 48/*0*/ && c <= 57/*9*/) {
      return c - 48;
    } else if (c >= 97/*a*/ && c <= 102/*f*/) {
      return c - 87;
    } else if (c >= 65/*A*/ && c <= 70/*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

  /**
   * Reads hex digits starting at the cursor and returns their value.
   *
   * With no [hexLength], reads as many hex digits as are available (up to the
   * end of the input) and returns the accumulated value. With a [hexLength],
   * exactly that many hex digits must be present; otherwise -1 is returned.
   */
  int readHex([int hexLength]) {
    int maxIndex;
    if (hexLength === null) {
      // maxIndex is an exclusive bound, so the last char of the input must be
      // readable too.
      maxIndex = _text.length;
    } else {
      // TODO(jimhug): What if this is too long?
      maxIndex = _index + hexLength;
      // Only fail when the requested digits would run past the end of input.
      if (maxIndex > _text.length) return -1;
    }
    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.charCodeAt(_index));
      if (digit == -1) {
        // A non-hex char ends an unbounded read but invalidates a fixed-length
        // one.
        if (hexLength === null) {
          return result;
        } else {
          return -1;
        }
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }

    return result;
  }

  /** Reads the digits of a hex literal and returns a HEX_INTEGER token. */
  Token finishHex() {
    final value = readHex();
    return new LiteralToken(TokenKind.HEX_INTEGER, _source, _startIndex,
        _index, value);
  }

  /**
   * Scans the rest of a decimal number. A '.' is only taken as part of the
   * number when a digit follows it.
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46/*.*/) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      } else {
        _index--;
      }
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

  /**
   * Scans an optional exponent and finishes the number as [kind] (forced to
   * DOUBLE when an exponent is present). Returns an error token if an
   * identifier-start char immediately follows the number.
   */
  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
      kind = TokenKind.DOUBLE;
      // At most one sign is allowed in an exponent.
      if (!_maybeEatChar(45/*-*/)) {
        _maybeEatChar(43/*+*/);
      }
      eatDigits();
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken("illegal character in number");
    }

    return _finishToken(kind);
  }

  /**
   * Builds a string literal token whose value is the char codes in [buf].
   * [isPart] selects STRING_PART instead of STRING.
   */
  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = new String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return new LiteralToken(kind, _source, _startIndex, _index, s);
  }

  /**
   * Builds a STRING token for a raw string by slicing the source text between
   * the opening and closing delimiters.
   */
  Token _makeRawStringToken(bool isMultiline) {
    String s;
    if (isMultiline) {
      // Skip initial newline in multiline strings
      int start = _startIndex + 4;
      if (_source.text[start] == '\n') start++;
      s = _source.text.substring(start, _index - 3);
    } else {
      s = _source.text.substring(_startIndex + 2, _index - 1);
    }
    return new LiteralToken(TokenKind.STRING, _source, _startIndex, _index, s);
  }

  /**
   * Scans the body of a multiline string delimited by three [quote] chars.
   * Stops early with a STRING_PART token at a '$' (pushing interpolation
   * state), and returns an error token on end of input or a bad escape.
   */
  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            return _makeStringToken(buf, false);
          }
          // Two quotes not followed by a third are literal content.
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == 36/*$*/) {
        // start of string interp
        _interpStack = InterpStack.push(_interpStack, quote, true);
        return _makeStringToken(buf, true);
      } else if (ch == 92/*\*/) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Finishes a '{' token, updating the brace depth of the innermost open
   * interpolation (if any).
   */
  Token _finishOpenBrace() {
    if (_interpStack != null) {
      if (_interpStack.depth == -1) {
        _interpStack.depth = 1;
      } else {
        assert(_interpStack.depth >= 0);
        _interpStack.depth += 1;
      }
    }
    return _finishToken(TokenKind.LBRACE);
  }

  /**
   * Finishes a '}' token, decrementing the brace depth of the innermost open
   * interpolation (if any).
   */
  Token _finishCloseBrace() {
    if (_interpStack != null) {
      _interpStack.depth -= 1;
      assert(_interpStack.depth >= 0);
    }
    return _finishToken(TokenKind.RBRACE);
  }

  /**
   * Scans a string literal after its opening [quote]: an empty string, a
   * multiline string (three quotes), or a regular single-line body.
   */
  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(10/*'\n'*/);
        return finishMultilineString(quote);
      } else {
        return _makeStringToken(new List<int>(), false);
      }
    }
    return finishStringBody(quote);
  }

  /**
   * Scans a raw string literal after its opening [quote]. No escapes or
   * interpolation are processed. Returns an error token on end of input.
   */
  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  /**
   * Scans a multiline raw string up to the closing run of three [quote]
   * chars. Returns an error token on end of input.
   */
  Token finishMultilineRawString(int quote) {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  /**
   * Scans the body of a single-line string up to the closing [quote]. Stops
   * early with a STRING_PART token at a '$' (pushing interpolation state), and
   * returns an error token on end of input or a bad escape.
   */
  Token finishStringBody(int quote) {
    var buf = new List<int>();
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 36/*$*/) {
        // start of string interp
        _interpStack = InterpStack.push(_interpStack, quote, false);
        return _makeStringToken(buf, true);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == 92/*\*/) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Reads the escape that follows a backslash and returns the char code it
   * denotes, or -1 if a hex/unicode escape is malformed or names an illegal
   * code point. Any char without a special meaning is returned unchanged.
   */
  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110/*n*/:
        return 0x0a/*'\n'*/;
      case 114/*r*/:
        return 0x0d/*'\r'*/;
      case 102/*f*/:
        return 0x0c/*'\f'*/;
      case 98/*b*/:
        return 0x08/*'\b'*/;
      case 116/*t*/:
        return 0x09/*'\t'*/;
      case 118/*v*/:
        return 0x0b/*'\v'*/;
      case 120/*x*/:
        hexValue = readHex(2);
        break;
      case 117/*u*/:
        if (_maybeEatChar(123/*{*/)) {
          // Braced form: any number of hex digits, then a required '}'.
          hexValue = readHex();
          if (!_maybeEatChar(125/*}*/)) {
            return -1;
          }
        } else {
          hexValue = readHex(4);
        }
        break;
      default: return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    if (hexValue >= 0xD800 && hexValue <= 0xDFFF) {
      // Surrogate halves are simply invalid; they are not an instance of the
      // unimplemented "greater than 2 bytes" case below.
      return -1;
    } else if (hexValue <= 0xFFFF) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF){
      world.fatal('unicode values greater than 2 bytes not implemented yet');
      return -1;
    } else {
      return -1;
    }
  }

  /**
   * Finishes a token that started with '.': a DOUBLE when a digit follows,
   * otherwise a DOT.
   */
  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }

  /**
   * Scans the rest of an identifier whose first char code is [ch] and returns
   * a token of the kind reported by [getIdentifierKind].
   *
   * When an interpolation has just been opened (depth still -1), the
   * identifier is scanned with the interpolation-specific part predicate, and
   * a leading '$' is reported as an error.
   */
  Token finishIdentifier(int ch) {
    if (_interpStack != null && _interpStack.depth == -1) {
      _interpStack.depth = 0;
      if (ch == 36/*$*/) {
        return _errorToken(
            @"illegal character after $ in string interpolation");
      }
      while (_index < _text.length) {
        if (!TokenizerHelpers.isInterpIdentifierPart(
            _text.charCodeAt(_index++))) {
          _index--;
          break;
        }
      }
    } else {
      while (_index < _text.length) {
        if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index++))) {
          _index--;
          break;
        }
      }
    }
    return _finishToken(getIdentifierKind());
  }
}
| OLD | NEW |