| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
/**
 * Minimal scanner contract: a single operation that produces a [Token].
 */
interface Scanner {
  /**
   * Runs the scanner and returns a [Token]; [AbstractScanner.tokenize]
   * implements this by returning the result of its `firstToken()` hook.
   */
  Token tokenize();
}
| 8 | |
/**
 * Common base class for a Dart scanner.
 *
 * This class contains the character-level dispatch logic ([bigSwitch] and the
 * `tokenize*` helpers). Everything that depends on how the input is stored or
 * how tokens are represented is delegated to the abstract hooks declared at
 * the top of the class, which a concrete scanner must provide.
 *
 * Convention used by every `tokenize*` method: the argument `next` is the
 * character currently being looked at, and the return value is the first
 * character that was *not* consumed, ready to be passed to [bigSwitch] again.
 */
class AbstractScanner<T> implements Scanner {
  // --- Input hooks -------------------------------------------------------
  // The values returned by advance()/nextByte()/peek() are compared against
  // the `$...` character constants throughout this class.
  abstract int advance();
  abstract int nextByte();
  abstract int peek();

  // Used for two-character operators: called as select(choice, yes, no) with
  // the info to use when the following character is/is not [choice]; its
  // return value is returned to the caller as the next unconsumed character.
  // NOTE(review): exact advance semantics live in the implementation.
  abstract int select(int choice, PrecedenceInfo yes, PrecedenceInfo no);

  // --- Token sinks -------------------------------------------------------
  // NOTE: "Precence" is a historical misspelling of "Precedence"; the name is
  // part of the contract with subclasses and therefore kept as is.
  abstract void appendPrecenceToken(PrecedenceInfo info);
  abstract void appendStringToken(PrecedenceInfo info, String value);
  abstract void appendByteStringToken(PrecedenceInfo info, T value);
  abstract void appendKeywordToken(Keyword keyword);
  abstract void appendWhiteSpace(int next);
  abstract void appendEofToken();

  // --- Source text extraction --------------------------------------------
  // Both are called as (start, offset) where `offset` is 0, -1 or -2 in this
  // class. NOTE(review): presumably an adjustment relative to the current
  // byte offset — confirm against the implementation.
  abstract T asciiString(int start, int offset);
  abstract T utf8String(int start, int offset);

  // --- Bookkeeping -------------------------------------------------------
  abstract Token firstToken();
  abstract void beginToken();
  abstract void addToCharOffset(int offset);
  abstract int get charOffset();
  abstract int get byteOffset();

  // --- Grouping tokens ((), [], {}, <>, ${}) -----------------------------
  // appendEndGroup's return value is returned by bigSwitch as the next
  // character to process.
  abstract void appendBeginGroup(PrecedenceInfo info, String value);
  abstract int appendEndGroup(PrecedenceInfo info, String value, int openKind);
  abstract void appendGt(PrecedenceInfo info, String value);
  abstract void appendGtGt(PrecedenceInfo info, String value);
  abstract void appendGtGtGt(PrecedenceInfo info, String value);
  abstract void discardOpenLt();

  // TODO(ahe): Move this class to implementation.

  /**
   * Scans the whole input: repeatedly dispatches on the current character
   * until `$EOF` is reached, appends the EOF token and returns
   * [firstToken].
   */
  Token tokenize() {
    int next = advance();
    while (next !== $EOF) {
      next = bigSwitch(next);
    }
    appendEofToken();
    return firstToken();
  }

  /**
   * Central dispatch: starts a new token, inspects [next] and hands over to
   * the matching `tokenize*` helper (or appends a single-character token
   * directly).
   *
   * Returns the first character that has not been consumed.
   *
   * Throws [MalformedInputException] for ASCII control characters that cannot
   * start a token.
   */
  int bigSwitch(int next) {
    beginToken();
    if (next === $TAB || next === $LF || next === $CR || next === $SPACE) {
      appendWhiteSpace(next);
      return advance();
    }

    if ($a <= next && next <= $z) {
      return tokenizeKeywordOrIdentifier(next, true);
    }

    if (($A <= next && next <= $Z) || next === $_ || next === $$) {
      return tokenizeIdentifier(next, byteOffset, true);
    }

    if (next === $LT) {
      return tokenizeLessThan(next);
    }

    if (next === $GT) {
      return tokenizeGreaterThan(next);
    }

    if (next === $EQ) {
      return tokenizeEquals(next);
    }

    if (next === $BANG) {
      return tokenizeExclamation(next);
    }

    if (next === $PLUS) {
      return tokenizePlus(next);
    }

    if (next === $MINUS) {
      return tokenizeMinus(next);
    }

    if (next === $STAR) {
      return tokenizeMultiply(next);
    }

    if (next === $PERCENT) {
      return tokenizePercent(next);
    }

    if (next === $AMPERSAND) {
      return tokenizeAmpersand(next);
    }

    if (next === $BAR) {
      return tokenizeBar(next);
    }

    if (next === $CARET) {
      return tokenizeCaret(next);
    }

    if (next === $OPEN_SQUARE_BRACKET) {
      return tokenizeOpenSquareBracket(next);
    }

    if (next === $TILDE) {
      return tokenizeTilde(next);
    }

    if (next === $BACKSLASH) {
      appendPrecenceToken(BACKSLASH_INFO);
      return advance();
    }

    if (next === $HASH) {
      return tokenizeTag(next);
    }

    if (next === $OPEN_PAREN) {
      appendBeginGroup(OPEN_PAREN_INFO, "(");
      return advance();
    }

    if (next === $CLOSE_PAREN) {
      return appendEndGroup(CLOSE_PAREN_INFO, ")", OPEN_PAREN_TOKEN);
    }

    if (next === $COMMA) {
      appendPrecenceToken(COMMA_INFO);
      return advance();
    }

    if (next === $COLON) {
      appendPrecenceToken(COLON_INFO);
      return advance();
    }

    if (next === $SEMICOLON) {
      appendPrecenceToken(SEMICOLON_INFO);
      discardOpenLt();
      return advance();
    }

    if (next === $QUESTION) {
      appendPrecenceToken(QUESTION_INFO);
      return advance();
    }

    if (next === $CLOSE_SQUARE_BRACKET) {
      return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO, "]",
                            OPEN_SQUARE_BRACKET_TOKEN);
    }

    if (next === $BACKPING) {
      appendPrecenceToken(BACKPING_INFO);
      return advance();
    }

    if (next === $OPEN_CURLY_BRACKET) {
      appendBeginGroup(OPEN_CURLY_BRACKET_INFO, "{");
      return advance();
    }

    if (next === $CLOSE_CURLY_BRACKET) {
      return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, "}",
                            OPEN_CURLY_BRACKET_TOKEN);
    }

    if (next === $SLASH) {
      return tokenizeSlashOrComment(next);
    }

    if (next === $AT) {
      return tokenizeRawString(next);
    }

    if (next === $DQ || next === $SQ) {
      return tokenizeString(next, byteOffset, false);
    }

    if (next === $PERIOD) {
      return tokenizeDotOrNumber(next);
    }

    if (next === $0) {
      return tokenizeHexOrNumber(next);
    }

    // TODO(ahe): Would a range check be faster?
    if (next === $1 || next === $2 || next === $3 || next === $4 || next === $5
        || next === $6 || next === $7 || next === $8 || next === $9) {
      return tokenizeNumber(next);
    }

    if (next === $EOF) {
      return $EOF;
    }

    // Every ASCII control character that was not handled above is illegal.
    // This must include 0x1f and 0x7f (DEL): if either one fell through to
    // tokenizeIdentifier below, that method would append a token and return
    // the very same character without consuming it, so tokenize() would call
    // bigSwitch with it again and never terminate.
    if (next <= 0x1f || next === 0x7f) {
      throw new MalformedInputException("illegal character $next", charOffset);
    }

    // The following are non-ASCII characters.

    if (next === $NBSP) {
      appendWhiteSpace(next);
      return advance();
    }

    return tokenizeIdentifier(next, byteOffset, true);
  }

  /**
   * Handles `#`. When the `#` sits at byte offset 0 and is followed by `!`,
   * the rest of the line is skipped without producing a token; otherwise a
   * HASH token is appended.
   */
  int tokenizeTag(int next) {
    // # or #!.*[\n\r]
    if (byteOffset === 0) {
      if (peek() === $BANG) {
        do {
          next = advance();
        } while (next !== $LF && next !== $CR && next !== $EOF);
        return next;
      }
    }
    appendPrecenceToken(HASH_INFO);
    return advance();
  }

  /** Handles `~`, `~/` and `~/=`. */
  int tokenizeTilde(int next) {
    // ~ ~/ ~/=
    next = advance();
    if (next === $SLASH) {
      return select($EQ, TILDE_SLASH_EQ_INFO, TILDE_SLASH_INFO);
    } else {
      appendPrecenceToken(TILDE_INFO);
      return next;
    }
  }

  /**
   * Handles `[`, `[]` and `[]=`. A lone `[` opens a group; `[]`/`[]=` are
   * emitted as single operator tokens.
   */
  int tokenizeOpenSquareBracket(int next) {
    // [ [] []=
    next = advance();
    if (next === $CLOSE_SQUARE_BRACKET) {
      return select($EQ, INDEX_EQ_INFO, INDEX_INFO);
    } else {
      appendBeginGroup(OPEN_SQUARE_BRACKET_INFO, "[");
      return next;
    }
  }

  /** Handles `^` and `^=`. */
  int tokenizeCaret(int next) {
    // ^ ^=
    return select($EQ, CARET_EQ_INFO, CARET_INFO);
  }

  /** Handles `|`, `||` and `|=`. */
  int tokenizeBar(int next) {
    // | || |=
    next = advance();
    if (next === $BAR) {
      appendPrecenceToken(BAR_BAR_INFO);
      return advance();
    } else if (next === $EQ) {
      appendPrecenceToken(BAR_EQ_INFO);
      return advance();
    } else {
      appendPrecenceToken(BAR_INFO);
      return next;
    }
  }

  /** Handles `&`, `&&` and `&=`. */
  int tokenizeAmpersand(int next) {
    // && &= &
    next = advance();
    if (next === $AMPERSAND) {
      appendPrecenceToken(AMPERSAND_AMPERSAND_INFO);
      return advance();
    } else if (next === $EQ) {
      appendPrecenceToken(AMPERSAND_EQ_INFO);
      return advance();
    } else {
      appendPrecenceToken(AMPERSAND_INFO);
      return next;
    }
  }

  /** Handles `%` and `%=`. */
  int tokenizePercent(int next) {
    // % %=
    return select($EQ, PERCENT_EQ_INFO, PERCENT_INFO);
  }

  /** Handles `*` and `*=`. */
  int tokenizeMultiply(int next) {
    // * *=
    return select($EQ, STAR_EQ_INFO, STAR_INFO);
  }

  /** Handles `-`, `--` and `-=`. */
  int tokenizeMinus(int next) {
    // - -- -=
    next = advance();
    if (next === $MINUS) {
      appendPrecenceToken(MINUS_MINUS_INFO);
      return advance();
    } else if (next === $EQ) {
      appendPrecenceToken(MINUS_EQ_INFO);
      return advance();
    } else {
      appendPrecenceToken(MINUS_INFO);
      return next;
    }
  }

  /** Handles `+`, `++` and `+=`. */
  int tokenizePlus(int next) {
    // + ++ +=
    next = advance();
    if ($PLUS === next) {
      appendPrecenceToken(PLUS_PLUS_INFO);
      return advance();
    } else if ($EQ === next) {
      appendPrecenceToken(PLUS_EQ_INFO);
      return advance();
    } else {
      appendPrecenceToken(PLUS_INFO);
      return next;
    }
  }

  /** Handles `!`, `!=` and `!==`. */
  int tokenizeExclamation(int next) {
    // ! != !==
    next = advance();
    if (next === $EQ) {
      return select($EQ, BANG_EQ_EQ_INFO, BANG_EQ_INFO);
    }
    appendPrecenceToken(BANG_INFO);
    return next;
  }

  /** Handles `=`, `==`, `===` and the function arrow `=>`. */
  int tokenizeEquals(int next) {
    // = == === =>
    next = advance();
    if (next === $EQ) {
      return select($EQ, EQ_EQ_EQ_INFO, EQ_EQ_INFO);
    } else if (next === $GT) {
      appendPrecenceToken(FUNCTION_INFO);
      return advance();
    }
    appendPrecenceToken(EQ_INFO);
    return next;
  }

  /**
   * Handles `>`, `>=`, `>>`, `>>=`, `>>>` and `>>>=`.
   *
   * The plain `>`, `>>` and `>>>` forms go through the dedicated
   * [appendGt], [appendGtGt] and [appendGtGtGt] hooks instead of
   * [appendPrecenceToken].
   */
  int tokenizeGreaterThan(int next) {
    // > >= >> >>= >>> >>>=
    next = advance();
    if ($EQ === next) {
      appendPrecenceToken(GT_EQ_INFO);
      return advance();
    } else if ($GT === next) {
      next = advance();
      if ($EQ === next) {
        appendPrecenceToken(GT_GT_EQ_INFO);
        return advance();
      } else if ($GT === next) {
        next = advance();
        if (next === $EQ) {
          appendPrecenceToken(GT_GT_GT_EQ_INFO);
          return advance();
        } else {
          appendGtGtGt(GT_GT_GT_INFO, ">>>");
          return next;
        }
      } else {
        appendGtGt(GT_GT_INFO, ">>");
        return next;
      }
    } else {
      appendGt(GT_INFO, ">");
      return next;
    }
  }

  /**
   * Handles `<`, `<=`, `<<` and `<<=`. A lone `<` is appended as the start
   * of a group.
   */
  int tokenizeLessThan(int next) {
    // < <= << <<=
    next = advance();
    if ($EQ === next) {
      appendPrecenceToken(LT_EQ_INFO);
      return advance();
    } else if ($LT === next) {
      return select($EQ, LT_LT_EQ_INFO, LT_LT_INFO);
    } else {
      appendBeginGroup(LT_INFO, "<");
      return next;
    }
  }

  /**
   * Scans a decimal number that starts at the current byte offset.
   *
   * Consumes digits; a `.` or one of `e`, `E`, `d`, `D` hands over to
   * [tokenizeFractionPart]; anything else ends an INT token.
   *
   * NOTE(review): a `d`/`D` directly after the integer digits enters
   * [tokenizeFractionPart] with no digit seen, which takes that method's
   * INT + PERIOD path — confirm whether that is intended.
   */
  int tokenizeNumber(int next) {
    int start = byteOffset;
    while (true) {
      next = advance();
      if ($0 <= next && next <= $9) {
        continue;
      } else if (next === $PERIOD) {
        return tokenizeFractionPart(advance(), start);
      } else if (next === $e || next === $E || next === $d || next === $D) {
        return tokenizeFractionPart(next, start);
      } else {
        appendByteStringToken(INT_INFO, asciiString(start, 0));
        return next;
      }
    }
  }

  /**
   * Called on a leading `0`: peeks at the following character and scans a
   * hexadecimal literal if it is `x`/`X`, a decimal number otherwise.
   */
  int tokenizeHexOrNumber(int next) {
    int x = peek();
    if (x === $x || x === $X) {
      advance();
      return tokenizeHex(x);
    }
    return tokenizeNumber(next);
  }

  /**
   * Scans the digits of a hexadecimal literal; called after the `x`/`X` has
   * been consumed.
   *
   * Throws [MalformedInputException] if no hex digit follows the prefix.
   */
  int tokenizeHex(int next) {
    // The token text starts one byte before the current offset.
    int start = byteOffset - 1;
    bool hasDigits = false;
    while (true) {
      next = advance();
      if (($0 <= next && next <= $9)
          || ($A <= next && next <= $F)
          || ($a <= next && next <= $f)) {
        hasDigits = true;
      } else {
        if (!hasDigits) {
          throw new MalformedInputException("hex digit expected", charOffset);
        }
        appendByteStringToken(HEXADECIMAL_INFO, asciiString(start, 0));
        return next;
      }
    }
  }

  /**
   * Handles a leading `.`: a fractional number (`.5`), the `..`/`...`
   * operators, or a single PERIOD token.
   */
  int tokenizeDotOrNumber(int next) {
    int start = byteOffset;
    next = advance();
    if (($0 <= next && next <= $9)) {
      return tokenizeFractionPart(next, start);
    } else if ($PERIOD === next) {
      return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);
    } else {
      appendPrecenceToken(PERIOD_INFO);
      return next;
    }
  }

  /**
   * Scans the part of a number that follows the integer digits: fraction
   * digits, an optional exponent and an optional `d`/`D` suffix. [start] is
   * the byte offset where the whole number began.
   *
   * If neither a digit nor an exponent marker is found, the text scanned so
   * far is emitted as an INT token followed by a PERIOD token, and scanning
   * resumes with [bigSwitch] on the current character.
   */
  int tokenizeFractionPart(int next, int start) {
    bool done = false;
    bool hasDigit = false;
    LOOP: while (!done) {
      if ($0 <= next && next <= $9) {
        hasDigit = true;
      } else if ($e === next || $E === next) {
        // An exponent counts as "has digit"; tokenizeExponent itself
        // enforces that at least one exponent digit follows.
        hasDigit = true;
        next = tokenizeExponent(advance());
        done = true;
        continue LOOP;
      } else {
        done = true;
        continue LOOP;
      }
      next = advance();
    }
    if (!hasDigit) {
      appendByteStringToken(INT_INFO, asciiString(start, -1));
      // TODO(ahe): Wrong offset for the period.
      appendPrecenceToken(PERIOD_INFO);
      return bigSwitch(next);
    }
    if (next === $d || next === $D) {
      next = advance();
    }
    appendByteStringToken(DOUBLE_INFO, asciiString(start, 0));
    return next;
  }

  /**
   * Scans an exponent: an optional sign followed by one or more digits.
   * [next] is the character right after the `e`/`E`.
   *
   * Throws [MalformedInputException] if no digit is present.
   */
  int tokenizeExponent(int next) {
    if (next === $PLUS || next === $MINUS) {
      next = advance();
    }
    bool hasDigits = false;
    while (true) {
      if ($0 <= next && next <= $9) {
        hasDigits = true;
      } else {
        if (!hasDigits) {
          throw new MalformedInputException("digit expected", charOffset);
        }
        return next;
      }
      next = advance();
    }
  }

  /**
   * Handles `/`: a multi-line comment, a single-line comment, `/=` or a
   * plain SLASH token.
   */
  int tokenizeSlashOrComment(int next) {
    next = advance();
    if ($STAR === next) {
      return tokenizeMultiLineComment(next);
    } else if ($SLASH === next) {
      return tokenizeSingleLineComment(next);
    } else if ($EQ === next) {
      appendPrecenceToken(SLASH_EQ_INFO);
      return advance();
    } else {
      appendPrecenceToken(SLASH_INFO);
      return next;
    }
  }

  /**
   * Skips to the end of the line (or input). No token is appended; the
   * terminating LF/CR/EOF is returned unconsumed.
   */
  int tokenizeSingleLineComment(int next) {
    while (true) {
      next = advance();
      if ($LF === next || $CR === next || $EOF === next) {
        return next;
      }
    }
  }

  /**
   * Skips a multi-line comment, tracking nesting of comment openers and
   * closers. No token is appended. An unterminated comment silently ends at
   * EOF.
   */
  int tokenizeMultiLineComment(int next) {
    int nesting = 1;
    next = advance();
    while (true) {
      if ($EOF === next) {
        // TODO(ahe): Report error.
        return next;
      } else if ($STAR === next) {
        next = advance();
        if ($SLASH === next) {
          --nesting;
          if (0 === nesting) {
            return advance();
          } else {
            next = advance();
          }
        }
        // Otherwise re-examine `next`: it may itself be a `*`.
      } else if ($SLASH === next) {
        next = advance();
        if ($STAR === next) {
          next = advance();
          ++nesting;
        }
      } else {
        next = advance();
      }
    }
  }

  /**
   * Scans a word starting with a lower-case letter, walking the keyword
   * state machine while the input consists of `a`-`z`.
   *
   * A KEYWORD token is appended only if the state machine ends on a keyword
   * and the following character is ASCII and cannot continue an identifier;
   * in every other case scanning continues in [tokenizeIdentifier].
   */
  int tokenizeKeywordOrIdentifier(int next, bool allowDollar) {
    KeywordState state = KeywordState.KEYWORD_STATE;
    int start = byteOffset;
    while (state !== null && $a <= next && next <= $z) {
      state = state.next(next);
      next = advance();
    }
    if (state === null || state.keyword === null) {
      return tokenizeIdentifier(next, start, allowDollar);
    }
    if (($A <= next && next <= $Z) ||
        ($0 <= next && next <= $9) ||
        next === $_ ||
        next === $$) {
      return tokenizeIdentifier(next, start, allowDollar);
    } else if (next < 128) {
      appendKeywordToken(state.keyword);
      return next;
    } else {
      return tokenizeIdentifier(next, start, allowDollar);
    }
  }

  /**
   * Scans (the rest of) an identifier whose text began at byte offset
   * [start] and appends an IDENTIFIER token.
   *
   * ASCII letters, digits and `_` are accepted, plus `$` when [allowDollar]
   * is true. Any byte above 127 is treated as part of the identifier. The
   * first other ASCII character ends the identifier and is returned
   * unconsumed.
   */
  int tokenizeIdentifier(int next, int start, bool allowDollar) {
    bool isAscii = true;
    while (true) {
      if (($a <= next && next <= $z) ||
          ($A <= next && next <= $Z) ||
          ($0 <= next && next <= $9) ||
          next === $_ ||
          (next === $$ && allowDollar)) {
        next = advance();
      } else if (next < 128) {
        if (isAscii) {
          appendByteStringToken(IDENTIFIER_INFO, asciiString(start, 0));
        } else {
          appendByteStringToken(IDENTIFIER_INFO, utf8String(start, -1));
        }
        return next;
      } else {
        // Consume a run of non-ASCII bytes and adjust the character offset
        // using the length of the decoded string.
        int nonAsciiStart = byteOffset;
        do {
          next = nextByte();
        } while (next > 127);
        String string = utf8String(nonAsciiStart, -1).slowToString();
        isAscii = false;
        // NOTE(review): if nextByte() moves byteOffset forward, this value
        // is non-positive — confirm the intended sign against the
        // addToCharOffset implementation.
        int byteLength = nonAsciiStart - byteOffset;
        addToCharOffset(string.length - byteLength);
      }
    }
  }

  /**
   * Handles `@`, which must be followed by a quote and introduces a raw
   * string.
   *
   * Throws [MalformedInputException] if no quote follows.
   */
  int tokenizeRawString(int next) {
    int start = byteOffset;
    next = advance();
    if (next === $DQ || next === $SQ) {
      return tokenizeString(next, start, true);
    } else {
      throw new MalformedInputException("expected ' or \"", charOffset);
    }
  }

  /**
   * Scans a string literal. [next] is the opening quote, [start] the byte
   * offset where the literal text begins and [raw] tells whether it was
   * introduced by `@`.
   *
   * Distinguishes the empty string, triple-quoted (multi-line) strings and
   * ordinary single-line strings, and delegates accordingly.
   */
  int tokenizeString(int next, int start, bool raw) {
    int quoteChar = next;
    next = advance();
    if (quoteChar === next) {
      next = advance();
      if (quoteChar === next) {
        // Multiline string.
        return tokenizeMultiLineString(quoteChar, start, raw);
      } else {
        // Empty string.
        appendByteStringToken(STRING_INFO, utf8String(start, -1));
        return next;
      }
    }
    if (raw) {
      return tokenizeSingleLineRawString(next, quoteChar, start);
    } else {
      return tokenizeSingleLineString(next, quoteChar, start);
    }
  }

  /** Returns true if [character] is one of `0`-`9`, `a`-`f` or `A`-`F`. */
  static bool isHexDigit(int character) {
    if ($0 <= character && character <= $9) return true;
    // Setting bit 0x20 maps ASCII upper-case letters onto lower-case ones.
    character |= 0x20;
    return ($a <= character && character <= $f);
  }

  /**
   * Scans the body of a non-raw single-line string. [next] is the first
   * character after the opening quote.
   *
   * A backslash makes the following character skip the quote test, `$`
   * starts an interpolation (after which a new string part begins), and the
   * closing quote ends the literal.
   *
   * Throws [MalformedInputException] on LF, CR or EOF inside the literal.
   */
  int tokenizeSingleLineString(int next, int quoteChar, int start) {
    while (next !== quoteChar) {
      if (next === $BACKSLASH) {
        next = advance();
      } else if (next === $$) {
        next = tokenizeStringInterpolation(start);
        start = byteOffset;
        continue;
      }
      if (next <= $CR && (next === $LF || next === $CR || next === $EOF)) {
        throw new MalformedInputException("unterminated string literal",
                                          charOffset);
      }
      next = advance();
    }
    appendByteStringToken(STRING_INFO, utf8String(start, 0));
    return advance();
  }

  /**
   * Called on a `$` inside a string: dispatches to the `${expression}` or
   * `$identifier` form depending on the character after the `$`.
   */
  int tokenizeStringInterpolation(int start) {
    beginToken();
    int next = advance();
    if (next === $OPEN_CURLY_BRACKET) {
      return tokenizeInterpolatedExpression(next, start);
    } else {
      return tokenizeInterpolatedIdentifier(next, start);
    }
  }

  /**
   * Scans a `${...}` interpolation: appends the string part scanned so far,
   * opens an interpolation group and runs [bigSwitch] until it yields `$EOF`
   * or `$STX`.
   */
  int tokenizeInterpolatedExpression(int next, int start) {
    appendByteStringToken(STRING_INFO, utf8String(start, -2));
    appendBeginGroup(STRING_INTERPOLATION_INFO, "\${");
    next = advance();
    // NOTE(review): $STX is presumably the sentinel appendEndGroup returns
    // when the interpolation's closing brace is matched — confirm.
    while (next !== $EOF && next !== $STX) {
      next = bigSwitch(next);
    }
    if (next === $EOF) return next;
    return advance();
  }

  /**
   * Scans a `$identifier` interpolation. It is wrapped in the same begin/end
   * group tokens as the `${...}` form; `$` is not accepted inside the
   * identifier.
   */
  int tokenizeInterpolatedIdentifier(int next, int start) {
    appendByteStringToken(STRING_INFO, utf8String(start, -2));
    appendBeginGroup(STRING_INTERPOLATION_INFO, "\${");
    next = tokenizeKeywordOrIdentifier(next, false);
    appendEndGroup(CLOSE_CURLY_BRACKET_INFO, "}", OPEN_CURLY_BRACKET_TOKEN);
    return next;
  }

  /**
   * Scans the body of a raw single-line string. [next] is the first
   * character after the opening quote (the caller has already established
   * that it is not the quote character itself).
   *
   * Throws [MalformedInputException] on LF, CR or EOF inside the literal.
   */
  int tokenizeSingleLineRawString(int next, int quoteChar, int start) {
    // Examine `next` before advancing: tokenizeString has already moved onto
    // the first content character, so advancing first would skip it without
    // checking it for a line break or end of input.
    while (next !== $EOF) {
      if (next === quoteChar) {
        appendByteStringToken(STRING_INFO, utf8String(start, 0));
        return advance();
      } else if (next === $LF || next === $CR) {
        throw new MalformedInputException("unterminated string literal",
                                          charOffset);
      }
      next = advance();
    }
    throw new MalformedInputException("unterminated string literal",
                                      charOffset);
  }

  /**
   * Scans the body of a raw triple-quoted string up to and including the
   * closing three quote characters.
   *
   * Throws [MalformedInputException] if EOF is reached first.
   */
  int tokenizeMultiLineRawString(int quoteChar, int start) {
    int next = advance();
    outer: while (next !== $EOF) {
      // Find the next quote character.
      while (next !== quoteChar) {
        next = advance();
        if (next === $EOF) break outer;
      }
      // Two more quote characters in a row end the literal.
      next = advance();
      if (next === quoteChar) {
        next = advance();
        if (next === quoteChar) {
          appendByteStringToken(STRING_INFO, utf8String(start, 0));
          return advance();
        }
      }
    }
    throw new MalformedInputException("unterminated string literal",
                                      charOffset);
  }

  /**
   * Scans the body of a triple-quoted string. Raw strings are delegated to
   * [tokenizeMultiLineRawString]; otherwise `$` interpolations and backslash
   * escapes are recognised until three consecutive quote characters are
   * found.
   *
   * Throws [MalformedInputException] if EOF is reached first.
   */
  int tokenizeMultiLineString(int quoteChar, int start, bool raw) {
    if (raw) return tokenizeMultiLineRawString(quoteChar, start);
    int next = advance();
    while (next !== $EOF) {
      if (next === $$) {
        next = tokenizeStringInterpolation(start);
        start = byteOffset;
        continue;
      }
      if (next === quoteChar) {
        next = advance();
        if (next === quoteChar) {
          next = advance();
          if (next === quoteChar) {
            appendByteStringToken(STRING_INFO, utf8String(start, 0));
            return advance();
          }
        }
        // Fewer than three quotes: re-examine the current character.
        continue;
      }
      if (next === $BACKSLASH) {
        next = advance();
        if (next === $EOF) break;
      }
      next = advance();
    }
    throw new MalformedInputException("unterminated string literal",
                                      charOffset);
  }
}
| 758 | |
/**
 * Thrown by the scanner when the input cannot be tokenized.
 */
class MalformedInputException {
  /** Description of what was wrong with the input. */
  final String message;

  /** Location of the problem; the scanner passes its `charOffset` here. */
  final position;

  MalformedInputException(this.message, this.position);

  /** Returns only [message]; [position] is not included. */
  toString() {
    return message;
  }
}
| OLD | NEW |