OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 // Generated by scripts/tokenizer_gen.py. |
| 5 |
| 6 |
/**
 * A source of tokens: anything that can hand out [Token]s one at a time.
 */
interface TokenSource {
  /** Returns the next token from this source. */
  Token next();
}
| 10 |
/**
 * One entry in a linked stack that records the string literals currently
 * being interpolated. Each entry remembers the quote character and whether
 * the enclosing string is multiline.
 */
class InterpStack {
  /**
   * Links to the neighbouring entries. [previous] is the entry this one was
   * pushed on top of (null at the bottom of the stack). [next] is declared
   * but never assigned by this class.
   */
  InterpStack next, previous;

  /** Character code of the quote that delimits the enclosing string. */
  final int quote;

  /** Whether the enclosing string is a multiline string. */
  final bool isMultiline;

  /** Starts at -1; adjusted by the tokenizer as it sees braces. */
  int depth;

  InterpStack(this.previous, this.quote, this.isMultiline) : depth = -1;

  /** Returns the entry below this one (null when this is the bottom). */
  InterpStack pop() => previous;

  /**
   * Creates a new entry on top of [stack] (which may be null) and returns it.
   * The constructor already records [stack] as the new entry's [previous].
   */
  static InterpStack push(InterpStack stack, int quote, bool isMultiline) =>
      new InterpStack(stack, quote, isMultiline);
}
| 29 |
| 30 /** |
| 31 * The base class for our tokenizer. The hand coded parts are in this file, with |
| 32 * the generated parts in the subclass Tokenizer. |
| 33 */ |
| 34 class CSSTokenizerBase extends TokenizerHelpers implements TokenSource { |
| 35 final SourceFile _source; |
| 36 final bool _skipWhitespace; |
| 37 String _text; |
| 38 |
| 39 int _index; |
| 40 int _startIndex; |
| 41 |
| 42 /** Keeps track of string interpolation state. */ |
| 43 InterpStack _interpStack; |
| 44 |
/**
 * Creates a tokenizer over [_source]. Scanning starts at [index] (0 by
 * default). [_skipWhitespace] selects whether whitespace and comments are
 * skipped or returned as tokens.
 */
CSSTokenizerBase(this._source, this._skipWhitespace, [index = 0])
    : _index = index {
  // Cache the text so the scanning helpers do not go through _source.
  _text = _source.text;
}
| 49 |
/**
 * Returns the next token. The helpers in this class call it to resume
 * scanning after skipped whitespace or comments.
 */
abstract Token next();

/**
 * Returns the token kind for the identifier that has just been scanned;
 * [finishIdentifier] builds its token with this kind.
 */
abstract int getIdentifierKind();
| 52 |
/**
 * Consumes and returns the character code at the current position.
 * At the end of the text it returns 0 and consumes nothing.
 */
int _nextChar() {
  if (_index >= _text.length) return 0;
  final code = _text.charCodeAt(_index);
  _index++;
  return code;
}
| 60 |
/**
 * Returns the character code at the current position without consuming it,
 * or 0 at the end of the text.
 */
int _peekChar() {
  return (_index < _text.length) ? _text.charCodeAt(_index) : 0;
}
| 68 |
/**
 * Consumes the current character only when it equals [ch].
 * Returns true when a character was consumed, false otherwise (including
 * at the end of the text).
 */
bool _maybeEatChar(int ch) {
  final bool matched =
      _index < _text.length && _text.charCodeAt(_index) == ch;
  if (matched) _index++;
  return matched;
}
| 81 |
/**
 * Returns the text of the token being scanned: from [_startIndex] up to
 * the current position, clamped to the end of the text.
 */
String _tokenText() {
  final end = (_index < _text.length) ? _index : _text.length;
  return _text.substring(_startIndex, end);
}
| 89 |
/**
 * Builds a token of the given [kind] that spans from [_startIndex] to the
 * current position.
 */
Token _finishToken(int kind) =>
    new Token(kind, _source, _startIndex, _index);
| 93 |
/**
 * Builds an error token that spans from [_startIndex] to the current
 * position, carrying the optional [message].
 */
Token _errorToken([String message = null]) =>
    new ErrorToken(TokenKind.ERROR, _source, _startIndex, _index, message);
| 98 |
/**
 * Scans a run of whitespace.
 *
 * When [_skipWhitespace] is set, the run is skipped and the result of
 * [next] is returned. Otherwise a WHITESPACE token is returned, ending
 * either right after the first newline or just before the first
 * non-whitespace character. If the text ends while still inside the run,
 * an END_OF_FILE token is returned.
 */
Token finishWhitespace() {
  // Step back one character so the loop re-examines it (presumably the
  // whitespace character the caller consumed before dispatching here).
  _index--;
  while (_index < _text.length) {
    final ch = _text.charCodeAt(_index);
    _index++;

    if (ch == 10/*'\n'*/) {
      // note the newline?
      if (!_skipWhitespace) return _finishToken(TokenKind.WHITESPACE);
      continue;
    }
    if (ch == 32/*' '*/ || ch == 9/*'\t'*/ || ch == 13/*'\r'*/) {
      continue;
    }

    // Not whitespace: un-read the character and finish the run.
    _index--;
    return _skipWhitespace ? next() : _finishToken(TokenKind.WHITESPACE);
  }
  return _finishToken(TokenKind.END_OF_FILE);
}
| 121 |
/**
 * Consumes characters up to and including the first '\n' or '\r', or up to
 * the end of the text. Returns the result of [next] when [_skipWhitespace]
 * is set, otherwise a COMMENT token.
 */
Token finishSingleLineComment() {
  int ch;
  do {
    ch = _nextChar();
  } while (ch != 0 && ch != 10/*'\n'*/ && ch != 13/*'\r'*/);

  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
}
| 134 |
/**
 * Consumes a block comment, honouring nested comment openers and closers.
 * Scanning starts at nesting level 1 and stops once the level drops to 0.
 * Returns an error token if the text ends first; otherwise the result of
 * [next] when [_skipWhitespace] is set, or a COMMENT token.
 */
Token finishMultiLineComment() {
  int level = 1;
  while (level > 0) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();

    if (ch == 42/*'*'*/ && _maybeEatChar(47/*'/'*/)) {
      // Saw a comment closer.
      level--;
    } else if (ch == 47/*'/'*/ && _maybeEatChar(42/*'*'*/)) {
      // Saw a nested comment opener.
      level++;
    }
  }

  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
}
| 158 |
/** Advances past every consecutive digit at the current position. */
void eatDigits() {
  while (_index < _text.length && isDigit(_text.charCodeAt(_index))) {
    _index++;
  }
}
| 168 |
/**
 * Returns the numeric value (0-15) of the hexadecimal digit whose character
 * code is [c], accepting 0-9, a-f and A-F. Returns -1 for anything else.
 */
static int _hexDigit(int c) {
  if (c >= 48/*0*/ && c <= 57/*9*/) return c - 48;
  if (c >= 97/*a*/ && c <= 102/*f*/) return c - 97 + 10;
  if (c >= 65/*A*/ && c <= 70/*F*/) return c - 65 + 10;
  return -1;
}
| 180 |
/**
 * Reads hexadecimal digits starting at the current position and returns
 * their value.
 *
 * With [hexLength] given, exactly that many digits must be present:
 * returns -1 if fewer characters remain in the text or if a non-hex
 * character is found first (digits read before it stay consumed).
 *
 * Without [hexLength], reads digits until the first non-hex character or
 * the end of the text and returns the value read so far (0 when there are
 * no digits).
 */
int readHex([int hexLength]) {
  // Exclusive upper bound for the scan.
  int maxIndex;
  if (hexLength === null) {
    // Scan to the very end of the text; the last character is a legal digit
    // position too.
    maxIndex = _text.length;
  } else {
    // TODO(jimhug): What if this is too long?
    maxIndex = _index + hexLength;
    // maxIndex is exclusive, so a run ending exactly at the end of the text
    // is still in bounds.
    if (maxIndex > _text.length) return -1;
  }

  var result = 0;
  while (_index < maxIndex) {
    final digit = _hexDigit(_text.charCodeAt(_index));
    if (digit == -1) {
      // A non-hex character ends an unbounded read, but is an error when a
      // fixed number of digits was requested.
      return (hexLength === null) ? result : -1;
    }
    // Multiply by 16 rather than shift by 4 since that will result in a
    // correct value for numbers that exceed the 32 bit precision of JS
    // 'integers'.
    // TODO: Figure out a better solution to integer truncation. Issue 638.
    result = (result * 16) + digit;
    _index++;
  }

  return result;
}
| 211 |
/**
 * Scans the rest of a numeric literal: the remaining integer digits and,
 * when a '.' is directly followed by a digit, the fractional part. The
 * result is completed by [finishNumberExtra] as INTEGER or DOUBLE.
 */
Token finishNumber() {
  eatDigits();

  if (_maybeEatChar(46/*.*/)) {
    if (isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    }
    // Handle the case of 1.toString(): the '.' is not part of the number,
    // so give it back.
    _index--;
  }

  return finishNumberExtra(TokenKind.INTEGER);
}
| 228 |
/**
 * Scans the optional exponent of a numeric literal and builds its token.
 *
 * [kind] is the kind decided so far; it becomes DOUBLE when an exponent
 * ('e' or 'E', an optional single sign, then digits) is present. Returns
 * an error token if the number is directly followed by an identifier-start
 * character.
 */
Token finishNumberExtra(int kind) {
  if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
    kind = TokenKind.DOUBLE;
    // Accept at most one sign. Trying both unconditionally would let a
    // sequence such as "1e-+5" through as a single number.
    if (!_maybeEatChar(45/*-*/)) {
      _maybeEatChar(43/*+*/);
    }
    // NOTE(review): an exponent with no digits (e.g. "1e") is still
    // accepted here, as it was before.
    eatDigits();
  }
  if (_peekChar() != 0 && isIdentifierStart(_peekChar())) {
    // Include the offending character in the error token's span.
    _nextChar();
    return _errorToken("illegal character in number");
  }

  return _finishToken(kind);
}
| 243 |
/**
 * Builds a literal token whose value is the string made from the character
 * codes in [buf]. The kind is STRING_PART when [isPart] is true and STRING
 * otherwise.
 */
Token _makeStringToken(List<int> buf, bool isPart) {
  var kind = TokenKind.STRING;
  if (isPart) kind = TokenKind.STRING_PART;
  final value = new String.fromCharCodes(buf);
  return new LiteralToken(kind, _source, _startIndex, _index, value);
}
| 249 |
/**
 * Builds a STRING literal token for a raw string, taking its value
 * directly from the source text with the delimiters stripped.
 *
 * For a multiline string, 4 leading and 3 trailing characters are dropped
 * (presumably the raw-string prefix plus the triple quotes), along with one
 * newline immediately after the opener. Otherwise 2 leading and 1 trailing
 * characters are dropped.
 */
Token _makeRawStringToken(bool isMultiline) {
  final text = _source.text;
  int from;
  int to;
  if (isMultiline) {
    from = _startIndex + 4;
    // Skip initial newline in multiline strings
    if (text[from] == '\n') from++;
    to = _index - 3;
  } else {
    from = _startIndex + 2;
    to = _index - 1;
  }
  return new LiteralToken(
      TokenKind.STRING, _source, _startIndex, _index,
      text.substring(from, to));
}
| 262 |
/**
 * Scans the body of a multiline string delimited by three [quote]
 * characters, decoding escape sequences as it goes.
 *
 * Returns a STRING token at the closing triple quote, a STRING_PART token
 * at a '$' (after pushing a new interpolation entry), or an error token on
 * end of text or an invalid escape.
 */
Token finishMultilineString(int quote) {
  final chars = <int>[];
  while (true) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();

    if (ch == quote) {
      if (!_maybeEatChar(quote)) {
        // A lone quote is ordinary content.
        chars.add(quote);
      } else if (_maybeEatChar(quote)) {
        // Three quotes in a row close the string.
        return _makeStringToken(chars, false);
      } else {
        // Exactly two quotes: both are content.
        chars.add(quote);
        chars.add(quote);
      }
    } else if (ch == 36/*$*/) {
      // start of string interp
      _interpStack = InterpStack.push(_interpStack, quote, true);
      return _makeStringToken(chars, true);
    } else if (ch == 92/*\*/) {
      final escaped = readEscapeSequence();
      if (escaped == -1) {
        return _errorToken("invalid hex escape sequence");
      }
      chars.add(escaped);
    } else {
      chars.add(ch);
    }
  }
}
| 293 |
/**
 * Builds an LBRACE token. While an interpolation is active, it also
 * records the brace on the top interpolation entry: a depth of -1 becomes
 * 1, any other depth is incremented.
 */
Token _finishOpenBrace() {
  if (_interpStack != null) {
    final current = _interpStack.depth;
    if (current == -1) {
      _interpStack.depth = 1;
    } else {
      assert(current >= 0);
      _interpStack.depth = current + 1;
    }
  }
  return _finishToken(TokenKind.LBRACE);
}
| 305 |
/**
 * Builds an RBRACE token. While an interpolation is active, it also
 * decrements the depth of the top interpolation entry, asserting that the
 * depth does not go negative.
 */
Token _finishCloseBrace() {
  if (_interpStack == null) {
    return _finishToken(TokenKind.RBRACE);
  }
  _interpStack.depth = _interpStack.depth - 1;
  assert(_interpStack.depth >= 0);
  return _finishToken(TokenKind.RBRACE);
}
| 313 |
/**
 * Scans a string literal after its opening [quote]. One more quote makes it
 * the empty string, two more make it a multiline string, and anything else
 * is an ordinary string body.
 */
Token finishString(int quote) {
  if (!_maybeEatChar(quote)) {
    // A single opening quote: ordinary string body follows.
    return finishStringBody(quote);
  }
  if (!_maybeEatChar(quote)) {
    // Two quotes in a row: the empty string.
    return _makeStringToken(new List<int>(), false);
  }
  // Three quotes: multiline string. skip an initial newline
  _maybeEatChar(10/*'\n'*/);
  return finishMultilineString(quote);
}
| 326 |
/**
 * Scans a raw string literal after its opening [quote]. One more quote
 * makes it the empty string and two more make it a multiline raw string.
 * Otherwise the body runs to the next [quote], with no escape or
 * interpolation processing. Returns an error token if the text ends first.
 */
Token finishRawString(int quote) {
  if (_maybeEatChar(quote)) {
    return _maybeEatChar(quote)
        ? finishMultilineRawString(quote)
        : _makeStringToken(<int>[], false);
  }

  int ch;
  do {
    ch = _nextChar();
    if (ch == quote) return _makeRawStringToken(false);
  } while (ch != 0);
  return _errorToken();
}
| 344 |
/**
 * Scans the body of a multiline raw string up to three consecutive [quote]
 * characters. Returns an error token if the text ends first.
 */
Token finishMultilineRawString(int quote) {
  for (;;) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();
    if (ch != quote) continue;

    // One quote seen; two more directly after it close the string.
    if (_maybeEatChar(quote) && _maybeEatChar(quote)) {
      return _makeRawStringToken(true);
    }
  }
}
| 355 |
/**
 * Scans the body of a single-quoted or double-quoted string, decoding
 * escape sequences as it goes.
 *
 * Returns a STRING token at the closing [quote], a STRING_PART token at a
 * '$' (after pushing a new interpolation entry), or an error token on end
 * of text or an invalid escape.
 */
Token finishStringBody(int quote) {
  final chars = new List<int>();
  while (true) {
    final ch = _nextChar();
    if (ch == quote) {
      return _makeStringToken(chars, false);
    }
    if (ch == 36/*$*/) {
      // start of string interp
      _interpStack = InterpStack.push(_interpStack, quote, false);
      return _makeStringToken(chars, true);
    }
    if (ch == 0) {
      return _errorToken();
    }
    if (ch == 92/*\*/) {
      final escaped = readEscapeSequence();
      if (escaped == -1) {
        return _errorToken("invalid hex escape sequence");
      }
      chars.add(escaped);
    } else {
      chars.add(ch);
    }
  }
}
| 380 |
/**
 * Decodes the escape sequence that follows a backslash and returns the
 * character code it denotes.
 *
 * Handles the single-letter escapes n, r, f, b, t and v, the two-digit
 * form \xHH, the four-digit form \uHHHH and the braced form \u{H...}. Any
 * other character stands for itself.
 *
 * Returns -1 when a hex escape is malformed, names a UTF-16 surrogate half
 * (U+D800 through U+DFFF), or exceeds U+10FFFF. Values above U+FFFF that
 * are otherwise legal are not implemented and are reported through
 * world.fatal.
 */
int readEscapeSequence() {
  final ch = _nextChar();
  int hexValue;
  switch (ch) {
    case 110/*n*/:
      return 0x0a/*'\n'*/;
    case 114/*r*/:
      return 0x0d/*'\r'*/;
    case 102/*f*/:
      return 0x0c/*'\f'*/;
    case 98/*b*/:
      return 0x08/*'\b'*/;
    case 116/*t*/:
      return 0x09/*'\t'*/;
    case 118/*v*/:
      return 0x0b/*'\v'*/;
    case 120/*x*/:
      hexValue = readHex(2);
      break;
    case 117/*u*/:
      if (_maybeEatChar(123/*{*/)) {
        hexValue = readHex();
        // The braced form must be closed.
        if (!_maybeEatChar(125/*}*/)) {
          return -1;
        } else {
          break;
        }
      } else {
        hexValue = readHex(4);
        break;
      }
    default: return ch;
  }

  if (hexValue == -1) return -1;

  // According to the Unicode standard the high and low surrogate halves
  // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
  // are not legal Unicode values.
  if (hexValue >= 0xD800 && hexValue <= 0xDFFF) {
    // Reject surrogates as invalid here, so they do not reach the
    // "not implemented" branch below.
    return -1;
  } else if (hexValue <= 0xFFFF) {
    return hexValue;
  } else if (hexValue <= 0x10FFFF) {
    world.fatal('unicode values greater than 2 bytes not implemented yet');
    return -1;
  } else {
    return -1;
  }
}
| 429 |
/**
 * Decides what a '.' starts: a DOUBLE literal when a digit follows
 * directly, otherwise a plain DOT token.
 */
Token finishDot() {
  if (!isDigit(_peekChar())) {
    return _finishToken(TokenKind.DOT);
  }
  eatDigits();
  return finishNumberExtra(TokenKind.DOUBLE);
}
| 438 |
/**
 * Consumes the remaining identifier characters and builds a token whose
 * kind comes from [getIdentifierKind]. The [ch] argument is not used.
 */
Token finishIdentifier(int ch) {
  while (_index < _text.length &&
      isIdentifierPart(_text.charCodeAt(_index))) {
    _index++;
  }
  // The kind is passed straight through, whether it is IDENTIFIER or
  // something else.
  return _finishToken(getIdentifierKind());
}
| 453 } |
| 454 |
OLD | NEW |