utils/template/tokenizer_base.dart - Issue 9695048: Template parser

Side by Side Diff: utils/template/tokenizer_base.dart

Issue 9695048: Template parser (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Siggi's comments Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4 // Generated by scripts/tokenizer_gen.py.

	5

	6

	/** Anything that can hand out tokens one at a time. */
	interface TokenSource {
	  /** Produces the next [Token] from this source. */
	  Token next();
	}

	10

	/**
	 * One frame of the string interpolation stack kept by the tokenizer.
	 *
	 * Frames are chained through [previous]. Each frame remembers the [quote]
	 * character code and whether the enclosing string [isMultiline]. [depth]
	 * starts at -1; the tokenizer later sets it to 0 or 1 and then counts braces
	 * with it.
	 */
	class InterpStack {
	  // NOTE(review): [next] is never assigned anywhere in this file.
	  InterpStack next, previous;
	  final int quote;
	  final bool isMultiline;
	  int depth;

	  /** Creates a frame on top of [previous] with [depth] set to -1. */
	  InterpStack(this.previous, this.quote, this.isMultiline): depth = -1;

	  /** Returns the frame below this one, or null if this is the bottom. */
	  InterpStack pop() {
	    return this.previous;
	  }

	  /**
	   * Returns a new frame whose [previous] is [stack] (which may be null).
	   *
	   * The constructor already links the new frame to [stack], so no further
	   * assignment is needed here.
	   */
	  static InterpStack push(InterpStack stack, int quote, bool isMultiline) {
	    return new InterpStack(stack, quote, isMultiline);
	  }
	}

	29

	30 /**

	31 * The base class for our tokenizer. The hand coded parts are in this file, with

	32 * the generated parts in the subclass Tokenizer.

	33 */

	34 class TokenizerBase extends TokenizerHelpers implements TokenSource {

	35 final SourceFile _source;

	36 final bool _skipWhitespace;

	37 String _text;

	38

	39 int _index;

	40 int _startIndex;

	41

	42 /** Keeps track of string interpolation state. */

	43 InterpStack _interpStack;

	44

	/**
	 * Creates a tokenizer over [_source], starting at [index] (default 0).
	 *
	 * The source text is cached in [_text] so the scanning helpers do not have
	 * to go through [_source] for every character.
	 */
	TokenizerBase(this._source, this._skipWhitespace, [index = 0])
	    : _index = index {
	  _text = _source.text;
	}

	49

	50 abstract Token next();

	51 abstract int getIdentifierKind();

	52

	/**
	 * Returns the char code at the current position and advances past it.
	 * Returns 0, without advancing, once the end of the text is reached.
	 */
	int _nextChar() {
	  return _index < _text.length ? _text.charCodeAt(_index++) : 0;
	}

	60

	/**
	 * Returns the char code at the current position without consuming it, or 0
	 * when the position is at or past the end of the text.
	 */
	int _peekChar() {
	  if (_index >= _text.length) return 0;
	  return _text.charCodeAt(_index);
	}

	68

	/**
	 * Consumes the current character if, and only if, its code equals [ch].
	 * Returns whether a character was consumed; always false at end of text.
	 */
	bool _maybeEatChar(int ch) {
	  final atEnd = _index >= _text.length;
	  if (atEnd || _text.charCodeAt(_index) != ch) return false;
	  _index++;
	  return true;
	}

	81

	/**
	 * Returns the text from [_startIndex] up to the current position, with the
	 * end clamped to the length of the text.
	 */
	String _tokenText() {
	  final end = _index < _text.length ? _index : _text.length;
	  return _text.substring(_startIndex, end);
	}

	89

	/** Builds a token of [kind] spanning [_startIndex] to the current position. */
	Token _finishToken(int kind) {
	  final token = new Token(kind, _source, _startIndex, _index);
	  return token;
	}

	93

	/**
	 * Builds an error token spanning [_startIndex] to the current position,
	 * carrying the optional [message].
	 */
	Token _errorToken([String message = null]) {
	  final Token error =
	      new ErrorToken(TokenKind.ERROR, _source, _startIndex, _index, message);
	  return error;
	}

	98

	/**
	 * Scans a run of whitespace.
	 *
	 * When [_skipWhitespace] is set the run is dropped and the result of [next]
	 * is returned. Otherwise a WHITESPACE token is returned, ending either just
	 * after the first newline or just before the first non-whitespace character.
	 * If the text ends during the run an END_OF_FILE token is returned.
	 */
	Token finishWhitespace() {
	  // Step back one character first; presumably the caller has already
	  // consumed the first whitespace character - TODO confirm in the subclass.
	  _index--;
	  while (_index < _text.length) {
	    final ch = _text.charCodeAt(_index++);
	    final isBlank =
	        ch == 32/*' '*/ || ch == 9/*'\t'*/ || ch == 13/*'\r'*/;
	    if (isBlank) continue;

	    if (ch == 10/*'\n'*/) {
	      // A newline ends the token only when whitespace is being reported.
	      if (_skipWhitespace) continue;
	      return _finishToken(TokenKind.WHITESPACE); // note the newline?
	    }

	    // Not whitespace: un-read the character and finish.
	    _index--;
	    return _skipWhitespace ? next() : _finishToken(TokenKind.WHITESPACE);
	  }
	  return _finishToken(TokenKind.END_OF_FILE);
	}

	121

	/**
	 * Consumes characters up to and including the first '\n' or '\r', or until
	 * the end of the text. Returns [next] when [_skipWhitespace] is set, and a
	 * COMMENT token otherwise.
	 */
	Token finishSingleLineComment() {
	  int ch = _nextChar();
	  while (ch != 0 && ch != 10/*'\n'*/ && ch != 13/*'\r'*/) {
	    ch = _nextChar();
	  }
	  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
	}

	134

	/**
	 * Consumes a block comment, tracking nested open and close markers.
	 *
	 * Scanning starts at nesting level 1 and stops when the level reaches zero.
	 * Returns an error token if the text ends first. Otherwise returns [next]
	 * when [_skipWhitespace] is set, and a COMMENT token if not.
	 */
	Token finishMultiLineComment() {
	  var level = 1;
	  while (level > 0) {
	    final ch = _nextChar();
	    if (ch == 0) return _errorToken();
	    if (ch == 42/*'*'*/) {
	      // A star closes one level only when a slash follows it.
	      if (_maybeEatChar(47/*'/'*/)) level--;
	    } else if (ch == 47/*'/'*/) {
	      // A slash opens a nested level only when a star follows it.
	      if (_maybeEatChar(42/*'*'*/)) level++;
	    }
	  }
	  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
	}

	158

	/** Advances past every character for which [isDigit] is true. */
	void eatDigits() {
	  while (_index < _text.length && isDigit(_text.charCodeAt(_index))) {
	    _index++;
	  }
	}

	168

	/**
	 * Returns the value (0..15) of the hexadecimal digit with char code [c],
	 * accepting both lower and upper case letters, or -1 if [c] is not a hex
	 * digit.
	 */
	static int _hexDigit(int c) {
	  if (c >= 48/*0*/ && c <= 57/*9*/) return c - 48;
	  if (c >= 97/*a*/ && c <= 102/*f*/) return c - 97 + 10;
	  if (c >= 65/*A*/ && c <= 70/*F*/) return c - 65 + 10;
	  return -1;
	}

	180

	/**
	 * Reads hexadecimal digits at the current position and returns their value.
	 *
	 * With [hexLength] given, exactly that many digits are required: -1 is
	 * returned if any of them is not a hex digit, or if the digits would reach
	 * the end of the text. With no [hexLength], digits are read until the first
	 * non-hex character (never reading the final character of the text) and the
	 * value accumulated so far is returned.
	 */
	int readHex([int hexLength]) {
	  int maxIndex;
	  if (hexLength === null) {
	    maxIndex = _text.length - 1;
	  } else {
	    // TODO(jimhug): What if this is too long?
	    maxIndex = _index + hexLength;
	    if (maxIndex >= _text.length) return -1;
	  }
	  var result = 0;
	  while (_index < maxIndex) {
	    final digit = _hexDigit(_text.charCodeAt(_index));
	    if (digit == -1) {
	      // A non-digit ends an open-ended read but invalidates a fixed-length one.
	      return hexLength === null ? result : -1;
	    }
	    // Multiply by 16 rather than shift by 4 since that will result in a
	    // correct value for numbers that exceed the 32 bit precision of JS
	    // 'integers'.
	    // TODO: Figure out a better solution to integer truncation. Issue 638.
	    result = (result * 16) + digit;
	    _index++;
	  }
	  return result;
	}

	211

	/**
	 * Scans the rest of a numeric literal.
	 *
	 * A '.' is treated as part of the number only when a digit follows it, so
	 * that input such as `1.toString()` still yields an integer followed by a
	 * dot. The tail of the literal is handled by [finishNumberExtra].
	 */
	Token finishNumber() {
	  eatDigits();
	  if (_peekChar() != 46/*.*/) {
	    return finishNumberExtra(TokenKind.INTEGER);
	  }
	  // Tentatively consume the dot, then check what follows it.
	  _nextChar();
	  if (!isDigit(_peekChar())) {
	    // Handle the case of 1.toString(): give the dot back.
	    _index--;
	    return finishNumberExtra(TokenKind.INTEGER);
	  }
	  eatDigits();
	  return finishNumberExtra(TokenKind.DOUBLE);
	}

	228

	/**
	 * Scans the optional exponent of a numeric literal and finishes the token.
	 *
	 * If an 'e' or 'E' is present the token becomes a DOUBLE regardless of
	 * [kind], and an optional single sign plus any digits are consumed. If an
	 * identifier-start character directly follows the number, that character is
	 * consumed and an error token is returned instead.
	 */
	Token finishNumberExtra(int kind) {
	  if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
	    kind = TokenKind.DOUBLE;
	    // Accept at most one sign. Eating '-' and then '+' unconditionally would
	    // let "1e-+5" through as a single number.
	    if (!_maybeEatChar(45/*-*/)) {
	      _maybeEatChar(43/*+*/);
	    }
	    eatDigits();
	  }
	  if (_peekChar() != 0 && isIdentifierStart(_peekChar())) {
	    _nextChar();
	    return _errorToken("illegal character in number");
	  }

	  return _finishToken(kind);
	}

	243

	/**
	 * Builds a literal token whose value is the string made from the char codes
	 * in [buf]. The kind is STRING_PART when [isPart] is true and STRING
	 * otherwise.
	 */
	Token _makeStringToken(List<int> buf, bool isPart) {
	  int kind = TokenKind.STRING;
	  if (isPart) kind = TokenKind.STRING_PART;
	  final value = new String.fromCharCodes(buf);
	  return new LiteralToken(kind, _source, _startIndex, _index, value);
	}

	249

	/**
	 * Builds a STRING literal token for a raw string by slicing its contents
	 * straight out of the source text.
	 *
	 * The fixed offsets (4 and 3 for multiline, 2 and 1 otherwise) presumably
	 * skip a one-character raw prefix plus the opening and closing quotes -
	 * TODO confirm against the generated tokenizer.
	 */
	Token _makeRawStringToken(bool isMultiline) {
	  final text = _source.text;
	  int from;
	  int to;
	  if (isMultiline) {
	    from = _startIndex + 4;
	    // Skip initial newline in multiline strings
	    if (text[from] == '\n') from++;
	    to = _index - 3;
	  } else {
	    from = _startIndex + 2;
	    to = _index - 1;
	  }
	  final contents = text.substring(from, to);
	  return new LiteralToken(
	      TokenKind.STRING, _source, _startIndex, _index, contents);
	}

	262

	/**
	 * Scans the body of a triple-quoted string delimited by [quote].
	 *
	 * Returns a STRING token at the closing triple quote, a STRING_PART token
	 * (after pushing an interpolation frame) at a '$', and an error token at end
	 * of text or on an invalid escape sequence. One or two consecutive quote
	 * characters inside the body are kept as ordinary content.
	 */
	Token finishMultilineString(int quote) {
	  final chars = <int>[];
	  while (true) {
	    final ch = _nextChar();
	    if (ch == 0) return _errorToken();

	    if (ch == quote) {
	      if (_maybeEatChar(quote)) {
	        if (_maybeEatChar(quote)) return _makeStringToken(chars, false);
	        // Only two quotes in a row: both belong to the string.
	        chars.add(quote);
	      }
	      chars.add(quote);
	      continue;
	    }

	    if (ch == 36/*$*/) {
	      // start of string interp
	      _interpStack = InterpStack.push(_interpStack, quote, true);
	      return _makeStringToken(chars, true);
	    }

	    if (ch == 92/*\*/) {
	      final escapeVal = readEscapeSequence();
	      if (escapeVal == -1) {
	        return _errorToken("invalid hex escape sequence");
	      }
	      chars.add(escapeVal);
	      continue;
	    }

	    chars.add(ch);
	  }
	}

	293

	/**
	 * Returns an LBRACE token. While an interpolation frame is active it also
	 * records the brace: a depth of -1 becomes 1, any other depth is
	 * incremented.
	 */
	Token _finishOpenBrace() {
	  final frame = _interpStack;
	  if (frame != null) {
	    if (frame.depth != -1) {
	      assert(frame.depth >= 0);
	      frame.depth += 1;
	    } else {
	      frame.depth = 1;
	    }
	  }
	  return _finishToken(TokenKind.LBRACE);
	}

	305

	/**
	 * Returns an RBRACE token. While an interpolation frame is active it also
	 * decrements that frame's depth, asserting that the depth stays
	 * non-negative.
	 */
	Token _finishCloseBrace() {
	  final frame = _interpStack;
	  if (frame != null) {
	    frame.depth -= 1;
	    assert(frame.depth >= 0);
	  }
	  return _finishToken(TokenKind.RBRACE);
	}

	313

	/**
	 * Scans a string literal, called after its opening [quote].
	 *
	 * Two quotes in a row form an empty string; three open a multiline string,
	 * whose single leading newline (if any) is skipped. Anything else is scanned
	 * as a single-line string body.
	 */
	Token finishString(int quote) {
	  if (!_maybeEatChar(quote)) return finishStringBody(quote);
	  if (!_maybeEatChar(quote)) {
	    return _makeStringToken(new List<int>(), false);
	  }
	  // skip an initial newline
	  _maybeEatChar(10/*'\n'*/);
	  return finishMultilineString(quote);
	}

	326

	/**
	 * Scans a raw string literal, called after its opening [quote].
	 *
	 * Two quotes in a row form an empty string and three open a multiline raw
	 * string. Otherwise characters are consumed, with no escape processing,
	 * until the closing quote. Reaching the end of the text first yields an
	 * error token.
	 */
	Token finishRawString(int quote) {
	  if (_maybeEatChar(quote)) {
	    return _maybeEatChar(quote)
	        ? finishMultilineRawString(quote)
	        : _makeStringToken(<int>[], false);
	  }
	  for (;;) {
	    final ch = _nextChar();
	    if (ch == quote) return _makeRawStringToken(false);
	    if (ch == 0) return _errorToken();
	  }
	}

	344

	/**
	 * Consumes characters until three consecutive [quote] characters are found
	 * and returns the raw string token. Returns an error token if the text ends
	 * first.
	 */
	Token finishMultilineRawString(int quote) {
	  for (;;) {
	    final ch = _nextChar();
	    if (ch == 0) return _errorToken();
	    if (ch != quote) continue;
	    // The first quote is consumed; two more are needed to close the string.
	    if (_maybeEatChar(quote) && _maybeEatChar(quote)) {
	      return _makeRawStringToken(true);
	    }
	  }
	}

	355

	/**
	 * Scans the body of a single-line string delimited by [quote].
	 *
	 * Returns a STRING token at the closing quote, a STRING_PART token (after
	 * pushing an interpolation frame) at a '$', and an error token at end of
	 * text or on an invalid escape sequence.
	 */
	Token finishStringBody(int quote) {
	  final chars = new List<int>();
	  for (;;) {
	    final ch = _nextChar();
	    if (ch == quote) return _makeStringToken(chars, false);

	    if (ch == 36/*$*/) {
	      // start of string interp
	      _interpStack = InterpStack.push(_interpStack, quote, false);
	      return _makeStringToken(chars, true);
	    }

	    if (ch == 0) return _errorToken();

	    if (ch == 92/*\*/) {
	      final escapeVal = readEscapeSequence();
	      if (escapeVal == -1) {
	        return _errorToken("invalid hex escape sequence");
	      }
	      chars.add(escapeVal);
	      continue;
	    }

	    chars.add(ch);
	  }
	}

	380

	/**
	 * Reads the escape sequence that follows a backslash and returns the char
	 * code it denotes, or -1 if the sequence is invalid.
	 *
	 * Handles the single-letter escapes n, r, f, b, t and v, the two-digit `\x`
	 * form, and the `\u` form with either four digits or a braced digit run.
	 * Any other character stands for itself.
	 *
	 * Surrogate halves (U+D800 through U+DFFF) and values above U+10FFFF are
	 * rejected with -1. Values above U+FFFF that are otherwise legal are not
	 * supported yet and are reported through `world.fatal`.
	 */
	int readEscapeSequence() {
	  final ch = _nextChar();
	  int hexValue;
	  switch (ch) {
	    case 110/*n*/:
	      return 0x0a/*'\n'*/;
	    case 114/*r*/:
	      return 0x0d/*'\r'*/;
	    case 102/*f*/:
	      return 0x0c/*'\f'*/;
	    case 98/*b*/:
	      return 0x08/*'\b'*/;
	    case 116/*t*/:
	      return 0x09/*'\t'*/;
	    case 118/*v*/:
	      return 0x0b/*'\v'*/;
	    case 120/*x*/:
	      hexValue = readHex(2);
	      break;
	    case 117/*u*/:
	      if (_maybeEatChar(123/*{*/)) {
	        hexValue = readHex();
	        if (!_maybeEatChar(125/*}*/)) {
	          return -1;
	        } else {
	          break;
	        }
	      } else {
	        hexValue = readHex(4);
	        break;
	      }
	    default: return ch;
	  }

	  if (hexValue == -1) return -1;

	  // According to the Unicode standard the high and low surrogate halves
	  // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
	  // are not legal Unicode values. Reject surrogates explicitly so they do
	  // not fall into the "not implemented yet" branch below.
	  if (hexValue >= 0xD800 && hexValue <= 0xDFFF) return -1;

	  if (hexValue <= 0xFFFF) return hexValue;

	  if (hexValue <= 0x10FFFF) {
	    world.fatal('unicode values greater than 2 bytes not implemented yet');
	    return -1;
	  }
	  return -1;
	}

	429

	/**
	 * Finishes a token that began with '.': a DOT token, unless a digit follows,
	 * in which case the digits and any exponent are scanned as a DOUBLE.
	 */
	Token finishDot() {
	  if (!isDigit(_peekChar())) return _finishToken(TokenKind.DOT);
	  eatDigits();
	  return finishNumberExtra(TokenKind.DOUBLE);
	}

	438

	/**
	 * Consumes the rest of an identifier and returns a token whose kind comes
	 * from [getIdentifierKind].
	 *
	 * When an interpolation frame is active and its depth is still -1, the
	 * depth is set to 0 and [isInterpIdentifierPart] decides which characters
	 * belong to the identifier. Otherwise [isIdentifierPart] is used.
	 */
	Token finishIdentifier() {
	  final inInterp = _interpStack != null && _interpStack.depth == -1;
	  if (inInterp) _interpStack.depth = 0;

	  while (_index < _text.length) {
	    final ch = _text.charCodeAt(_index);
	    final isPart =
	        inInterp ? isInterpIdentifierPart(ch) : isIdentifierPart(ch);
	    if (!isPart) break;
	    _index++;
	  }

	  return _finishToken(getIdentifierKind());
	}

	463 }

	464

OLD	NEW

« no previous file with comments | « utils/template/tokenizer.dart ('k') | utils/template/tokenkind.dart » ('j') | no next file with comments »