Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Side by Side Diff: utils/template/tokenizer_base.dart

Issue 9695048: Template parser (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Siggi's comments Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « utils/template/tokenizer.dart ('k') | utils/template/tokenkind.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4 // Generated by scripts/tokenizer_gen.py.
5
6
/** Anything that can hand out [Token]s one at a time. */
interface TokenSource {
  /** Returns the next [Token] from this source. */
  Token next();
}
10
/**
 * One frame of string-interpolation state. Frames are chained through
 * [previous], so the tokenizer can return to the enclosing string when an
 * interpolation ends.
 */
class InterpStack {
  InterpStack next, previous;

  /** Char code of the quote that opened the string this frame belongs to. */
  final int quote;

  /** Whether the string this frame belongs to is a multiline string. */
  final bool isMultiline;

  /** Brace nesting depth; -1 until the tokenizer assigns a real depth. */
  int depth;

  /** Creates a frame stacked on [previous], with [depth] starting at -1. */
  InterpStack(this.previous, this.quote, this.isMultiline): depth = -1;

  /** Returns the enclosing frame (null when this is the outermost one). */
  InterpStack pop() => previous;

  /**
   * Returns a new frame for a string opened with [quote], stacked on top of
   * [stack] (which may be null).
   */
  static InterpStack push(InterpStack stack, int quote, bool isMultiline) {
    // The constructor already stores [stack] as the new frame's [previous].
    return new InterpStack(stack, quote, isMultiline);
  }
}
29
30 /**
31 * The base class for our tokenizer. The hand coded parts are in this file, with
32 * the generated parts in the subclass Tokenizer.
33 */
34 class TokenizerBase extends TokenizerHelpers implements TokenSource {
35 final SourceFile _source;
36 final bool _skipWhitespace;
37 String _text;
38
39 int _index;
40 int _startIndex;
41
42 /** Keeps track of string interpolation state. */
43 InterpStack _interpStack;
44
/**
 * Creates a tokenizer over [_source] whose scan position starts at [index]
 * (0 by default). The source text is cached in [_text].
 */
TokenizerBase(this._source, this._skipWhitespace, [index = 0])
    : _index = index {
  _text = _source.text;
}
49
50 abstract Token next();
51 abstract int getIdentifierKind();
52
/** Consumes and returns the next char code, or returns 0 at end of input. */
int _nextChar() {
  if (_index >= _text.length) return 0;
  return _text.charCodeAt(_index++);
}
60
/** Returns the next char code without consuming it, or 0 at end of input. */
int _peekChar() {
  return _index < _text.length ? _text.charCodeAt(_index) : 0;
}
68
/**
 * Consumes the next character only if it equals [ch]. Returns whether a
 * character was consumed; always false at end of input.
 */
bool _maybeEatChar(int ch) {
  if (_index >= _text.length || _text.charCodeAt(_index) != ch) {
    return false;
  }
  _index++;
  return true;
}
81
/**
 * Returns the text from [_startIndex] up to the current position, clamping
 * the end to the length of the input.
 */
String _tokenText() {
  final end = _index < _text.length ? _index : _text.length;
  return _text.substring(_startIndex, end);
}
89
/** Builds a [Token] of [kind] spanning [_startIndex] to the current position. */
Token _finishToken(int kind) => new Token(kind, _source, _startIndex, _index);
93
/**
 * Builds an [ErrorToken] spanning [_startIndex] to the current position,
 * carrying the optional [message].
 */
Token _errorToken([String message = null]) =>
    new ErrorToken(TokenKind.ERROR, _source, _startIndex, _index, message);
98
/**
 * Scans a run of whitespace.
 *
 * When whitespace is not being skipped, a WHITESPACE token is returned at the
 * first newline or at the first non-whitespace character. When it is being
 * skipped, scanning continues across newlines and the result of [next] is
 * returned once a non-whitespace character is reached. Reaching the end of
 * the input yields an END_OF_FILE token.
 */
Token finishWhitespace() {
  // Back up one position before scanning.
  _index--;
  while (_index < _text.length) {
    final ch = _text.charCodeAt(_index++);
    if (ch == 32/*' '*/ || ch == 9/*'\t'*/ || ch == 13/*'\r'*/) continue;

    if (ch == 10/*'\n'*/) {
      if (_skipWhitespace) continue;
      return _finishToken(TokenKind.WHITESPACE); // note the newline?
    }

    // Not whitespace: un-read the character and end the run.
    _index--;
    return _skipWhitespace ? next() : _finishToken(TokenKind.WHITESPACE);
  }
  return _finishToken(TokenKind.END_OF_FILE);
}
121
/**
 * Consumes characters up to and including the first '\n' or '\r' (or until
 * the end of the input), then returns a COMMENT token, or the result of
 * [next] when whitespace is being skipped.
 */
Token finishSingleLineComment() {
  int ch;
  do {
    ch = _nextChar();
  } while (ch != 0 && ch != 10/*'\n'*/ && ch != 13/*'\r'*/);

  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
}
134
/**
 * Consumes a block comment, tracking nested comment openers and closers.
 *
 * Returns an error token if the input ends before the comment is closed.
 * Otherwise returns a COMMENT token, or the result of [next] when whitespace
 * is being skipped.
 */
Token finishMultiLineComment() {
  int nesting = 1;
  while (nesting > 0) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();

    if (ch == 42/*'*'*/ && _maybeEatChar(47/*'/'*/)) {
      // A "*/" closes one level.
      nesting--;
    } else if (ch == 47/*'/'*/ && _maybeEatChar(42/*'*'*/)) {
      // A "/*" opens a nested level.
      nesting++;
    }
  }

  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
}
158
/** Advances past every consecutive digit at the current position. */
void eatDigits() {
  while (_index < _text.length && isDigit(_text.charCodeAt(_index))) {
    _index++;
  }
}
168
/**
 * Returns the numeric value (0-15) of the hex digit with char code [c], or -1
 * if [c] is not one of 0-9, a-f or A-F.
 */
static int _hexDigit(int c) {
  if (48/*0*/ <= c && c <= 57/*9*/) return c - 48;
  if (97/*a*/ <= c && c <= 102/*f*/) return c - 87;
  if (65/*A*/ <= c && c <= 70/*F*/) return c - 55;
  return -1;
}
180
/**
 * Reads hex digits starting at the current position and returns their value.
 *
 * When [hexLength] is given, exactly that many hex digits are expected: -1 is
 * returned if fewer than [hexLength] characters remain in the input or if a
 * non-hex character is encountered. When [hexLength] is omitted, digits are
 * consumed until the first non-hex character or the end of the input, and the
 * value accumulated so far is returned (0 when there were no digits).
 */
int readHex([int hexLength]) {
  int maxIndex;
  if (hexLength === null) {
    // Unbounded read: every remaining character, including the last one, is
    // a candidate digit.
    maxIndex = _text.length;
  } else {
    // TODO(jimhug): What if this is too long?
    maxIndex = _index + hexLength;
    // Digits that end exactly at the end of the input are still readable;
    // only fail when the requested span runs past it.
    if (maxIndex > _text.length) return -1;
  }
  var result = 0;
  while (_index < maxIndex) {
    final digit = _hexDigit(_text.charCodeAt(_index));
    if (digit == -1) {
      // A non-hex character ends an unbounded read, but is an error when a
      // fixed number of digits was required.
      return hexLength === null ? result : -1;
    }
    // Multiply by 16 rather than shift by 4 since that will result in a
    // correct value for numbers that exceed the 32 bit precision of JS
    // 'integers'.
    // TODO: Figure out a better solution to integer truncation. Issue 638.
    result = (result * 16) + digit;
    _index++;
  }

  return result;
}
211
/**
 * Scans the rest of a numeric literal whose first digit has already been
 * consumed, producing an INTEGER or DOUBLE token via [finishNumberExtra].
 */
Token finishNumber() {
  eatDigits();

  if (_peekChar() == 46/*.*/) {
    // Only treat the '.' as a decimal point when a digit follows it, so that
    // 1.toString() still scans as an integer followed by a dot.
    final dotIndex = _index;
    _nextChar();
    if (isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    }
    _index = dotIndex;
  }

  return finishNumberExtra(TokenKind.INTEGER);
}
228
/**
 * Finishes a numeric literal of the given [kind]: consumes an optional
 * exponent (which turns the token into a DOUBLE), then returns an error token
 * if the number is immediately followed by an identifier-start character.
 */
Token finishNumberExtra(int kind) {
  final hasExponent = _maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/);
  if (hasExponent) {
    kind = TokenKind.DOUBLE;
    _maybeEatChar(45/*-*/);
    _maybeEatChar(43/*+*/);
    eatDigits();
  }

  final following = _peekChar();
  if (following != 0 && isIdentifierStart(following)) {
    // Include the offending character in the error token.
    _nextChar();
    return _errorToken("illegal character in number");
  }

  return _finishToken(kind);
}
243
/**
 * Builds a [LiteralToken] whose value is the string made from the char codes
 * in [buf]. The kind is STRING_PART when [isPart] is true, STRING otherwise.
 */
Token _makeStringToken(List<int> buf, bool isPart) {
  var kind = TokenKind.STRING;
  if (isPart) kind = TokenKind.STRING_PART;
  return new LiteralToken(kind, _source, _startIndex, _index,
      new String.fromCharCodes(buf));
}
249
/**
 * Builds a STRING [LiteralToken] for a raw string by slicing its contents
 * straight out of the source text.
 *
 * For a multiline string the first 4 and last 3 characters of the token are
 * dropped, plus one leading newline if present; otherwise the first 2 and the
 * last 1 are dropped. (Presumably these are the raw-string prefix and the
 * quotes -- confirm against the caller.)
 */
Token _makeRawStringToken(bool isMultiline) {
  final text = _source.text;
  int start, end;
  if (isMultiline) {
    start = _startIndex + 4;
    // Skip initial newline in multiline strings
    if (text[start] == '\n') start++;
    end = _index - 3;
  } else {
    start = _startIndex + 2;
    end = _index - 1;
  }
  return new LiteralToken(TokenKind.STRING, _source, _startIndex, _index,
      text.substring(start, end));
}
262
/**
 * Scans the body of a multiline string delimited by three [quote] characters.
 *
 * Returns a STRING token at the closing triple quote, a STRING_PART token
 * (after pushing an interpolation frame) at a '$', and an error token if the
 * input ends first or an escape sequence is invalid.
 */
Token finishMultilineString(int quote) {
  final chars = <int>[];
  while (true) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();

    if (ch == quote) {
      // One or two quotes in a row are content; three close the string.
      if (_maybeEatChar(quote)) {
        if (_maybeEatChar(quote)) return _makeStringToken(chars, false);
        chars.add(quote);
      }
      chars.add(quote);
      continue;
    }

    if (ch == 36/*$*/) {
      // start of string interp
      _interpStack = InterpStack.push(_interpStack, quote, true);
      return _makeStringToken(chars, true);
    }

    if (ch == 92/*\*/) {
      final escapeVal = readEscapeSequence();
      if (escapeVal == -1) return _errorToken("invalid hex escape sequence");
      chars.add(escapeVal);
      continue;
    }

    chars.add(ch);
  }
}
293
/**
 * Returns an LBRACE token and, when inside a string interpolation, bumps the
 * brace depth of the current frame (a depth of -1 becomes 1).
 */
Token _finishOpenBrace() {
  final interp = _interpStack;
  if (interp != null) {
    if (interp.depth == -1) {
      interp.depth = 1;
    } else {
      assert(interp.depth >= 0);
      interp.depth++;
    }
  }
  return _finishToken(TokenKind.LBRACE);
}
305
/**
 * Returns an RBRACE token and, when inside a string interpolation, lowers the
 * brace depth of the current frame by one.
 */
Token _finishCloseBrace() {
  final interp = _interpStack;
  if (interp != null) {
    interp.depth--;
    assert(interp.depth >= 0);
  }
  return _finishToken(TokenKind.RBRACE);
}
313
/**
 * Scans a string whose opening [quote] has already been consumed, dispatching
 * on how many further quotes follow: none starts a normal string body, one
 * more is an empty string, and two more open a multiline string.
 */
Token finishString(int quote) {
  if (!_maybeEatChar(quote)) return finishStringBody(quote);
  if (!_maybeEatChar(quote)) return _makeStringToken(new List<int>(), false);

  // skip an initial newline
  _maybeEatChar(10/*'\n'*/);
  return finishMultilineString(quote);
}
326
/**
 * Scans a raw string whose opening [quote] has already been consumed.
 *
 * A second quote right away means either an empty string or, if a third
 * follows, a multiline raw string. Otherwise characters are consumed verbatim
 * up to the closing [quote]; reaching the end of the input first yields an
 * error token.
 */
Token finishRawString(int quote) {
  if (_maybeEatChar(quote)) {
    return _maybeEatChar(quote)
        ? finishMultilineRawString(quote)
        : _makeStringToken(<int>[], false);
  }

  while (true) {
    final ch = _nextChar();
    if (ch == quote) return _makeRawStringToken(false);
    if (ch == 0) return _errorToken();
  }
}
344
/**
 * Consumes characters verbatim until three consecutive [quote] characters are
 * found, then returns the raw string token. Reaching the end of the input
 * first yields an error token.
 */
Token finishMultilineRawString(int quote) {
  while (true) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();
    if (ch != quote) continue;

    if (_maybeEatChar(quote) && _maybeEatChar(quote)) {
      return _makeRawStringToken(true);
    }
  }
}
355
/**
 * Scans the body of a single-line string up to the closing [quote].
 *
 * Returns a STRING token at the closing quote, a STRING_PART token (after
 * pushing an interpolation frame) at a '$', and an error token if the input
 * ends first or an escape sequence is invalid.
 */
Token finishStringBody(int quote) {
  final chars = new List<int>();
  while (true) {
    final ch = _nextChar();
    if (ch == quote) return _makeStringToken(chars, false);

    if (ch == 36/*$*/) {
      // start of string interp
      _interpStack = InterpStack.push(_interpStack, quote, false);
      return _makeStringToken(chars, true);
    }

    if (ch == 0) return _errorToken();

    if (ch == 92/*\*/) {
      final escapeVal = readEscapeSequence();
      if (escapeVal == -1) return _errorToken("invalid hex escape sequence");
      chars.add(escapeVal);
      continue;
    }

    chars.add(ch);
  }
}
380
/**
 * Reads the escape sequence that follows an already-consumed backslash and
 * returns the char code it stands for, or -1 if the sequence is invalid.
 *
 * The single-letter escapes n, r, f, b, t and v map to their control
 * characters. \xHH, \uHHHH and \u{H...} are read as hex values. Any other
 * character is returned unchanged.
 */
int readEscapeSequence() {
  final ch = _nextChar();
  int hexValue;
  switch (ch) {
    case 110/*n*/:
      return 0x0a/*'\n'*/;
    case 114/*r*/:
      return 0x0d/*'\r'*/;
    case 102/*f*/:
      return 0x0c/*'\f'*/;
    case 98/*b*/:
      return 0x08/*'\b'*/;
    case 116/*t*/:
      return 0x09/*'\t'*/;
    case 118/*v*/:
      return 0x0b/*'\v'*/;
    case 120/*x*/:
      hexValue = readHex(2);
      break;
    case 117/*u*/:
      if (_maybeEatChar(123/*{*/)) {
        hexValue = readHex();
        if (!_maybeEatChar(125/*}*/)) {
          return -1;
        } else {
          break;
        }
      } else {
        hexValue = readHex(4);
        break;
      }
    default: return ch;
  }

  if (hexValue == -1) return -1;

  // According to the Unicode standard the high and low surrogate halves
  // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
  // are not legal Unicode values.
  if (hexValue >= 0xD800 && hexValue <= 0xDFFF) {
    // Surrogate halves are invalid, not merely unimplemented, so reject
    // them here instead of letting them reach the fatal error below.
    return -1;
  }
  if (hexValue <= 0xFFFF) {
    return hexValue;
  }
  if (hexValue <= 0x10FFFF) {
    world.fatal('unicode values greater than 2 bytes not implemented yet');
    return -1;
  }
  return -1;
}
429
/**
 * Scans what follows an already-consumed '.': a DOUBLE literal when a digit
 * comes next, otherwise a DOT token.
 */
Token finishDot() {
  if (!isDigit(_peekChar())) return _finishToken(TokenKind.DOT);

  eatDigits();
  return finishNumberExtra(TokenKind.DOUBLE);
}
438
/**
 * Consumes the remaining characters of an identifier and returns a token
 * whose kind comes from [getIdentifierKind].
 *
 * When the current interpolation frame has a depth of -1, the depth is set to
 * 0 and [isInterpIdentifierPart] decides which characters belong to the
 * identifier; otherwise [isIdentifierPart] does.
 */
Token finishIdentifier() {
  final inInterp = _interpStack != null && _interpStack.depth == -1;
  if (inInterp) _interpStack.depth = 0;

  while (_index < _text.length) {
    final ch = _text.charCodeAt(_index);
    final isPart =
        inInterp ? isInterpIdentifierPart(ch) : isIdentifierPart(ch);
    if (!isPart) break;
    _index++;
  }

  return _finishToken(getIdentifierKind());
}
463 }
464
OLDNEW
« no previous file with comments | « utils/template/tokenizer.dart ('k') | utils/template/tokenkind.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698