Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(151)

Side by Side Diff: frog/tokenizer.dart

Issue 10548047: Remove frog from the repository. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Move test and update apidoc.gyp. Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « frog/token_kind.g.dart ('k') | frog/tokenizer.g.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4 // Generated by scripts/tokenizer_gen.py.
5
6
/**
 * A source of tokens that hands them out one at a time through [next].
 * TokenizerBase below declares that it implements this interface.
 */
7 interface TokenSource {
// Returns the next token from this source.
8 Token next();
9 }
10
/**
 * One frame in the stack of string interpolations being tokenized.
 *
 * Frames are linked through [previous]; [push] creates a new top frame and
 * [pop] yields the frame underneath. Each frame remembers the [quote]
 * character of the enclosing string and whether that string [isMultiline].
 *
 * [depth] starts at -1. The tokenizer in this file sets it to 0 when an
 * identifier directly follows the interpolation start, sets it to 1 on the
 * first open brace, and then increments/decrements it on further braces.
 */
class InterpStack {
  // NOTE(review): [next] is declared but never assigned anywhere in this
  // file; only [previous] links frames together.
  InterpStack next, previous;
  final int quote;
  final bool isMultiline;
  int depth;

  InterpStack(this.previous, this.quote, this.isMultiline): depth = -1;

  /** Returns the frame below this one, or null if this is the bottom. */
  InterpStack pop() {
    return this.previous;
  }

  /**
   * Returns a new frame for [quote]/[isMultiline] stacked on top of [stack].
   * [stack] may be null, in which case the new frame is the bottom frame.
   */
  static InterpStack push(InterpStack stack, int quote, bool isMultiline) {
    // The constructor already stores [stack] as `previous`, so no further
    // linking is needed here.
    return new InterpStack(stack, quote, isMultiline);
  }
}
29
30 /**
31 * The base class for our tokenizer. The hand coded parts are in this file, with
32 * the generated parts in the subclass Tokenizer.
33 */
34 class TokenizerBase extends TokenizerHelpers implements TokenSource {
35 final SourceFile _source;
36 final bool _skipWhitespace;
37 String _text;
38
39 int _index;
40 int _startIndex;
41
42 /** Keeps track of string interpolation state. */
43 InterpStack _interpStack;
44
/**
 * Creates a tokenizer over [_source]. When [_skipWhitespace] is true the
 * whitespace and comment scanners below return the following token instead
 * of emitting one of their own. Scanning starts at [index] (default 0).
 */
45 TokenizerBase(this._source, this._skipWhitespace, [index = 0])
46 : this._index = index {
// Cache the source text so the scanners can index into it directly.
47 _text = _source.text;
48 }
49
// Produces the next token. Per the class comment, the generated subclass
// Tokenizer supplies the implementation.
50 abstract Token next();
// Returns the token kind for the identifier text just scanned; finishIdentifier
// passes the result straight to _finishToken.
51 abstract int getIdentifierKind();
52
/**
 * Consumes and returns the char code at the current position, or returns 0
 * (without advancing) once the end of the text has been reached.
 */
int _nextChar() {
  return _index < _text.length ? _text.charCodeAt(_index++) : 0;
}
60
/**
 * Returns the char code at the current position without consuming it, or 0
 * when the current position is at the end of the text.
 */
int _peekChar() {
  return _index < _text.length ? _text.charCodeAt(_index) : 0;
}
68
/**
 * Consumes the current character if it equals [ch] and returns true;
 * otherwise (including at end of text) leaves the position alone and
 * returns false.
 */
bool _maybeEatChar(int ch) {
  if (_index >= _text.length) return false;
  if (_text.charCodeAt(_index) != ch) return false;
  _index++;
  return true;
}
81
/**
 * Returns the text from the start of the current token up to the current
 * position, with the end clamped to the length of the text.
 */
String _tokenText() {
  final end = _index < _text.length ? _index : _text.length;
  return _text.substring(_startIndex, end);
}
89
/**
 * Builds a token of the given [kind] spanning from the current token start
 * to the current position.
 */
Token _finishToken(int kind) =>
    new Token(kind, _source, _startIndex, _index);
93
/**
 * Builds an error token spanning from the current token start to the
 * current position, carrying the optional [message] (null when omitted).
 */
Token _errorToken([String message = null]) =>
    new ErrorToken(TokenKind.ERROR, _source, _startIndex, _index, message);
98
/**
 * Scans a run of whitespace, starting by stepping back one character so the
 * character that triggered this call is re-examined.
 *
 * When whitespace is not being skipped, a WHITESPACE token is returned at
 * the first newline or at the first non-whitespace character. When it is
 * being skipped, the run is consumed and the result of [next] is returned.
 * If the text ends inside the run, an END_OF_FILE token is returned.
 */
Token finishWhitespace() {
  _index--;
  while (_index < _text.length) {
    final ch = _text.charCodeAt(_index++);
    if (ch == 10/*'\n'*/) {
      // A newline terminates the token unless whitespace is being skipped.
      if (!_skipWhitespace) {
        return _finishToken(TokenKind.WHITESPACE); // note the newline?
      }
      continue;
    }
    if (ch == 32/*' '*/ || ch == 9/*'\t'*/ || ch == 13/*'\r'*/) continue;

    // Not whitespace: give the character back and finish.
    _index--;
    return _skipWhitespace ? next() : _finishToken(TokenKind.WHITESPACE);
  }
  return _finishToken(TokenKind.END_OF_FILE);
}
121
/**
 * Consumes characters up to and including the first line terminator
 * ('\n' or '\r'), or up to the end of the text, and returns a HASHBANG token.
 */
Token finishHashBang() {
  int ch = _nextChar();
  while (ch != 0 && ch != 10/*'\n'*/ && ch != 13/*'\r'*/) {
    ch = _nextChar();
  }
  return _finishToken(TokenKind.HASHBANG);
}
130
/**
 * Consumes a single-line comment up to and including the first line
 * terminator ('\n' or '\r'), or up to the end of the text. Returns the
 * result of [next] when whitespace is skipped, otherwise a COMMENT token.
 */
Token finishSingleLineComment() {
  int ch = _nextChar();
  while (ch != 0 && ch != 10/*'\n'*/ && ch != 13/*'\r'*/) {
    ch = _nextChar();
  }
  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
}
143
/**
 * Consumes a multi-line comment whose opener has already been read.
 * Comments nest: every further opener raises the nesting level and every
 * closer lowers it; scanning stops when the level reaches zero. Reaching
 * the end of the text first yields an error token. Returns the result of
 * [next] when whitespace is skipped, otherwise a COMMENT token.
 */
Token finishMultiLineComment() {
  int openComments = 1;
  while (openComments > 0) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();
    if (ch == 42/*'*'*/ && _maybeEatChar(47/*'/'*/)) {
      openComments--;
    } else if (ch == 47/*'/'*/ && _maybeEatChar(42/*'*'*/)) {
      openComments++;
    }
  }
  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
}
167
/** Advances past every consecutive digit at the current position. */
void eatDigits() {
  while (_index < _text.length &&
         TokenizerHelpers.isDigit(_text.charCodeAt(_index))) {
    _index++;
  }
}
177
/**
 * Returns the numeric value (0-15) of the hexadecimal digit with char code
 * [c], accepting both upper and lower case letters, or -1 if [c] is not a
 * hexadecimal digit.
 */
static int _hexDigit(int c) {
  // '0'..'9'
  if (48 <= c && c <= 57) return c - 48;
  // 'a'..'f'
  if (97 <= c && c <= 102) return c - 97 + 10;
  // 'A'..'F'
  if (65 <= c && c <= 70) return c - 65 + 10;
  return -1;
}
189
/**
 * Reads hexadecimal digits at the current position and returns their value.
 *
 * Without [hexLength], digits are consumed until the first non-hex
 * character or the end of the text, and the value read so far is returned
 * (0 if there were no digits). With [hexLength], exactly that many digits
 * are required: -1 is returned if fewer characters remain or if a non-hex
 * character is met first. In every case the position is left at the first
 * character that was not consumed.
 */
int readHex([int hexLength]) {
  int maxIndex;
  if (hexLength === null) {
    // Scan right up to the end of the text; stopping one short would drop
    // the final digit of a literal that ends the source.
    maxIndex = _text.length;
  } else {
    // TODO(jimhug): What if this is too long?
    maxIndex = _index + hexLength;
    // The digits occupy [_index, maxIndex), so ending exactly at the end of
    // the text still fits.
    if (maxIndex > _text.length) return -1;
  }
  var result = 0;
  while (_index < maxIndex) {
    final digit = _hexDigit(_text.charCodeAt(_index));
    if (digit == -1) {
      // A non-digit simply ends an open-ended number, but makes a
      // fixed-width one invalid.
      return hexLength === null ? result : -1;
    }
    // Multiply by 16 rather than shift by 4 since that will result in a
    // correct value for numbers that exceed the 32 bit precision of JS
    // 'integers'.
    // TODO: Figure out a better solution to integer truncation. Issue 638.
    result = (result * 16) + digit;
    _index++;
  }

  return result;
}
220
/**
 * Reads the digits of a hexadecimal literal and returns a HEX_INTEGER
 * literal token carrying the parsed value.
 */
Token finishHex() {
  // readHex() has to run before the token is built: it advances _index,
  // which becomes the token's end position.
  final hexValue = readHex();
  return new LiteralToken(
      TokenKind.HEX_INTEGER, _source, _startIndex, _index, hexValue);
}
226
/**
 * Scans the rest of a decimal number. A '.' only counts as part of the
 * number when a digit follows it; otherwise it is left for the next token.
 * The result comes from [finishNumberExtra], seeded with DOUBLE when a
 * fractional part was read and INTEGER otherwise.
 */
Token finishNumber() {
  eatDigits();

  if (_maybeEatChar(46/*.*/)) {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    }
    // Handle the case of 1.toString(): give the '.' back.
    _index--;
  }

  return finishNumberExtra(TokenKind.INTEGER);
}
243
/**
 * Finishes a numeric literal whose integer/fraction digits have been read.
 *
 * An 'e' or 'E' starts an exponent, which turns the token into a DOUBLE
 * regardless of [kind]; the exponent may carry one optional sign ('-' or
 * '+') followed by digits. If an identifier-start character directly
 * follows the number, it is consumed and an error token is returned.
 * Otherwise a token of the resulting kind is returned.
 */
Token finishNumberExtra(int kind) {
  if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
    kind = TokenKind.DOUBLE;
    // Accept at most one sign; "-+" together is not part of the exponent.
    if (!_maybeEatChar(45/*-*/)) {
      _maybeEatChar(43/*+*/);
    }
    eatDigits();
  }
  final following = _peekChar();
  if (following != 0 && TokenizerHelpers.isIdentifierStart(following)) {
    _nextChar();
    return _errorToken("illegal character in number");
  }

  return _finishToken(kind);
}
258
/**
 * Builds a literal token whose value is the string made from the char codes
 * in [buf]. The kind is STRING_PART when [isPart] is true, else STRING.
 */
Token _makeStringToken(List<int> buf, bool isPart) {
  int kind = TokenKind.STRING;
  if (isPart) kind = TokenKind.STRING_PART;
  return new LiteralToken(
      kind, _source, _startIndex, _index, new String.fromCharCodes(buf));
}
264
/**
 * Builds a STRING literal token for a raw string by slicing its contents
 * straight out of the source text.
 *
 * For a multiline string the contents start 4 characters after the token
 * start (one more if a newline comes first) and end 3 characters before the
 * current position; otherwise they start 2 characters in and end 1 before.
 * NOTE(review): the offsets presumably correspond to a one-character raw
 * prefix plus the opening/closing quotes - confirm against the caller.
 */
Token _makeRawStringToken(bool isMultiline) {
  final text = _source.text;
  int begin;
  int end;
  if (isMultiline) {
    begin = _startIndex + 4;
    // Skip initial newline in multiline strings
    if (text[begin] == '\n') begin++;
    end = _index - 3;
  } else {
    begin = _startIndex + 2;
    end = _index - 1;
  }
  return new LiteralToken(TokenKind.STRING, _source, _startIndex, _index,
      text.substring(begin, end));
}
277
/**
 * Scans the body of a multiline string delimited by three [quote]
 * characters, collecting its char codes.
 *
 * Returns a STRING token at the closing triple quote, a STRING_PART token
 * (after pushing an interpolation frame) at a '$', and an error token at
 * end of text or on an invalid escape. One or two quote characters that are
 * not followed by a third are kept as string content.
 */
Token finishMultilineString(int quote) {
  final chars = <int>[];
  while (true) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();

    if (ch == quote) {
      if (_maybeEatChar(quote)) {
        if (_maybeEatChar(quote)) {
          return _makeStringToken(chars, false);
        }
        // Two quotes only: both are content; the second is added here.
        chars.add(quote);
      }
      chars.add(quote);
      continue;
    }

    if (ch == 36/*$*/) {
      // start of string interp
      _interpStack = InterpStack.push(_interpStack, quote, true);
      return _makeStringToken(chars, true);
    }

    if (ch == 92/*\*/) {
      final escapeVal = readEscapeSequence();
      if (escapeVal == -1) {
        return _errorToken("invalid hex escape sequence");
      }
      chars.add(escapeVal);
      continue;
    }

    chars.add(ch);
  }
}
308
/**
 * Returns an LBRACE token and, while an interpolation frame is active,
 * records the brace on it: a depth of -1 becomes 1, any other depth is
 * incremented.
 */
Token _finishOpenBrace() {
  final frame = _interpStack;
  if (frame != null) {
    if (frame.depth != -1) {
      assert(frame.depth >= 0);
      frame.depth += 1;
    } else {
      frame.depth = 1;
    }
  }
  return _finishToken(TokenKind.LBRACE);
}
320
/**
 * Returns an RBRACE token and, while an interpolation frame is active,
 * decrements that frame's brace depth (asserting it stays non-negative).
 */
Token _finishCloseBrace() {
  final frame = _interpStack;
  if (frame != null) {
    frame.depth = frame.depth - 1;
    assert(frame.depth >= 0);
  }
  return _finishToken(TokenKind.RBRACE);
}
328
/**
 * Scans a string whose opening [quote] has been read. Two quotes in a row
 * form an empty string; three open a multiline string (an immediately
 * following newline is dropped); anything else is a single-line body.
 */
Token finishString(int quote) {
  if (!_maybeEatChar(quote)) {
    return finishStringBody(quote);
  }
  if (!_maybeEatChar(quote)) {
    return _makeStringToken(new List<int>(), false);
  }
  // skip an initial newline
  _maybeEatChar(10/*'\n'*/);
  return finishMultilineString(quote);
}
341
/**
 * Scans a raw string whose opening [quote] has been read. Two quotes in a
 * row form an empty string and three open a multiline raw string; otherwise
 * characters are consumed up to the closing quote. Reaching the end of the
 * text first yields an error token.
 */
Token finishRawString(int quote) {
  if (_maybeEatChar(quote)) {
    return _maybeEatChar(quote)
        ? finishMultilineRawString(quote)
        : _makeStringToken(<int>[], false);
  }
  int ch = _nextChar();
  while (ch != quote) {
    if (ch == 0) return _errorToken();
    ch = _nextChar();
  }
  return _makeRawStringToken(false);
}
359
/**
 * Consumes a multiline raw string up to three [quote] characters in a row
 * and returns its token. Reaching the end of the text first yields an
 * error token.
 */
Token finishMultilineRawString(int quote) {
  while (true) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();
    if (ch != quote) continue;
    if (_maybeEatChar(quote) && _maybeEatChar(quote)) {
      return _makeRawStringToken(true);
    }
  }
}
370
/**
 * Scans the body of a single-line string delimited by [quote], collecting
 * its char codes.
 *
 * Returns a STRING token at the closing quote, a STRING_PART token (after
 * pushing an interpolation frame) at a '$', and an error token at end of
 * text or on an invalid escape.
 */
Token finishStringBody(int quote) {
  final chars = new List<int>();
  while (true) {
    final ch = _nextChar();
    if (ch == quote) {
      return _makeStringToken(chars, false);
    }
    if (ch == 36/*$*/) {
      // start of string interp
      _interpStack = InterpStack.push(_interpStack, quote, false);
      return _makeStringToken(chars, true);
    }
    if (ch == 0) {
      return _errorToken();
    }
    if (ch == 92/*\*/) {
      final escapeVal = readEscapeSequence();
      if (escapeVal == -1) {
        return _errorToken("invalid hex escape sequence");
      }
      chars.add(escapeVal);
      continue;
    }
    chars.add(ch);
  }
}
395
/**
 * Reads the escape sequence that follows a backslash and returns the char
 * code it denotes, or -1 if the sequence is invalid.
 *
 * Recognizes n, r, f, b, t and v, a two-digit hex escape introduced by x,
 * and a unicode escape introduced by u that is either four hex digits or
 * any number of hex digits wrapped in braces. Any other character stands
 * for itself. Hex and unicode escapes naming a surrogate half
 * (U+D800-U+DFFF) or a value above U+10FFFF return -1; values above U+FFFF
 * that are otherwise legal are reported through world.fatal because they
 * are not supported yet.
 */
int readEscapeSequence() {
  final ch = _nextChar();
  int hexValue;
  switch (ch) {
    case 110/*n*/:
      return 0x0a/*'\n'*/;
    case 114/*r*/:
      return 0x0d/*'\r'*/;
    case 102/*f*/:
      return 0x0c/*'\f'*/;
    case 98/*b*/:
      return 0x08/*'\b'*/;
    case 116/*t*/:
      return 0x09/*'\t'*/;
    case 118/*v*/:
      return 0x0b/*'\v'*/;
    case 120/*x*/:
      hexValue = readHex(2);
      break;
    case 117/*u*/:
      if (_maybeEatChar(123/*{*/)) {
        hexValue = readHex();
        // The braced form must be closed.
        if (!_maybeEatChar(125/*}*/)) {
          return -1;
        } else {
          break;
        }
      } else {
        hexValue = readHex(4);
        break;
      }
    default: return ch;
  }

  if (hexValue == -1) return -1;

  // According to the Unicode standard the high and low surrogate halves
  // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
  // are not legal Unicode values.
  if (hexValue >= 0xD800 && hexValue <= 0xDFFF) {
    // Reject surrogates explicitly so they are reported as an invalid
    // escape rather than as an unimplemented wide character.
    return -1;
  } else if (hexValue <= 0xFFFF) {
    return hexValue;
  } else if (hexValue <= 0x10FFFF) {
    world.fatal('unicode values greater than 2 bytes not implemented yet');
    return -1;
  } else {
    return -1;
  }
}
444
/**
 * Finishes a token that began with '.': a DOT token, unless a digit
 * follows, in which case the digits are read as a DOUBLE literal.
 */
Token finishDot() {
  if (!TokenizerHelpers.isDigit(_peekChar())) {
    return _finishToken(TokenKind.DOT);
  }
  eatDigits();
  return finishNumberExtra(TokenKind.DOUBLE);
}
453
/**
 * Scans the rest of an identifier whose first character [ch] has been read
 * and returns a token of the kind reported by [getIdentifierKind].
 *
 * When the active interpolation frame has depth -1, the frame's depth is
 * set to 0 and the identifier is scanned with the interpolation-specific
 * character test; a '$' as [ch] is an error in that state. Otherwise the
 * regular identifier character test is used.
 */
Token finishIdentifier(int ch) {
  final inInterpolation = _interpStack != null && _interpStack.depth == -1;
  if (inInterpolation) {
    _interpStack.depth = 0;
    if (ch == 36/*$*/) {
      return _errorToken(
          @"illegal character after $ in string interpolation");
    }
  }

  // Consume characters for as long as they belong to the identifier.
  while (_index < _text.length) {
    final c = _text.charCodeAt(_index);
    final isPart = inInterpolation
        ? TokenizerHelpers.isInterpIdentifierPart(c)
        : TokenizerHelpers.isIdentifierPart(c);
    if (!isPart) break;
    _index++;
  }

  return _finishToken(getIdentifierKind());
}
482 }
OLDNEW
« no previous file with comments | « frog/token_kind.g.dart ('k') | frog/tokenizer.g.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698