frog/tokenizer.dart - Issue 10548047: Remove frog from the repository.

Side by Side Diff: frog/tokenizer.dart

Issue 10548047: Remove frog from the repository. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Move test and update apidoc.gyp. Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.

4 // Generated by scripts/tokenizer_gen.py.

5

6

/**
 * A producer of [Token]s. The only operation is [next], which hands back one
 * token per call; [TokenizerBase] in this file is the implementation.
 */
interface TokenSource {
  /** Returns the next token from this source. */
  Token next();
}

10

/**
 * One frame of string-interpolation state, linked to the enclosing frame
 * through [previous].
 *
 * A frame records which quote character opened the string ([quote]), whether
 * that string is a multi-line string ([isMultiline]), and a brace nesting
 * counter ([depth]). [depth] starts at -1; [TokenizerBase] moves it to 0 when
 * an identifier follows the `$` and to 1 when an open brace follows it.
 */
class InterpStack {
  // NOTE(review): [next] is declared but never assigned anywhere in this
  // file; only [previous] is used to walk the stack.
  InterpStack next, previous;
  final int quote;
  final bool isMultiline;
  int depth;

  /** Creates a frame on top of [previous] with [depth] initialised to -1. */
  InterpStack(this.previous, this.quote, this.isMultiline): depth = -1;

  /** Returns the enclosing frame, or null when this is the outermost one. */
  InterpStack pop() {
    return this.previous;
  }

  /**
   * Returns a new frame whose [previous] is [stack] (which may be null for
   * the outermost interpolation).
   */
  static InterpStack push(InterpStack stack, int quote, bool isMultiline) {
    // The constructor already stores [stack] in [previous]; no further
    // linking is needed here.
    return new InterpStack(stack, quote, isMultiline);
  }
}

29

/**
 * The base class for our tokenizer. The hand coded parts are in this file, with
 * the generated parts in the subclass Tokenizer.
 *
 * Character codes are written as integer literals with the character they
 * stand for in an inline comment. A character code of 0 is what [_nextChar]
 * and [_peekChar] return at end of input, so the `finish*` methods treat 0 as
 * "ran out of text".
 */
class TokenizerBase extends TokenizerHelpers implements TokenSource {
  final SourceFile _source;
  final bool _skipWhitespace;
  String _text;

  /** Offset in [_text] of the next character to be read. */
  int _index;

  /**
   * Offset in [_text] where the current token begins. It is never assigned
   * in this class; presumably the generated subclass sets it - TODO confirm.
   */
  int _startIndex;

  /** Keeps track of string interpolation state. */
  InterpStack _interpStack;

  /**
   * Creates a tokenizer over the text of [_source], starting at [index].
   * When [_skipWhitespace] is true, whitespace and comments are skipped
   * instead of being returned as tokens.
   */
  TokenizerBase(this._source, this._skipWhitespace, [index = 0])
      : this._index = index {
    _text = _source.text;
  }

  abstract Token next();
  abstract int getIdentifierKind();

  /** Consumes and returns the next character code, or 0 at end of input. */
  int _nextChar() {
    return _index < _text.length ? _text.charCodeAt(_index++) : 0;
  }

  /** Returns the next character code without consuming it, or 0 at end. */
  int _peekChar() {
    return _index < _text.length ? _text.charCodeAt(_index) : 0;
  }

  /** Consumes the next character only if it equals [ch]; reports whether it did. */
  bool _maybeEatChar(int ch) {
    if (_index < _text.length && _text.charCodeAt(_index) == ch) {
      _index++;
      return true;
    }
    return false;
  }

  /** Returns the text of the current token, clamped to the end of input. */
  String _tokenText() {
    final end = _index < _text.length ? _index : _text.length;
    return _text.substring(_startIndex, end);
  }

  /** Builds a token of [kind] spanning [_startIndex] up to [_index]. */
  Token _finishToken(int kind) {
    return new Token(kind, _source, _startIndex, _index);
  }

  /** Builds an error token for the current span with an optional [message]. */
  Token _errorToken([String message = null]) {
    return new ErrorToken(
        TokenKind.ERROR, _source, _startIndex, _index, message);
  }

  /**
   * Scans a run of whitespace. When skipping whitespace the run is dropped
   * and the following token is returned; otherwise a WHITESPACE token is
   * returned, ending right after the first newline or at the first
   * non-whitespace character. Reaching end of input yields END_OF_FILE.
   */
  Token finishWhitespace() {
    // Step back one character so the loop re-reads it; presumably the caller
    // has already consumed the first whitespace character.
    _index--;
    while (_index < _text.length) {
      final ch = _text.charCodeAt(_index++);
      if (ch == 32/*' '*/ || ch == 9/*'\t'*/ || ch == 13/*'\r'*/) {
        // do nothing
      } else if (ch == 10/*'\n'*/) {
        if (!_skipWhitespace) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
      } else {
        // Not whitespace: leave it for the next token.
        _index--;
        if (_skipWhitespace) {
          return next();
        } else {
          return _finishToken(TokenKind.WHITESPACE);
        }
      }
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

  /** Consumes through the first '\n', '\r' or end of input; returns HASHBANG. */
  Token finishHashBang() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0 || ch == 10/*'\n'*/ || ch == 13/*'\r'*/) {
        return _finishToken(TokenKind.HASHBANG);
      }
    }
  }

  /**
   * Consumes through the first '\n', '\r' or end of input. Returns a COMMENT
   * token, or the following token when whitespace is being skipped.
   */
  Token finishSingleLineComment() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0 || ch == 10/*'\n'*/ || ch == 13/*'\r'*/) {
        if (_skipWhitespace) {
          return next();
        } else {
          return _finishToken(TokenKind.COMMENT);
        }
      }
    }
  }

  /**
   * Consumes a block comment, honouring nested open/close pairs. Returns an
   * error token if input ends before the comment is closed; otherwise a
   * COMMENT token, or the following token when whitespace is being skipped.
   */
  Token finishMultiLineComment() {
    // Starts at 1 for the opener; presumably the caller consumed it already.
    int nesting = 1;
    do {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == 42/*'*'*/) {
        if (_maybeEatChar(47/*'/'*/)) {
          nesting--;
        }
      } else if (ch == 47/*'/'*/) {
        if (_maybeEatChar(42/*'*'*/)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    if (_skipWhitespace) {
      return next();
    } else {
      return _finishToken(TokenKind.COMMENT);
    }
  }

  /** Advances [_index] past any run of decimal digits. */
  void eatDigits() {
    while (_index < _text.length &&
        TokenizerHelpers.isDigit(_text.charCodeAt(_index))) {
      _index++;
    }
  }

  /** Returns the value (0-15) of hex digit character code [c], or -1. */
  static int _hexDigit(int c) {
    if (c >= 48/*0*/ && c <= 57/*9*/) {
      return c - 48;
    } else if (c >= 97/*a*/ && c <= 102/*f*/) {
      return c - 87;
    } else if (c >= 65/*A*/ && c <= 70/*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

  /**
   * Reads hexadecimal digits starting at [_index] and returns their value.
   *
   * Without [hexLength], reads as many hex digits as are available (possibly
   * none, giving 0) and stops at the first non-hex character or at end of
   * input. With [hexLength], exactly that many hex digits must be present;
   * returns -1 if the text is too short or a non-hex character is found.
   */
  int readHex([int hexLength]) {
    // [maxIndex] is an exclusive upper bound on the characters to read.
    int maxIndex;
    if (hexLength === null) {
      // Read up to and including the last character of the text.
      maxIndex = _text.length;
    } else {
      // TODO(jimhug): What if this is too long?
      maxIndex = _index + hexLength;
      // Digits that end exactly at the end of the text are still readable.
      if (maxIndex > _text.length) return -1;
    }
    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.charCodeAt(_index));
      if (digit == -1) {
        if (hexLength === null) {
          return result;
        } else {
          return -1;
        }
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }

    return result;
  }

  /** Reads the digits of a hex literal and returns a HEX_INTEGER token. */
  Token finishHex() {
    final value = readHex();
    return new LiteralToken(TokenKind.HEX_INTEGER, _source, _startIndex,
        _index, value);
  }

  /**
   * Scans the rest of a decimal number. A '.' is only treated as part of the
   * number when a digit follows it, so that `1.toString()` tokenizes as an
   * integer followed by a dot.
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46/*.*/) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      } else {
        // Not a fraction: give the '.' back.
        _index--;
      }
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

  /**
   * Scans an optional exponent and finishes the number token. An exponent
   * turns [kind] into DOUBLE. Returns an error token when the exponent has
   * no digits or when an identifier-start character directly follows the
   * number.
   */
  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
      kind = TokenKind.DOUBLE;
      // At most one sign may precede the exponent digits.
      if (!_maybeEatChar(45/*-*/)) {
        _maybeEatChar(43/*+*/);
      }
      final exponentStart = _index;
      eatDigits();
      if (_index == exponentStart) {
        return _errorToken("missing digits in exponent of number");
      }
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken("illegal character in number");
    }

    return _finishToken(kind);
  }

  /**
   * Builds a string token whose value is the characters in [buf]. The kind
   * is STRING_PART when [isPart] is true (the string was cut short by an
   * interpolation) and STRING otherwise.
   */
  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = new String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return new LiteralToken(kind, _source, _startIndex, _index, s);
  }

  /**
   * Builds a STRING token for a raw string by slicing the source text.
   *
   * The offsets skip two characters at the front and one at the back for a
   * single-line string, and four at the front and three at the back for a
   * multi-line one - presumably a one-character raw prefix plus the quote(s).
   */
  Token _makeRawStringToken(bool isMultiline) {
    String s;
    if (isMultiline) {
      // Skip initial newline in multiline strings
      int start = _startIndex + 4;
      if (_source.text[start] == '\n') start++;
      s = _source.text.substring(start, _index - 3);
    } else {
      s = _source.text.substring(_startIndex + 2, _index - 1);
    }
    return new LiteralToken(TokenKind.STRING, _source, _startIndex, _index, s);
  }

  /**
   * Scans the body of a multi-line string delimited by three [quote]
   * characters. Stops at the closing triple quote (STRING), at a '$'
   * (STRING_PART, pushing an interpolation frame), or with an error token at
   * end of input or on an invalid escape.
   */
  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            return _makeStringToken(buf, false);
          }
          // Two quotes that were not followed by a third are content.
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == 36/*$*/) {
        // start of string interp
        _interpStack = InterpStack.push(_interpStack, quote, true);
        return _makeStringToken(buf, true);
      } else if (ch == 92/*\*/) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Finishes an LBRACE token. Inside an interpolation, a frame whose depth
   * is still -1 moves to 1; otherwise the depth is incremented.
   */
  Token _finishOpenBrace() {
    if (_interpStack != null) {
      if (_interpStack.depth == -1) {
        _interpStack.depth = 1;
      } else {
        assert(_interpStack.depth >= 0);
        _interpStack.depth += 1;
      }
    }
    return _finishToken(TokenKind.LBRACE);
  }

  /** Finishes an RBRACE token, decrementing the interpolation depth if any. */
  Token _finishCloseBrace() {
    if (_interpStack != null) {
      _interpStack.depth -= 1;
      assert(_interpStack.depth >= 0);
    }
    return _finishToken(TokenKind.RBRACE);
  }

  /**
   * Scans a string whose opening [quote] has been read. Two more quotes
   * start a multi-line string, exactly one more is the empty string, and
   * anything else is the body of a single-line string.
   */
  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(10/*'\n'*/);
        return finishMultilineString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    return finishStringBody(quote);
  }

  /**
   * Scans a raw string whose opening [quote] has been read. No escapes or
   * interpolation are processed; the string ends at the next [quote], and
   * end of input before that is an error.
   */
  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  /**
   * Scans a multi-line raw string up to the first run of three [quote]
   * characters; end of input before that is an error.
   */
  Token finishMultilineRawString(int quote) {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  /**
   * Scans the body of a single-line string. Stops at the closing [quote]
   * (STRING), at a '$' (STRING_PART, pushing an interpolation frame), or
   * with an error token at end of input or on an invalid escape.
   */
  Token finishStringBody(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 36/*$*/) {
        // start of string interp
        _interpStack = InterpStack.push(_interpStack, quote, false);
        return _makeStringToken(buf, true);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == 92/*\*/) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Reads the part of an escape sequence that follows the backslash and
   * returns the character code it denotes, or -1 if it is invalid.
   *
   * Named escapes (n, r, f, b, t, v) map to their control characters; `x`
   * takes two hex digits; `u` takes four hex digits or a braced run of hex
   * digits; any other character stands for itself.
   */
  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110/*n*/:
        return 0x0a/*'\n'*/;
      case 114/*r*/:
        return 0x0d/*'\r'*/;
      case 102/*f*/:
        return 0x0c/*'\f'*/;
      case 98/*b*/:
        return 0x08/*'\b'*/;
      case 116/*t*/:
        return 0x09/*'\t'*/;
      case 118/*v*/:
        return 0x0b/*'\v'*/;
      case 120/*x*/:
        hexValue = readHex(2);
        break;
      case 117/*u*/:
        if (_maybeEatChar(123/*{*/)) {
          hexValue = readHex();
          if (!_maybeEatChar(125/*}*/)) {
            return -1;
          }
        } else {
          hexValue = readHex(4);
        }
        break;
      default: return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    if (hexValue < 0xD800 || hexValue > 0xDFFF && hexValue <= 0xFFFF) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF){
      world.fatal('unicode values greater than 2 bytes not implemented yet');
      return -1;
    } else {
      return -1;
    }
  }

  /**
   * Finishes a token that began with '.': a DOUBLE when a digit follows,
   * otherwise a DOT.
   */
  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }

  /**
   * Scans the rest of an identifier or keyword; [ch] is its first character.
   *
   * Directly after the '$' of a string interpolation (frame depth -1) the
   * frame depth becomes 0, a '$' as first character is an error, and the
   * narrower interpolation identifier rule is used for the remaining
   * characters. The token kind comes from [getIdentifierKind].
   */
  Token finishIdentifier(int ch) {
    if (_interpStack != null && _interpStack.depth == -1) {
      _interpStack.depth = 0;
      if (ch == 36/*$*/) {
        return _errorToken(
            @"illegal character after $ in string interpolation");
      }
      while (_index < _text.length &&
          TokenizerHelpers.isInterpIdentifierPart(_text.charCodeAt(_index))) {
        _index++;
      }
    } else {
      while (_index < _text.length &&
          TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index))) {
        _index++;
      }
    }
    // Keywords and plain identifiers are finished the same way.
    return _finishToken(getIdentifierKind());
  }
}

OLD	NEW

« no previous file with comments | « frog/token_kind.g.dart ('k') | frog/tokenizer.g.dart » ('j') | no next file with comments »