Index: frog/scripts/tokenizer_gen.py
diff --git a/frog/scripts/tokenizer_gen.py b/frog/scripts/tokenizer_gen.py
deleted file mode 100755
index 882444feaab4d26ecba166af2b6f02970961311a..0000000000000000000000000000000000000000
--- a/frog/scripts/tokenizer_gen.py
+++ /dev/null
@@ -1,298 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
-# for details. All rights reserved. Use of this source code is governed by a
-# BSD-style license that can be found in the LICENSE file.
-
-'''Generates the Tokenizer class into tokenizer.g.dart.'''
-
-import re
-from token_info import tokens, keywords
-from codegen import CodeWriter, HEADER
-
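-# Formats a character as its char code plus a /*...*/ comment for readability;
-# whitespace and comment delimiters are repr()'d so they cannot break the
-# generated Dart comment.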
-def makeSafe(ch):
-  ch_s = ch
-  if ch in ' \t\n\r*/': ch_s = repr(ch)
-  return '%d/*%s*/' % (ord(ch), ch_s)
-
-
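-# A Case is one node in a trie of token texts: `cases` maps each possible next
-# character to a child node, and `token` is the token that ends exactly at
-# this node, if any.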
-class Case:
-  def __init__(self, ch, token, includeWhitespace=False):
-    self.ch = ch
-    self.cases = {}
-    self.token = None
-    self.includeWhitespace = includeWhitespace
-    if len(ch) > 0:
-      self.cases[ch[0]] = Case(ch[1:], token)
-    else:
-      self.token = token
-
-  def addCase(self, ch, token):
-    if len(ch) == 0:
-      self.token = token
-    else:
-      searchChar = ch[0]
-      if searchChar in self.cases:
-        self.cases[searchChar].addCase(ch[1:], token)
-      else:
-        self.cases[searchChar] = Case(ch[1:], token)
-
-  def defaultReturn(self):
-    if self.token is not None:
-      return 'return %s;' % self.token.getFinishCode()
-    else:
-      return 'return _errorToken();'
-
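-  # Emits dispatch code for this node: a chain of _maybeEatChar() tests when
-  # there are only a few alternatives, otherwise a switch on the next char.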
-  def writeCases(self, cw):
-    if len(self.cases) == 0:
-      cw.writeln(self.defaultReturn())
-    elif len(self.cases) < 4 and not self.includeWhitespace:
-      optElse = ''
-      for key, case in sorted(self.cases.items()):
-        cw.enterBlock('%sif (_maybeEatChar(%s)) {' % (optElse, makeSafe(key)))
-        case.writeCases(cw)
-        cw.exitBlock()
-        optElse = '} else '
-      cw.enterBlock('} else {')
-      cw.writeln(self.defaultReturn())
-
-      cw.exitBlock('}')
-    else:
-      cw.writeln('ch = _nextChar();')
-      cw.enterBlock('switch(ch) {')
-      if self.includeWhitespace:
-        self.writeWhitespace(cw)
-      for key, case in sorted(self.cases.items()):
-        cw.enterBlock('case %s:' % makeSafe(key))
-
-        case.writeCases(cw)
-        cw.exitBlock()
-      if self.includeWhitespace:
-        cw.enterBlock('default:')
-        cw.enterBlock('if (TokenizerHelpers.isIdentifierStart(ch)) {')
-        cw.writeln('return this.finishIdentifier(ch);')
-        cw.exitBlock('} else if (TokenizerHelpers.isDigit(ch)) {')
-        cw.enterBlock()
-        cw.writeln('return this.finishNumber();')
-        cw.exitBlock('} else {')
-        cw.enterBlock()
-        cw.writeln(self.defaultReturn())
-        cw.exitBlock('}')
-      else:
-        cw.writeln('default: ' + self.defaultReturn())
-      cw.exitBlock('}')
-
-  def writeWhitespace(self, cw):
-    cw.writeln('case 0: return _finishToken(TokenKind.END_OF_FILE);')
-    cw.enterBlock(r"case %s: case %s: case %s: case %s:" %
-        tuple([makeSafe(ch) for ch in ' \t\n\r']))
-    cw.writeln('return finishWhitespace();')
-    cw.exitBlock()
-
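-# Builds the trie root from every token text; whitespace and identifier
-# handling is enabled only at the top level.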
-def computeCases():
-  top = Case('', None, True)
-  for tok in tokens:
-    #print tok.text
-    if tok.text != '':
-      top.addCase(tok.text, tok)
-  return top
-
-cases = computeCases()
-
-TOKENIZER = '''
-/** A generated file that extends the hand-coded methods in TokenizerBase. */
-class Tokenizer extends TokenizerBase {
-
-  Tokenizer(SourceFile source, bool skipWhitespace, [int index = 0])
-    : super(source, skipWhitespace, index);
-
-  Token next() {
-    // Keep track of our starting position.
-    _startIndex = _index;
-
-    if (_interpStack != null && _interpStack.depth == 0) {
-      var istack = _interpStack;
-      _interpStack = _interpStack.pop();
-      if (istack.isMultiline) {
-        return finishMultilineString(istack.quote);
-      } else {
-        return finishStringBody(istack.quote);
-      }
-    }
-
-    int ch;
-%(cases)s
-  }
-
-%(extraMethods)s
-}
-
-/** Static helper methods. */
-class TokenizerHelpers {
-%(helperMethods)s
-}
-'''
-
-
-
-def charAsInt(ch):
-  return '%d/*%r*/' % (ord(ch), ch)
-
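-# CharTest, OrTest, and ExplicitTest compose the boolean expressions behind
-# the generated TokenizerHelpers predicates below.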
-class CharTest:
-  def __init__(self, fromChar, toChar=None):
-    self.fromChar = fromChar
-    self.toChar = toChar
-
-  def toCode(self):
-    if self.toChar is None:
-      return 'c == %s' % makeSafe(self.fromChar)
-    else:
-      return '(c >= %s && c <= %s)' % (
-          makeSafe(self.fromChar), makeSafe(self.toChar))
-
-class OrTest:
-  def __init__(self, *args):
-    self.tests = args
-
-  def toCode(self):
-    return '(' + ' || '.join([test.toCode() for test in self.tests]) + ')'
-
-class ExplicitTest:
-  def __init__(self, text):
-    self.text = text
-
-  def toCode(self):
-    return self.text
-
-
-def writeClass(cw, name, test):
-  cw.enterBlock('static bool is%s(int c) {' % name)
-  cw.writeln('return %s;' % test.toCode())
-  cw.exitBlock('}')
-  cw.writeln()
-
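-# Groups keywords of equal length so getIdentifierKind() can switch on the
-# identifier's length first, then compare characters one by one.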
-# TODO(jimhug): if (_restMatches(_text, i0+1, 'ase')) would be good!
-class LengthGroup:
-  def __init__(self, length):
-    self.length = length
-    self.kws = []
-
-  def add(self, kw):
-    self.kws.append(kw)
-
-  def writeCode(self, cw):
-    cw.enterBlock('case %d:' % self.length)
-    self.writeTests(cw, self.kws)
-    cw.writeln('return TokenKind.IDENTIFIER;')
-    cw.exitBlock()
-
-
-  def writeTests(self, cw, kws, index=0):
-    if len(kws) == 1:
-      kw = kws[0].text
-      if index == len(kw):
-        cw.writeln('return TokenKind.%s;' % (kws[0].name))
-      else:
-        clauses = [
-            "_text.charCodeAt(%s) == %s" % (
-                makeIndex('i0', i), makeSafe(kw[i]))
-            for i in range(index, len(kw))]
-        test = 'if (%s) return TokenKind.%s;' % (
-            ' && '.join(clauses), kws[0].name)
-        cw.writeln(test)
-    else:
-      starts = {}
-      for kw in kws:
-        c0 = kw.text[index]
-        if c0 not in starts:
-          starts[c0] = []
-        starts[c0].append(kw)
-
-      cw.writeln('ch = _text.charCodeAt(%s);' % makeIndex('i0', index))
-      prefix = ''
-      for key, value in sorted(starts.items()):
-        cw.enterBlock('%sif (ch == %s) {' % (prefix, makeSafe(key)))
-        #cw.writeln(repr(value))
-        self.writeTests(cw, value, index+1)
-        cw.exitBlock()
-        prefix = '} else '
-      cw.writeln('}')
-    #cw.writeln(repr(kws))
-
-  def __str__(self):
-    return '%d: %r' % (self.length, self.kws)
-
-def makeIndex(index, offset):
-  if offset == 0:
-    return index
-  else:
-    return '%s+%d' % (index, offset)
-
-def writeHelperMethods(cw):
-  cw.enterBlock()
-  cw.writeln()
-  writeClass(cw, 'IdentifierStart', OrTest(
-      CharTest('a', 'z'), CharTest('A', 'Z'), CharTest('_'))) #TODO: CharTest('$')
-  writeClass(cw, 'Digit', CharTest('0', '9'))
-  writeClass(cw, 'HexDigit', OrTest(
-      ExplicitTest('isDigit(c)'), CharTest('a', 'f'), CharTest('A', 'F')))
-  writeClass(cw, 'Whitespace', OrTest(
-      CharTest(' '), CharTest('\t'), CharTest('\n'), CharTest('\r')))
-  writeClass(cw, 'IdentifierPart', OrTest(
-      ExplicitTest('isIdentifierStart(c)'),
-      ExplicitTest('isDigit(c)'),
-      CharTest('$')))
-  # This is like IdentifierPart, but without $.
-  writeClass(cw, 'InterpIdentifierPart', OrTest(
-      ExplicitTest('isIdentifierStart(c)'),
-      ExplicitTest('isDigit(c)')))
-
-def writeExtraMethods(cw):
-  lengths = {}
-  for kw in keywords:
-    l = len(kw.text)
-    if l not in lengths:
-      lengths[l] = LengthGroup(l)
-    lengths[l].add(kw)
-
-  # TODO(jimhug): Consider merging this with the finishIdentifier code.
-  cw.enterBlock()
-  cw.enterBlock('int getIdentifierKind() {')
-  cw.writeln('final i0 = _startIndex;')
-  cw.writeln('int ch;')
-  cw.enterBlock('switch (_index - i0) {')
-  for key, value in sorted(lengths.items()):
-    value.writeCode(cw)
-  cw.writeln('default: return TokenKind.IDENTIFIER;')
-  cw.exitBlock('}')
-  cw.exitBlock('}')
-
-def makeSafe1(match):
-  return makeSafe(match.group(1))
-
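-# Renders the three generated sections and substitutes them into TOKENIZER;
-# any @x escape in the template is expanded to a char code via makeSafe.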
-def main():
-  cw = CodeWriter(__file__)
-  cw._indent += 2
-  cases.writeCases(cw)
-  casesCode = str(cw)
-
-  cw = CodeWriter(__file__)
-  writeExtraMethods(cw)
-  extraMethods = str(cw)
-
-  cw = CodeWriter(__file__)
-  writeHelperMethods(cw)
-  helperMethods = str(cw)
-
-  out = open('tokenizer.g.dart', 'w')
-  out.write(HEADER % __file__)
-  pat = re.compile('@(.)', re.DOTALL)
-  text = pat.sub(makeSafe1, TOKENIZER)
-  out.write(text % {
-      'cases': casesCode,
-      'extraMethods': extraMethods,
-      'helperMethods': helperMethods })
-  out.close()
-
-
-if __name__ == '__main__': main()
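
For reference, the core technique the deleted script implements, a prefix trie of token texts expanded into nested _maybeEatChar() tests, can be sketched standalone. This is a minimal illustration, not the original code: TokenInfo and Node below are hypothetical stand-ins for the real token_info and codegen modules, and only the small if-chain form of the dispatch is shown.

# Hypothetical stand-in for token_info.TokenInfo (illustration only).
class TokenInfo:
  def __init__(self, name, text):
    self.name = name
    self.text = text

  def getFinishCode(self):
    return '_finishToken(TokenKind.%s)' % self.name


# One trie node per matched prefix, mirroring the Case class in the diff.
class Node:
  def __init__(self):
    self.children = {}  # next character -> child Node
    self.token = None   # token that ends exactly at this node, if any

  def add(self, text, token):
    if not text:
      self.token = token
    else:
      self.children.setdefault(text[0], Node()).add(text[1:], token)

  def emit(self, indent='    '):
    # Longest match first: try to consume one more character before
    # falling back to the token (or an error) that ends at this node.
    for ch, child in sorted(self.children.items()):
      print('%sif (_maybeEatChar(%d/*%s*/)) {' % (indent, ord(ch), ch))
      child.emit(indent + '  ')
      print('%s}' % indent)
    if self.token is not None:
      print('%sreturn %s;' % (indent, self.token.getFinishCode()))
    else:
      print('%sreturn _errorToken();' % indent)


root = Node()
for name, text in [('EQ', '='), ('EQ_EQ', '=='), ('ARROW', '=>')]:
  root.add(text, TokenInfo(name, text))
root.emit()

Running it prints the nested dispatch for '=', '==', and '=>', the same shape that Case.writeCases emits for small case sets.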