// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

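/**
 * Tokenizer for template source text: HTML-style tags and attributes plus
 * `${...}` expression and command markers.
 */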
class Tokenizer extends TokenizerBase {
  TokenKind tmplTokens;

  bool _selectorParsing;

  Tokenizer(SourceFile source, bool skipWhitespace, [int index = 0])
      : super(source, skipWhitespace, index), _selectorParsing = false {
    tmplTokens = new TokenKind();
  }

  int get startIndex() => _startIndex;
  void set index(int idx) {
    _index = idx;
  }

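  /**
   * Returns the next token. [inTag] should be true while tokenizing inside an
   * HTML-style tag, so that `=` is treated as the start of an attribute value.
   */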
  Token next([bool inTag = true]) {
    // Keep track of our starting position.
    _startIndex = _index;

    if (_interpStack != null && _interpStack.depth == 0) {
      var istack = _interpStack;
      _interpStack = _interpStack.pop();

      /* TODO(terry): Enable for variable and string interpolation.
       * if (istack.isMultiline) {
       *   return finishMultilineStringBody(istack.quote);
       * } else {
       *   return finishStringBody(istack.quote);
       * }
       */
    }

    int ch;
    ch = _nextChar();
    switch (ch) {
      case 0:
        return _finishToken(TokenKind.END_OF_FILE);
      case tmplTokens.tokens[TokenKind.SPACE]:
      case tmplTokens.tokens[TokenKind.TAB]:
      case tmplTokens.tokens[TokenKind.NEWLINE]:
      case tmplTokens.tokens[TokenKind.RETURN]:
        return finishWhitespace();
      case tmplTokens.tokens[TokenKind.END_OF_FILE]:
        return _finishToken(TokenKind.END_OF_FILE);
      case tmplTokens.tokens[TokenKind.LPAREN]:
        return _finishToken(TokenKind.LPAREN);
      case tmplTokens.tokens[TokenKind.RPAREN]:
        return _finishToken(TokenKind.RPAREN);
      case tmplTokens.tokens[TokenKind.COMMA]:
        return _finishToken(TokenKind.COMMA);
      case tmplTokens.tokens[TokenKind.LBRACE]:
        return _finishToken(TokenKind.LBRACE);
      case tmplTokens.tokens[TokenKind.RBRACE]:
        return _finishToken(TokenKind.RBRACE);
      case tmplTokens.tokens[TokenKind.LESS_THAN]:
        return _finishToken(TokenKind.LESS_THAN);
      case tmplTokens.tokens[TokenKind.GREATER_THAN]:
        return _finishToken(TokenKind.GREATER_THAN);
      case tmplTokens.tokens[TokenKind.EQUAL]:
        if (inTag) {
          if (_maybeEatChar(tmplTokens.tokens[TokenKind.SINGLE_QUOTE])) {
            return finishQuotedAttrValue(
                tmplTokens.tokens[TokenKind.SINGLE_QUOTE]);
          } else if (_maybeEatChar(tmplTokens.tokens[TokenKind.DOUBLE_QUOTE])) {
            return finishQuotedAttrValue(
                tmplTokens.tokens[TokenKind.DOUBLE_QUOTE]);
          } else if (isAttributeValueStart(_peekChar())) {
            return finishAttrValue();
          }
        }
        return _finishToken(TokenKind.EQUAL);
      case tmplTokens.tokens[TokenKind.SLASH]:
        if (_maybeEatChar(tmplTokens.tokens[TokenKind.GREATER_THAN])) {
          return _finishToken(TokenKind.END_NO_SCOPE_TAG);  // />
        } else {
          return _finishToken(TokenKind.SLASH);
        }
      case tmplTokens.tokens[TokenKind.DOLLAR]:
        if (_maybeEatChar(tmplTokens.tokens[TokenKind.LBRACE])) {
          if (_maybeEatChar(tmplTokens.tokens[TokenKind.HASH])) {
            return _finishToken(TokenKind.START_COMMAND);  // ${#
          } else if (_maybeEatChar(tmplTokens.tokens[TokenKind.SLASH])) {
            return _finishToken(TokenKind.END_COMMAND);  // ${/
          } else {
            return _finishToken(TokenKind.START_EXPRESSION);  // ${
          }
        } else {
          return _finishToken(TokenKind.DOLLAR);
        }

      default:
        if (TokenizerHelpers.isIdentifierStart(ch)) {
          return this.finishIdentifier();
        } else if (isDigit(ch)) {
          return this.finishNumber();
        } else {
          return _errorToken();
        }
    }
  }

  // TODO(jmesserly): we need a way to emit human readable error messages from
  // the tokenizer.
  Token _errorToken([String message = null]) {
    return _finishToken(TokenKind.ERROR);
  }

  int getIdentifierKind() {
    // Is the identifier an element?
    int tokId = TokenKind.matchElements(_text, _startIndex,
        _index - _startIndex);
    if (tokId == -1) {
      // No, is it an attribute?
      // tokId = TokenKind.matchAttributes(_text, _startIndex, _index - _startIndex);
    }
    if (tokId == -1) {
      tokId = TokenKind.matchKeywords(_text, _startIndex, _index - _startIndex);
    }

    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
  }

  // Need to override so CSS version of isIdentifierPart is used.
  Token finishIdentifier() {
    while (_index < _text.length) {
      // if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index++))) {
      if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index))) {
        // _index--;
        break;
      } else {
        _index += 1;
      }
    }
    if (_interpStack != null && _interpStack.depth == -1) {
      _interpStack.depth = 0;
    }
    int kind = getIdentifierKind();
    if (kind == TokenKind.IDENTIFIER) {
      return _finishToken(TokenKind.IDENTIFIER);
    } else {
      return _finishToken(kind);
    }
  }

  Token _makeAttributeValueToken(List<int> buf) {
    final s = new String.fromCharCodes(buf);
    return new LiteralToken(TokenKind.ATTR_VALUE, _source, _startIndex, _index,
        s);
  }

  /* If quote is -1, read up to the first whitespace; otherwise read up to the
   * matching single or double quote character.
   */
  Token finishQuotedAttrValue([int quote = -1]) {
    var buf = new List<int>();
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeAttributeValueToken(buf);
      } else if (ch == 0) {
        return _errorToken();
      } else {
        buf.add(ch);
      }
    }
  }

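  /* Reads an unquoted attribute value; stops before (does not consume) the
   * terminating whitespace, '/' or '>' character.
   */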
  Token finishAttrValue() {
    var buf = new List<int>();
    while (true) {
      int ch = _peekChar();
      if (isWhitespace(ch) || isSlash(ch) || isCloseTag(ch)) {
        return _makeAttributeValueToken(buf);
      } else if (ch == 0) {
        return _errorToken();
      } else {
        buf.add(_nextChar());
      }
    }
  }

  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46/*.*/) {
      // Handle the case of 1.toString().
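      // If the '.' is followed by a digit (e.g. "1.5"), consume the fraction
      // and emit a DOUBLE; otherwise back up so the '.' is not part of this
      // token and emit an INTEGER.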
      _nextChar();
      if (isDigit(_peekChar())) {
        eatDigits();
        return _finishToken(TokenKind.DOUBLE);
      } else {
        _index -= 1;
      }
    }

    return _finishToken(TokenKind.INTEGER);
  }

  bool maybeEatDigit() {
    if (_index < _text.length && isDigit(_text.charCodeAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  void eatHexDigits() {
    while (_index < _text.length) {
      if (isHexDigit(_text.charCodeAt(_index))) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  bool maybeEatHexDigit() {
    if (_index < _text.length && isHexDigit(_text.charCodeAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  Token finishMultiLineComment() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
      } else if (ch == 42/*'*'*/) {
        if (_maybeEatChar(47/*'/'*/)) {
          if (_skipWhitespace) {
            return next();
          } else {
            return _finishToken(TokenKind.COMMENT);
          }
        }
      } else if (ch == tmplTokens.tokens[TokenKind.MINUS]) {
        /* Check for the closing part of a comment, "-->" (CDC). */
        if (_maybeEatChar(tmplTokens.tokens[TokenKind.MINUS])) {
          if (_maybeEatChar(tmplTokens.tokens[TokenKind.GREATER_THAN])) {
            if (_skipWhitespace) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
      }
    }
    return _errorToken();
  }

}
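
// A minimal usage sketch (not part of the library), assuming SourceFile can
// be constructed from a filename and its text, and that Token exposes `kind`
// as used elsewhere in this package:
//
//   var source = new SourceFile('page.tmpl', '<div id="${id}"></div>');
//   var tokenizer = new Tokenizer(source, true);
//   var tok = tokenizer.next();
//   while (tok.kind != TokenKind.END_OF_FILE) {
//     print(tok.kind);
//     tok = tokenizer.next();
//   }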


/** Static helper methods. */
class TokenizerHelpers {
  static bool isIdentifierStart(int c) {
    return ((c >= 97/*a*/ && c <= 122/*z*/) ||
        (c >= 65/*A*/ && c <= 90/*Z*/) || c == 95/*_*/);
  }

  static bool isDigit(int c) {
    return (c >= 48/*0*/ && c <= 57/*9*/);
  }

  static bool isHexDigit(int c) {
    return (isDigit(c) || (c >= 97/*a*/ && c <= 102/*f*/) ||
        (c >= 65/*A*/ && c <= 70/*F*/));
  }

  static bool isWhitespace(int c) {
    return (c == 32/*' '*/ || c == 9/*'\t'*/ || c == 10/*'\n'*/ ||
        c == 13/*'\r'*/);
  }

  static bool isIdentifierPart(int c) {
    return (isIdentifierStart(c) || isDigit(c) || c == 45/*-*/ ||
        c == 58/*:*/ || c == 46/*.*/);
  }

  static bool isInterpIdentifierPart(int c) {
    return (isIdentifierStart(c) || isDigit(c));
  }

  static bool isAttributeValueStart(int c) {
    return !isWhitespace(c) && !isSlash(c) && !isCloseTag(c);
  }

  static bool isSlash(int c) {
    return (c == 47/* / */);
  }

  static bool isCloseTag(int c) {
    return (c == 62/* > */);
  }
}