// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

class Tokenizer extends TokenizerBase {
  TokenKind tmplTokens;

  bool _selectorParsing;

  Tokenizer(SourceFile source, bool skipWhitespace, [int index = 0])
      : _selectorParsing = false,
        super(source, skipWhitespace, index) {
    tmplTokens = new TokenKind();
  }

  int get startIndex() => _startIndex;
  void set index(int idx) {
    _index = idx;
  }

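  /**
   * Scans and returns the next token at the current position. [inTag]
   * enables attribute-value scanning after '=' (quoted or unquoted values).
   */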
  Token next([bool inTag = true]) {
    // keep track of our starting position
    _startIndex = _index;

    if (_interpStack != null && _interpStack.depth == 0) {
      var istack = _interpStack;
      _interpStack = _interpStack.pop();

      /* TODO(terry): Enable for variable and string interpolation.
       * if (istack.isMultiline) {
       *   return finishMultilineStringBody(istack.quote);
       * } else {
       *   return finishStringBody(istack.quote);
       * }
       */
    }

    int ch;
    ch = _nextChar();
    switch (ch) {
      case 0:
        return _finishToken(TokenKind.END_OF_FILE);
      case tmplTokens.tokens[TokenKind.SPACE]:
      case tmplTokens.tokens[TokenKind.TAB]:
      case tmplTokens.tokens[TokenKind.NEWLINE]:
      case tmplTokens.tokens[TokenKind.RETURN]:
        return finishWhitespace();
      case tmplTokens.tokens[TokenKind.END_OF_FILE]:
        return _finishToken(TokenKind.END_OF_FILE);
      case tmplTokens.tokens[TokenKind.LPAREN]:
        return _finishToken(TokenKind.LPAREN);
      case tmplTokens.tokens[TokenKind.RPAREN]:
        return _finishToken(TokenKind.RPAREN);
      case tmplTokens.tokens[TokenKind.COMMA]:
        return _finishToken(TokenKind.COMMA);
      case tmplTokens.tokens[TokenKind.LBRACE]:
        return _finishToken(TokenKind.LBRACE);
      case tmplTokens.tokens[TokenKind.RBRACE]:
        return _finishToken(TokenKind.RBRACE);
      case tmplTokens.tokens[TokenKind.LESS_THAN]:
        return _finishToken(TokenKind.LESS_THAN);
      case tmplTokens.tokens[TokenKind.GREATER_THAN]:
        return _finishToken(TokenKind.GREATER_THAN);
      case tmplTokens.tokens[TokenKind.EQUAL]:
        if (inTag) {
          if (_maybeEatChar(tmplTokens.tokens[TokenKind.SINGLE_QUOTE])) {
            return finishQuotedAttrValue(
                tmplTokens.tokens[TokenKind.SINGLE_QUOTE]);
          } else if (_maybeEatChar(tmplTokens.tokens[TokenKind.DOUBLE_QUOTE])) {
            return finishQuotedAttrValue(
                tmplTokens.tokens[TokenKind.DOUBLE_QUOTE]);
          } else if (isAttributeValueStart(_peekChar())) {
            return finishAttrValue();
          }
        }
        return _finishToken(TokenKind.EQUAL);
      case tmplTokens.tokens[TokenKind.SLASH]:
        if (_maybeEatChar(tmplTokens.tokens[TokenKind.GREATER_THAN])) {
          return _finishToken(TokenKind.END_NO_SCOPE_TAG); // />
        } else {
          return _finishToken(TokenKind.SLASH);
        }
      case tmplTokens.tokens[TokenKind.DOLLAR]:
        if (_maybeEatChar(tmplTokens.tokens[TokenKind.LBRACE])) {
          if (_maybeEatChar(tmplTokens.tokens[TokenKind.HASH])) {
            return _finishToken(TokenKind.START_COMMAND); // ${#
          } else if (_maybeEatChar(tmplTokens.tokens[TokenKind.SLASH])) {
            return _finishToken(TokenKind.END_COMMAND); // ${/
          } else {
            return _finishToken(TokenKind.START_EXPRESSION); // ${
          }
        } else {
          return _finishToken(TokenKind.DOLLAR);
        }

      default:
        if (TokenizerHelpers.isIdentifierStart(ch)) {
          return this.finishIdentifier();
        } else if (isDigit(ch)) {
          return this.finishNumber();
        } else {
          return _errorToken();
        }
    }
  }

  // TODO(jmesserly): we need a way to emit human readable error messages from
  // the tokenizer.
  Token _errorToken([String message = null]) {
    return _finishToken(TokenKind.ERROR);
  }

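  /**
   * Classifies the identifier just scanned: element names and keywords map to
   * their specific token kinds; anything else is a plain IDENTIFIER.
   */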
  int getIdentifierKind() {
    // Is the identifier an element?
    int tokId = TokenKind.matchElements(_text, _startIndex, _index - _startIndex);
    if (tokId == -1) {
      // No, is it an attribute?
      // tokId = TokenKind.matchAttributes(_text, _startIndex, _index - _startIndex);
    }
    if (tokId == -1) {
      tokId = TokenKind.matchKeywords(_text, _startIndex, _index - _startIndex);
    }

    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
  }

  // Need to override so CSS version of isIdentifierPart is used.
  Token finishIdentifier() {
    while (_index < _text.length) {
      // if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index++))) {
      if (!TokenizerHelpers.isIdentifierPart(_text.charCodeAt(_index))) {
        // _index--;
        break;
      } else {
        _index += 1;
      }
    }
    if (_interpStack != null && _interpStack.depth == -1) {
      _interpStack.depth = 0;
    }
    int kind = getIdentifierKind();
    if (kind == TokenKind.IDENTIFIER) {
      return _finishToken(TokenKind.IDENTIFIER);
    } else {
      return _finishToken(kind);
    }
  }

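  /** Builds an ATTR_VALUE literal token from the collected character codes. */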
  Token _makeAttributeValueToken(List<int> buf) {
    final s = new String.fromCharCodes(buf);
    return new LiteralToken(TokenKind.ATTR_VALUE, _source, _startIndex, _index,
        s);
  }

  /* A quote of -1 signals to read up to the first whitespace; otherwise read
   * up to the matching single or double quote character.
   */
  Token finishQuotedAttrValue([int quote = -1]) {
    var buf = new List<int>();
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeAttributeValueToken(buf);
      } else if (ch == 0) {
        return _errorToken();
      } else {
        buf.add(ch);
      }
    }
  }

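  /** Reads an unquoted attribute value up to whitespace, '/', or '>'. */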
  Token finishAttrValue() {
    var buf = new List<int>();
    while (true) {
      int ch = _peekChar();
      if (isWhitespace(ch) || isSlash(ch) || isCloseTag(ch)) {
        return _makeAttributeValueToken(buf);
      } else if (ch == 0) {
        return _errorToken();
      } else {
        buf.add(_nextChar());
      }
    }
  }

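  /**
   * Scans digits; returns DOUBLE when a '.' is followed by more digits,
   * otherwise INTEGER (a trailing '.' is left unconsumed, e.g. 1.toString()).
   */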
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46/*.*/) {
      // Handle the case of 1.toString().
      _nextChar();
      if (isDigit(_peekChar())) {
        eatDigits();
        return _finishToken(TokenKind.DOUBLE);
      } else {
        _index -= 1;
      }
    }

    return _finishToken(TokenKind.INTEGER);
  }

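  /** Consumes a single decimal digit if one is next; returns whether it did. */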
  bool maybeEatDigit() {
    if (_index < _text.length && isDigit(_text.charCodeAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

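  /** Consumes a run of consecutive hexadecimal digits. */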
  void eatHexDigits() {
    while (_index < _text.length) {
      if (isHexDigit(_text.charCodeAt(_index))) {
        _index += 1;
      } else {
        return;
      }
    }
  }

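  /** Consumes a single hexadecimal digit if one is next; returns whether it did. */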
  bool maybeEatHexDigit() {
    if (_index < _text.length && isHexDigit(_text.charCodeAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

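  /**
   * Consumes the body of a comment: returns on '*/' (block comment) or on a
   * '-->' CDC sequence (HTML comment); end of input yields INCOMPLETE_COMMENT.
   * When whitespace is being skipped, the comment itself is skipped too.
   */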
  Token finishMultiLineComment() {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
      } else if (ch == 42/*'*'*/) {
        if (_maybeEatChar(47/*'/'*/)) {
          if (_skipWhitespace) {
            return next();
          } else {
            return _finishToken(TokenKind.COMMENT);
          }
        }
      } else if (ch == tmplTokens.tokens[TokenKind.MINUS]) {
        /* Check if close part of Comment Definition --> (CDC). */
        if (_maybeEatChar(tmplTokens.tokens[TokenKind.MINUS])) {
          if (_maybeEatChar(tmplTokens.tokens[TokenKind.GREATER_THAN])) {
            if (_skipWhitespace) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
      }
    }
    return _errorToken();
  }

}


/** Static helper methods. */
class TokenizerHelpers {
  static bool isIdentifierStart(int c) {
    return ((c >= 97/*a*/ && c <= 122/*z*/) || (c >= 65/*A*/ && c <= 90/*Z*/) ||
        c == 95/*_*/);
  }

  static bool isDigit(int c) {
    return (c >= 48/*0*/ && c <= 57/*9*/);
  }

  static bool isHexDigit(int c) {
    return (isDigit(c) || (c >= 97/*a*/ && c <= 102/*f*/) ||
        (c >= 65/*A*/ && c <= 70/*F*/));
  }

  static bool isWhitespace(int c) {
    return (c == 32/*' '*/ || c == 9/*'\t'*/ || c == 10/*'\n'*/ ||
        c == 13/*'\r'*/);
  }

  static bool isIdentifierPart(int c) {
    return (isIdentifierStart(c) || isDigit(c) || c == 45/*-*/ ||
        c == 58/*:*/ || c == 46/*.*/);
  }

  static bool isInterpIdentifierPart(int c) {
    return (isIdentifierStart(c) || isDigit(c));
  }

  static bool isAttributeValueStart(int c) {
    return !isWhitespace(c) && !isSlash(c) && !isCloseTag(c);
  }

  static bool isSlash(int c) {
    return (c == 47/* / */);
  }

  static bool isCloseTag(int c) {
    return (c == 62/* > */);
  }
}