Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 /** | |
| 6 * A mini parser that extracts top-level directives (library, imports, exports, | |
| 7 * and parts) from a dart source file. | |
| 8 */ | |
| 9 library directive_parser; | |
| 10 | |
| 11 import 'info.dart' show DartCodeInfo, DartDirectiveInfo; | |
| 12 import 'messages.dart' show Messages; | |
| 13 | |
| 14 /** Parse [code] and return its top-level directives; errors are reported through [messages]. */ | |
| 15 DartCodeInfo parseDartCode(String code, Messages messages) { | |
| 16 return new _DirectiveParser(messages).parse(code); | |
| 17 } | |
| 18 | |
| 19 /** A parser that extracts top-level directives and stops at the first token of actual code. */ | |
| 20 // TODO(sigmund): add source-span to error messages | |
| 21 class _DirectiveParser { | |
| 22 /** Tokenizer used to parse the input until the end of the directives; created by [parse]. */ | |
| 23 _DirectiveTokenizer tokenizer; | |
| 24 | |
| 25 /** Name extracted from the `library` directive, if any. */ | |
| 26 String libraryName; | |
| 27 | |
| 28 /** Name extracted from the `part of` directive, if any. */ | |
| 29 String partName; | |
| 30 | |
| 31 /** Extracted imports, exports, and parts, if any, in the order they were parsed. */ | |
| 32 List<DartDirectiveInfo> directives = <DartDirectiveInfo>[]; | |
| 33 | |
| 34 /** Helper for reporting error messages. */ | |
| 35 Messages messages; | |
| 36 | |
| 37 /** Last token read by the parser. */ | |
| 38 Token token; | |
| 39 | |
| 40 _DirectiveParser(this.messages); | |
| 41 | |
| 42 /** Parse [code] and return its directives together with the text starting at the token where directive parsing stopped. */ | |
| 43 DartCodeInfo parse(String code) { | |
| 44 tokenizer = new _DirectiveTokenizer(code); | |
| 45 parseTopLevel(); | |
| 46 return new DartCodeInfo(libraryName, partName, directives, | |
| 47 code.substring(token.start)); | |
| 48 } | |
| 49 | |
| 50 /** | |
| 51 * Parse top-level directives and comments, but unlike normal Dart code, stop | |
| 52 * as soon as we find actual code, leaving [token] on the token that stopped us. | |
| 53 */ | |
| 54 void parseTopLevel() { | |
| 55 token = tokenizer.next(); | |
| 56 while (token.kind != Token.EOF) { | |
| 57 if (token.kind == Token.IDENTIFIER) { | |
| 58 if (token.value == 'library') { | |
| 59 parseLibrary(); | |
| 60 } else if (token.value == 'part') { | |
| 61 parsePart(); | |
| 62 } else if (token.value == 'import') { | |
| 63 parseImport(); | |
| 64 } else if (token.value == 'export') { | |
| 65 parseExport(); | |
| 66 } else { | |
| 67 break; | |
| 68 } | |
| 69 } else if (token.kind != Token.COMMENT) { | |
| 70 break; | |
| 71 } | |
| 72 token = tokenizer.next(); | |
| 73 } | |
| 74 } | |
| 75 | |
| 76 /** Parse a library declaration, such as `library foo.bar;`, storing the name in [libraryName]. */ | |
| 77 parseLibrary() { | |
| 78 libraryName = parseQualifiedName(); |

Jennifer Messerly
2012/10/25 04:26:25
fyi, this is not allowed by Dart language spec whe
Siggi Cherem (dart-lang)
2012/10/25 05:17:29
Good question - it's not perfectly clear. The spec
| |
| 79 expectToken(Token.SEMICOLON); | |
| 80 } | |
| 81 | |
| 82 /** | |
| 83 * Parse either a part-of declaration (sets [partName]) or a part inclusion (adds to [directives]). For instance, | |
| 84 * part of foo.bar; | |
| 85 * or | |
| 86 * part "foo"; | |
| 87 */ | |
| 88 parsePart() { | |
| 89 token = tokenizer.next(); | |
| 90 if (token.kind == Token.IDENTIFIER && token.value == 'of') { | |
| 91 partName = parseQualifiedName(); | |
| 92 } else if (token.kind == Token.STRING) { | |
| 93 directives.add(new DartDirectiveInfo('part', token.value)); | |
| 94 token = tokenizer.next(); | |
| 95 } else { | |
| 96 messages.error('unexpected token: ${token}', null); | |
| 97 } | |
| 98 expectToken(Token.SEMICOLON); | |
| 99 } | |
| 100 | |
| 101 /** Parse a qualified name, such as `one.two.three`, up to its terminating semicolon; reports an error and returns null if it is malformed. */ | |
| 102 parseQualifiedName() { | |
| 103 List<String> segments = []; | |
| 104 while (true) { | |
| 105 token = tokenizer.next(); | |
| 106 if (token.kind != Token.IDENTIFIER) { | |
| 107 messages.error('invalid qualified name: $token', null); | |
| 108 return null; | |
| 109 } | |
| 110 segments.add(token.value); | |
| 111 token = tokenizer.next(); | |
| 112 if (token.kind == Token.SEMICOLON) break; | |
| 113 if (token.kind != Token.DOT) { | |
| 114 messages.error('invalid qualified name: $token', null); | |
| 115 return null; | |
| 116 } | |
| 117 } | |
| 118 return Strings.join(segments, '.'); | |
| 119 } | |
| 120 | |
| 121 /** Parse an import, with optional prefix and show/hide combinators; if the url or prefix is malformed an error is reported and no directive is added. */ | |
| 122 parseImport() { | |
| 123 token = tokenizer.next(); | |
| 124 if (token.kind != Token.STRING) { | |
| 125 messages.error('expected an import url, but found ${token}', null); |

Jennifer Messerly
2012/10/25 04:26:25
would be good to include the file name
Siggi Cherem (dart-lang)
2012/10/25 05:17:29
Done.
| |
| 126 return; | |
| 127 } | |
| 128 var uri = token.value; |

Jennifer Messerly
2012/10/25 04:26:25
does the spec allow sequential string concat? i.e.
Siggi Cherem (dart-lang)
2012/10/25 05:17:29
fixed. the spec allows it, but the vm crashes on i
| |
| 129 | |
| 130 // Parse the optional prefix. | |
| 131 token = tokenizer.next(); | |
| 132 var prefix; | |
| 133 if (token.kind == Token.IDENTIFIER && token.value == 'as') { | |
| 134 token = tokenizer.next(); | |
| 135 if (token.kind != Token.IDENTIFIER) { | |
| 136 messages.error('expected an identifier as prefix, but found ${token}', | |
| 137 null); | |
| 138 return; | |
| 139 } | |
| 140 prefix = token.value; | |
| 141 token = tokenizer.next(); | |
| 142 } | |
| 143 | |
| 144 // Parse the optional show/hide combinators. | |
| 145 var hide; | |
| 146 var show; | |
| 147 while (token.kind == Token.IDENTIFIER) { | |
| 148 if (token.value == 'hide') { | |
| 149 if (hide == null) hide = []; | |
| 150 hide.addAll(parseIdentifierList()); | |
| 151 } else if (token.value == 'show') { | |
| 152 if (show == null) show = []; | |
| 153 show.addAll(parseIdentifierList()); | |
| 154 } else { break; } /* not a combinator: the token is never advanced here, so stop and let expectToken report it instead of looping forever */ | |
| 155 } | |
| 156 | |
| 157 expectToken(Token.SEMICOLON); | |
| 158 directives.add(new DartDirectiveInfo('import', uri, prefix, hide, show)); | |
| 159 } | |
| 160 | |
| 161 /** Parse an export, with optional show/hide combinators; if the url is malformed an error is reported and no directive is added. */ | |
| 162 parseExport() { | |
| 163 token = tokenizer.next(); | |
| 164 if (token.kind != Token.STRING) { | |
| 165 messages.error('expected an export url, but found ${token}', null); | |
| 166 return; | |
| 167 } | |
| 168 var uri = token.value; | |
| 169 | |
| 170 // Parse the optional show/hide combinators. | |
| 171 token = tokenizer.next(); | |
| 172 var hide; | |
| 173 var show; | |
| 174 while (token.kind == Token.IDENTIFIER) { | |
| 175 if (token.value == 'hide') { | |
| 176 if (hide == null) hide = []; | |
| 177 hide.addAll(parseIdentifierList()); | |
| 178 } else if (token.value == 'show') { | |
| 179 if (show == null) show = []; | |
| 180 show.addAll(parseIdentifierList()); | |
| 181 } else { break; } /* not a combinator: the token is never advanced here, so stop and let expectToken report it instead of looping forever */ | |
| 182 } | |
| 183 | |
| 184 expectToken(Token.SEMICOLON); | |
| 185 directives.add(new DartDirectiveInfo('export', uri, null, hide, show)); | |
| 186 } | |
| 187 | |
| 188 /** Parse a list of identifiers of the form `id1, id2, id3`, first skipping the current token; on an error, returns the names read so far. */ | |
| 189 List<String> parseIdentifierList() { | |
| 190 var list = []; | |
| 191 do { | |
| 192 token = tokenizer.next(); | |
| 193 if (!expectToken(Token.IDENTIFIER)) return list; | |
| 194 list.add(token.value); | |
| 195 token = tokenizer.next(); | |
| 196 } while (token.kind == Token.COMMA); | |
| 197 return list; | |
| 198 } | |
| 199 | |
| 200 /** Report an error if the last token is not of the expected [kind]; returns whether it matched, without reading another token. */ | |
| 201 bool expectToken(int kind) { | |
| 202 if (token.kind != kind) { | |
| 203 messages.error( | |
| 204 'expected <${Token.KIND_NAMES[kind]}>, but got ${token}', null); | |
| 205 return false; | |
| 206 } | |
| 207 return true; | |
| 208 } | |
| 209 } | |
| 210 | |
| 211 /** A token parsed out of the dart code: its kind, its span in the input, and optionally its text. */ | |
| 212 class Token { | |
| 213 /** Kind of token, one of the constants below (also the index of its name in [KIND_NAMES]). */ | |
| 214 final int kind; | |
| 215 | |
| 216 /** Value in the token (filled only for identifiers and strings; string values exclude the quotes). */ | |
| 217 final String value; | |
| 218 | |
| 219 /** Start location (inclusive) for the token in the input string. */ | |
| 220 final int start; | |
| 221 | |
| 222 /** End location (exclusive) for the token in the input string. */ | |
| 223 final int end; | |
| 224 | |
| 225 const Token(this.kind, this.start, this.end, [this.value]); | |
| 226 | |
| 227 toString() => '<#Token ${KIND_NAMES[kind]}, $value>'; | |
| 228 | |
| 229 static const int COMMENT = 0; | |
| 230 static const int STRING = 1; | |
| 231 static const int IDENTIFIER = 2; | |
| 232 static const int SEMICOLON = 3; | |
| 233 static const int DOT = 4; | |
| 234 static const int COMMA = 5; | |
| 235 static const int EOF = 6; | |
| 236 static const List<String> KIND_NAMES = | |
| 237 const ['comment', 'string', 'id', 'semicolon', 'dot', 'comma', 'eof']; | |
| 238 | |
| 239 } | |
| 240 | |
| 241 /** | |
| 242 * A simple tokenizer that understands comments, identifiers, strings, | |
| 243 * separators, and practically nothing else; any other input is reported as EOF. | |
| 244 */ | |
| 245 class _DirectiveTokenizer { | |
| 246 int pos = 0; | |
| 247 String _data; | |
| 248 | |
| 249 _DirectiveTokenizer(this._data); | |
| 250 | |
| 251 /** Return the next token after skipping whitespace; the end of input and any unrecognized character both yield an EOF token. */ | |
| 252 Token next() { | |
| 253 while (true) { | |
| 254 if (pos >= _data.length) return new Token(Token.EOF, pos, pos); | |
| 255 if (!isWhiteSpace(peek())) break; | |
| 256 nextChar(); | |
| 257 } | |
| 258 | |
| 259 var c = peek(); | |
| 260 switch (c) { | |
| 261 case _SLASH: | |
| 262 if (peek(1) == _SLASH) return lineComment(); | |
| 263 if (peek(1) == _STAR) return blockComment(); | |
| 264 break; | |
| 265 case _SINGLE_QUOTE: | |
| 266 case _DOUBLE_QUOTE: | |
| 267 return string(); | |
| 268 case _SEMICOLON: | |
| 269 pos++; | |
| 270 return new Token(Token.SEMICOLON, pos - 1, pos); | |
| 271 case _DOT: | |
| 272 pos++; | |
| 273 return new Token(Token.DOT, pos - 1, pos); | |
| 274 case _COMMA: | |
| 275 pos++; | |
| 276 return new Token(Token.COMMA, pos - 1, pos); | |
| 277 default: | |
| 278 if (isIdentifierStart(c)) return identifier(); | |
| 279 break; | |
| 280 } | |
| 281 return new Token(Token.EOF, pos, pos); | |
| 282 } | |
| 283 | |
| 284 int nextChar() => _data.charCodeAt(pos++); /* char code at [pos], then advance past it */ | |
| 285 int peek([int skip = 0]) => pos + skip < _data.length ? _data.charCodeAt(pos + skip) : -1; /* char code [skip] positions ahead without advancing; -1 past the end, so lookahead at the last character cannot read out of range */ | |
| 286 | |
| 287 /** Advance parsing until the end of a string (no triple quotes allowed); an unterminated string yields an EOF token. */ | |
| 288 Token string() { |

Jennifer Messerly
2012/10/25 04:26:25
does spec allow raw or triple quoted strings? inte
Siggi Cherem (dart-lang)
2012/10/25 05:17:29
ok added a note - seems that yes, multiline string
| |
| 289 int start = pos; | |
| 290 int startQuote = nextChar(); | |
| 291 bool escape = false; | |
| 292 while (true) { | |
| 293 if (pos >= _data.length) return new Token(Token.EOF, start, pos); | |
| 294 int c = nextChar(); | |
| 295 if (c == startQuote && !escape) break; | |
| 296 escape = !escape && c == _BACKSLASH; | |
| 297 } | |
| 298 return new Token(Token.STRING, start, pos, | |
| 299 _data.substring(start + 1, pos - 1)); | |
| 300 } | |
| 301 | |
| 302 /** Advance parsing until the end of an identifier and return it as a token whose value is the identifier text. */ | |
| 303 Token identifier() { | |
| 304 int start = pos; | |
| 305 while (pos < _data.length && isIdentifierChar(peek())) pos++; | |
| 306 return new Token(Token.IDENTIFIER, start, pos, _data.substring(start, pos)); | |
| 307 } | |
| 308 | |
| 309 /** Advance parsing until the end of a line comment; the terminating line feed, if any, is not consumed. */ | |
| 310 Token lineComment() { | |
| 311 int start = pos; | |
| 312 while (pos < _data.length && peek() != _LF) pos++; | |
| 313 return new Token(Token.COMMENT, start, pos); | |
| 314 } | |
| 315 | |
| 316 /** Advance parsing until the end of a block comment (nesting is allowed); an unterminated comment extends to the end of the input. */ | |
| 317 Token blockComment() { | |
| 318 var start = pos; | |
| 319 var commentNesting = 0; | |
| 320 pos += 2; | |
| 321 while (pos < _data.length) { | |
| 322 if (peek() == _STAR && peek(1) == _SLASH) { | |
| 323 pos += 2; | |
| 324 if (commentNesting == 0) break; | |
| 325 commentNesting--; | |
| 326 } else if (peek() == _SLASH && peek(1) == _STAR) { | |
| 327 pos += 2; | |
| 328 commentNesting++; | |
| 329 } else { | |
| 330 pos++; | |
| 331 } | |
| 332 } | |
| 333 return new Token(Token.COMMENT, start, pos); | |
| 334 } | |
| 335 | |
| 336 bool isWhiteSpace(int c) => c == _LF || c == _SPACE || c == _CR || c == _TAB; /* character classification helpers; all take a char code */ | |
| 337 bool isIdentifierStart(int c) => c == _UNDERSCORE || c == _DOLLAR || isLetter(c); /* '$' is accepted because Dart identifiers may contain it */ | |
| 338 bool isIdentifierChar(int c) => isIdentifierStart(c) || isNumber(c); | |
| 339 bool isNumber(int c) => c >= _ZERO && c <= _NINE; | |
| 340 bool isLetter(int c) => | |
| 341 (c >= _LOWER_A && c <= _LOWER_Z) || | |
| 342 (c >= _UPPER_A && c <= _UPPER_Z); | |
| 343 | |
| 344 | |
| 345 // The following constant character values are used for tokenizing. | |
| 346 | |
| 347 static const int _TAB = 9; | |
| 348 static const int _LF = 10; | |
| 349 static const int _CR = 13; | |
| 350 static const int _SPACE = 32; | |
| 351 static const int _DOUBLE_QUOTE = 34; // " | |
| 352 static const int _DOLLAR = 36; // $ | |
| 353 static const int _SINGLE_QUOTE = 39; // ' | |
| 354 static const int _STAR = 42; // * | |
| 355 static const int _COMMA = 44; // , | |
| 356 static const int _DOT = 46; // . | |
| 357 static const int _SLASH = 47; // / | |
| 358 static const int _ZERO = 48; // 0 | |
| 359 static const int _NINE = 57; // 9 | |
| 360 static const int _SEMICOLON = 59; // ; | |
| 361 static const int _UPPER_A = 65; // A | |
| 362 static const int _UPPER_Z = 90; // Z | |
| 363 static const int _BACKSLASH = 92; // \ | |
| 364 static const int _UNDERSCORE = 95; // _ | |
| 365 static const int _LOWER_A = 97; // a | |
| 366 static const int _LOWER_Z = 122; // z | |
| 367 } | |
| OLD | NEW |