| OLD | NEW |
| (Empty) | |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 /** |
| 6 * A mini parser that extracts top-level directives (library, imports, exports, |
| 7 * and parts) from a dart source file. |
| 8 */ |
| 9 library directive_parser; |
| 10 |
| 11 import 'info.dart' show DartCodeInfo, DartDirectiveInfo; |
| 12 import 'messages.dart' show Messages; |
| 13 import 'file_system/path.dart'; |
| 14 |
/**
 * Extracts the top-level directives found at the start of [code].
 *
 * [file] and [messages] are handed to the underlying directive parser, which
 * uses them when reporting errors.
 */
DartCodeInfo parseDartCode(String code, Path file, Messages messages) {
  var parser = new _DirectiveParser(messages, file);
  return parser.parse(code);
}
| 19 |
/**
 * A parser that extracts top-level directives (library, part, import and
 * export) and stops at the first token that is not part of a directive or a
 * comment.
 */
// TODO(sigmund): add source-span to error messages
class _DirectiveParser {
  /** Path to the source file containing the code (for error messages). */
  Path file;

  /** Tokenizer used to parse the input until the end of the directives. */
  _DirectiveTokenizer tokenizer;

  /** Extracted library identifier, if any. */
  String libraryName;

  /** Extracted part-of identifier, if any. */
  String partName;

  /** Extracted imports, exports, and parts, if any. */
  List<DartDirectiveInfo> directives = <DartDirectiveInfo>[];

  /** Helper for reporting error messages. */
  Messages messages;

  /** Last token read by the parser. */
  Token token;

  _DirectiveParser(this.messages, this.file);

  /**
   * Parse and extract directives from [code].
   *
   * The last argument given to [DartCodeInfo] is the text of [code] starting
   * at the token where directive parsing stopped.
   */
  DartCodeInfo parse(String code) {
    tokenizer = new _DirectiveTokenizer(code);
    parseTopLevel();
    return new DartCodeInfo(libraryName, partName, directives,
        code.substring(token.start));
  }

  /**
   * Parse top-level directives and comments, but unlike normal Dart code, stop
   * as soon as we find actual code.
   *
   * On exit [token] is either the EOF token or the first token that does not
   * belong to a directive or a comment.
   */
  void parseTopLevel() {
    token = tokenizer.next();
    while (token.kind != Token.EOF) {
      if (token.kind == Token.IDENTIFIER) {
        if (token.value == 'library') {
          parseLibrary();
        } else if (token.value == 'part') {
          parsePart();
        } else if (token.value == 'import') {
          parseImport();
        } else if (token.value == 'export') {
          parseExport();
        } else {
          // Any other identifier is the beginning of real code.
          break;
        }
      } else if (token.kind != Token.COMMENT) {
        break;
      }
      // Step past the comment, or past the last token of the directive that
      // was just parsed.
      token = tokenizer.next();
    }
  }

  /** Parse library declarations: 'library foo.bar;' */
  parseLibrary() {
    libraryName = parseQualifiedName();
    expectToken(Token.SEMICOLON);
  }

  /**
   * Parse either a part declaration or part inclusions. For instance,
   *     part of foo.bar;
   * or
   *     part "foo";
   */
  parsePart() {
    token = tokenizer.next();
    if (token.kind == Token.IDENTIFIER && token.value == 'of') {
      partName = parseQualifiedName();
    } else if (token.kind == Token.STRING) {
      directives.add(new DartDirectiveInfo('part', token.value));
      token = tokenizer.next();
    } else {
      messages.error('unexpected token: ${token}', null, file: file);
    }
    expectToken(Token.SEMICOLON);
  }

  /**
   * Parse a qualified name, such as `one.two.three`.
   *
   * Reading starts at the token after the current one. On success [token] is
   * left on the semicolon that terminated the name. If the name is malformed
   * an error is reported, `null` is returned and [token] is left on the
   * offending token.
   */
  parseQualifiedName() {
    List<String> segments = [];
    while (true) {
      token = tokenizer.next();
      if (token.kind != Token.IDENTIFIER) {
        messages.error('invalid qualified name: $token', null, file: file);
        return null;
      }
      segments.add(token.value);
      token = tokenizer.next();
      if (token.kind == Token.SEMICOLON) break;
      if (token.kind != Token.DOT) {
        messages.error('invalid qualified name: $token', null, file: file);
        return null;
      }
    }
    return Strings.join(segments, '.');
  }

  /**
   * Parse an import, with optional prefix and show/hide combinators.
   *
   * If the url or the prefix is missing, an error is reported and no directive
   * is recorded.
   */
  parseImport() {
    token = tokenizer.next();
    if (token.kind != Token.STRING) {
      // TODO(sigmund): add file name and span information here.
      messages.error('expected an import url, but found ${token}', null,
          file: file);
      return;
    }
    var uri = token.value;
    token = tokenizer.next();
    // Adjacent string literals are concatenated into a single url.
    while (token.kind == Token.STRING) {
      uri = '$uri${token.value}';
      token = tokenizer.next();
    }

    // Parse the optional prefix.
    var prefix;
    if (token.kind == Token.IDENTIFIER && token.value == 'as') {
      token = tokenizer.next();
      if (token.kind != Token.IDENTIFIER) {
        messages.error('expected an identifier as prefix, but found ${token}',
            null, file: file);
        return;
      }
      prefix = token.value;
      token = tokenizer.next();
    }

    // Parse the optional show/hide combinators.
    var hide;
    var show;
    while (token.kind == Token.IDENTIFIER) {
      if (token.value == 'hide') {
        if (hide == null) hide = [];
        hide.addAll(parseIdentifierList());
      } else if (token.value == 'show') {
        if (show == null) show = [];
        show.addAll(parseIdentifierList());
      } else {
        break;
      }
    }

    expectToken(Token.SEMICOLON);
    directives.add(new DartDirectiveInfo('import', uri, prefix, hide, show));
  }

  /**
   * Parse an export, with optional show/hide combinators.
   *
   * If the url is missing, an error is reported and no directive is recorded.
   */
  parseExport() {
    token = tokenizer.next();
    if (token.kind != Token.STRING) {
      messages.error('expected an export url, but found ${token}', null,
          file: file);
      return;
    }
    var uri = token.value;
    token = tokenizer.next();
    // Adjacent string literals are concatenated, exactly as parseImport does.
    while (token.kind == Token.STRING) {
      uri = '$uri${token.value}';
      token = tokenizer.next();
    }

    // Parse the optional show/hide combinators.
    var hide;
    var show;
    while (token.kind == Token.IDENTIFIER) {
      if (token.value == 'hide') {
        if (hide == null) hide = [];
        hide.addAll(parseIdentifierList());
      } else if (token.value == 'show') {
        if (show == null) show = [];
        show.addAll(parseIdentifierList());
      } else {
        // Neither branch above would advance the token, so without this exit
        // an unexpected identifier would make the loop spin forever.
        break;
      }
    }

    expectToken(Token.SEMICOLON);
    directives.add(new DartDirectiveInfo('export', uri, null, hide, show));
  }

  /**
   * Parse a list of identifiers of the form `id1, id2, id3`.
   *
   * Reading starts at the token after the current one. On return [token] is
   * the first token after the list, or the offending token when an identifier
   * was expected but not found (an error is reported in that case).
   */
  List<String> parseIdentifierList() {
    var list = <String>[];
    do {
      token = tokenizer.next();
      if (!expectToken(Token.IDENTIFIER)) return list;
      list.add(token.value);
      token = tokenizer.next();
    } while (token.kind == Token.COMMA);
    return list;
  }

  /**
   * Report an error if the last token is not of the expected kind.
   *
   * Returns whether the last token matched [kind]. The token is not consumed.
   */
  bool expectToken(int kind) {
    if (token.kind != kind) {
      messages.error(
          'expected <${Token.KIND_NAMES[kind]}>, but got ${token}', null,
          file: file);
      return false;
    }
    return true;
  }
}
| 224 |
/** A lexical token recognized while scanning Dart code for directives. */
class Token {
  // Token kinds. Each value is also the index of its name in [KIND_NAMES].
  static const int COMMENT = 0;
  static const int STRING = 1;
  static const int IDENTIFIER = 2;
  static const int SEMICOLON = 3;
  static const int DOT = 4;
  static const int COMMA = 5;
  static const int EOF = 6;

  /** Printable name of every token kind, indexed by the kind constant. */
  static const List<String> KIND_NAMES =
      const ['comment', 'string', 'id', 'semicolon', 'dot', 'comma', 'eof'];

  /** One of the kind constants declared in this class. */
  final int kind;

  /** Offset in the input string where the token begins. */
  final int start;

  /** Offset in the input string where the token ends. */
  final int end;

  /** Text carried by the token (only set for identifiers and strings). */
  final String value;

  const Token(this.kind, this.start, this.end, [this.value]);

  /** Debug representation showing the kind name and the value. */
  String toString() {
    var kindName = KIND_NAMES[kind];
    return '<#Token $kindName, $value>';
  }
}
| 254 |
/**
 * A simple tokenizer that understands comments, identifiers, strings,
 * separators, and practically nothing else.
 *
 * Anything it does not recognize is reported as an EOF token located at the
 * unrecognized character, without consuming it.
 */
class _DirectiveTokenizer {
  /** Offset in [_data] of the next character to read. */
  int pos = 0;

  /** The text being tokenized. */
  String _data;

  _DirectiveTokenizer(this._data);

  /**
   * Return the next token.
   *
   * Leading whitespace is skipped. An EOF token is returned both at the end of
   * the input and when the next character does not start a known token.
   */
  Token next() {
    while (true) {
      if (pos >= _data.length) return new Token(Token.EOF, pos, pos);
      if (!isWhiteSpace(peek())) break;
      nextChar();
    }

    var c = peek();
    switch (c) {
      case _SLASH:
        if (peek(1) == _SLASH) return lineComment();
        if (peek(1) == _STAR) return blockComment();
        break;
      case _SINGLE_QUOTE:
      case _DOUBLE_QUOTE:
        return string();
      case _SEMICOLON:
        pos++;
        return new Token(Token.SEMICOLON, pos - 1, pos);
      case _DOT:
        pos++;
        return new Token(Token.DOT, pos - 1, pos);
      case _COMMA:
        pos++;
        return new Token(Token.COMMA, pos - 1, pos);
      default:
        if (isIdentifierStart(c)) return identifier();
        break;
    }
    return new Token(Token.EOF, pos, pos);
  }

  /** Return the character code at [pos] and advance past it. */
  int nextChar() => _data.charCodeAt(pos++);

  /**
   * Return the character code [skip] positions after [pos] without consuming
   * it, or -1 if that position is past the end of the input.
   *
   * The bounds check matters for look-ahead such as `peek(1)`, which would
   * otherwise read past the end when the input finishes with `/` or `*`.
   */
  int peek([int skip = 0]) {
    var index = pos + skip;
    if (index < 0 || index >= _data.length) return -1;
    return _data.charCodeAt(index);
  }

  /**
   * Advance parsing until the end of a string (no triple quotes allowed).
   *
   * The token value is the text between the quotes, with escape sequences left
   * as written. If the input ends before the closing quote, an EOF token that
   * starts at the opening quote is returned instead.
   */
  Token string() {
    // TODO(sigmund): add support for multi-line strings, and raw strings.
    int start = pos;
    int startQuote = nextChar();
    bool escape = false;
    while (true) {
      if (pos >= _data.length) return new Token(Token.EOF, start, pos);
      int c = nextChar();
      if (c == startQuote && !escape) break;
      // A backslash escapes the next character unless it is itself escaped.
      escape = !escape && c == _BACKSLASH;
    }
    return new Token(Token.STRING, start, pos,
        _data.substring(start + 1, pos - 1));
  }

  /** Advance parsing until the end of an identifier. */
  Token identifier() {
    int start = pos;
    while (pos < _data.length && isIdentifierChar(peek())) pos++;
    return new Token(Token.IDENTIFIER, start, pos, _data.substring(start, pos));
  }

  /**
   * Advance parsing until the end of a line comment. The terminating line feed
   * is not consumed.
   */
  Token lineComment() {
    int start = pos;
    while (pos < _data.length && peek() != _LF) pos++;
    return new Token(Token.COMMENT, start, pos);
  }

  /**
   * Advance parsing until the end of a block comment (nesting is allowed).
   *
   * An unterminated comment extends to the end of the input.
   */
  Token blockComment() {
    var start = pos;
    var commentNesting = 0;
    // Skip the opening '/*'.
    pos += 2;
    while (pos < _data.length) {
      if (peek() == _STAR && peek(1) == _SLASH) {
        pos += 2;
        if (commentNesting == 0) break;
        commentNesting--;
      } else if (peek() == _SLASH && peek(1) == _STAR) {
        pos += 2;
        commentNesting++;
      } else {
        pos++;
      }
    }
    return new Token(Token.COMMENT, start, pos);
  }

  bool isWhiteSpace(int c) => c == _LF || c == _SPACE || c == _CR || c == _TAB;

  /** Whether [c] may begin an identifier: a letter, `_` or `$`. */
  bool isIdentifierStart(int c) =>
      c == _UNDERSCORE || c == _DOLLAR || isLetter(c);

  bool isIdentifierChar(int c) => isIdentifierStart(c) || isNumber(c);
  bool isNumber(int c) => c >= _ZERO && c <= _NINE;
  bool isLetter(int c) =>
      (c >= _LOWER_A && c <= _LOWER_Z) ||
      (c >= _UPPER_A && c <= _UPPER_Z);


  // The following constant character values are used for tokenizing.

  static const int _TAB = 9;
  static const int _LF = 10;
  static const int _CR = 13;
  static const int _SPACE = 32;
  static const int _DOUBLE_QUOTE = 34;  // "
  static const int _DOLLAR = 36;        // $
  static const int _SINGLE_QUOTE = 39;  // '
  static const int _STAR = 42;          // *
  static const int _COMMA = 44;         // ,
  static const int _DOT = 46;           // .
  static const int _SLASH = 47;         // /
  static const int _ZERO = 48;          // 0
  static const int _NINE = 57;          // 9
  static const int _SEMICOLON = 59;     // ;
  static const int _UPPER_A = 65;       // A
  static const int _UPPER_Z = 90;       // Z
  static const int _BACKSLASH = 92;     // \
  static const int _UNDERSCORE = 95;    // _
  static const int _LOWER_A = 97;       // a
  static const int _LOWER_Z = 122;      // z
}
| OLD | NEW |