OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 /** | |
6 * A mini parser that extracts top-level directives (library, imports, exports, | |
7 * and parts) from a dart source file. | |
8 */ | |
9 library directive_parser; | |
10 | |
11 import 'info.dart' show DartCodeInfo, DartDirectiveInfo; | |
12 import 'messages.dart' show Messages; | |
13 import 'file_system/path.dart'; | |
14 | |
/**
 * Extracts the top-level directives found at the start of [code].
 *
 * [file] is the path of the source being parsed; it is handed to the parser so
 * it can be attached to reported errors. Errors are reported to [messages]
 * when it is supplied; otherwise a `Messages.silent()` instance is used.
 */
DartCodeInfo parseDartCode(String code, Path file, {Messages messages}) {
  if (messages == null) messages = new Messages.silent();
  var parser = new _DirectiveParser(messages, file);
  return parser.parse(code);
}
25 | |
/**
 * A parser that extracts top-level directives (library, part, import and
 * export) and stops at the first token that is neither a directive nor a
 * comment.
 */
// TODO(sigmund): add source-span to error messages
class _DirectiveParser {
  /** Path to the source file containing the code (for error messages). */
  Path file;

  /** Tokenizer used to parse the input until the end of the directives. */
  _DirectiveTokenizer tokenizer;

  /** Extracted library identifier, if any. */
  String libraryName;

  /** Extracted part-of identifier, if any. */
  String partName;

  /** Extracted imports, exports, and parts, if any. */
  List<DartDirectiveInfo> directives = <DartDirectiveInfo>[];

  /** Helper for reporting error messages. */
  Messages messages;

  /** Last token read by the parser. */
  Token token;

  _DirectiveParser(this.messages, this.file);

  /**
   * Parse and extract directives from [code].
   *
   * The returned [DartCodeInfo] receives the library name, the part-of name,
   * the collected directives, and the remainder of [code] starting at the
   * token where directive parsing stopped.
   */
  DartCodeInfo parse(String code) {
    tokenizer = new _DirectiveTokenizer(code);
    parseTopLevel();
    // [token] is now the first token that did not belong to a directive or a
    // comment (or EOF), so everything from its start is the remaining code.
    return new DartCodeInfo(libraryName, partName, directives,
        code.substring(token.start));
  }

  /**
   * Parse top-level directives and comments, but unlike normal Dart code, stop
   * as soon as we find actual code.
   */
  void parseTopLevel() {
    token = tokenizer.next();
    while (token.kind != Token.EOF) {
      if (token.kind == Token.IDENTIFIER) {
        if (token.value == 'library') {
          parseLibrary();
        } else if (token.value == 'part') {
          parsePart();
        } else if (token.value == 'import') {
          parseImport();
        } else if (token.value == 'export') {
          parseExport();
        } else {
          break;
        }
      } else if (token.kind != Token.COMMENT) {
        break;
      }
      token = tokenizer.next();
    }
  }

  /** Parse library declarations: 'library foo.bar;' */
  void parseLibrary() {
    libraryName = parseQualifiedName();
    expectToken(Token.SEMICOLON);
  }

  /**
   * Parse either a part declaration or part inclusions. For instance,
   *     part of foo.bar;
   * or
   *     part "foo";
   */
  void parsePart() {
    token = tokenizer.next();
    if (token.kind == Token.IDENTIFIER && token.value == 'of') {
      partName = parseQualifiedName();
    } else if (token.kind == Token.STRING) {
      directives.add(new DartDirectiveInfo('part', _parseUri()));
    } else {
      messages.error('unexpected token: ${token}', null, file: file);
    }
    expectToken(Token.SEMICOLON);
  }

  /**
   * Parse a qualified name, such as `one.two.three`.
   *
   * Reads tokens until a semicolon is found and leaves [token] on that
   * semicolon. Reports an error and returns `null` if anything other than
   * identifiers separated by dots is found.
   */
  String parseQualifiedName() {
    List<String> segments = <String>[];
    while (true) {
      token = tokenizer.next();
      if (token.kind != Token.IDENTIFIER) {
        messages.error('invalid qualified name: $token', null, file: file);
        return null;
      }
      segments.add(token.value);
      token = tokenizer.next();
      if (token.kind == Token.SEMICOLON) break;
      if (token.kind != Token.DOT) {
        messages.error('invalid qualified name: $token', null, file: file);
        return null;
      }
    }
    return segments.join('.');
  }

  /**
   * Read a URI written as one or more adjacent string literals.
   *
   * Expects [token] to be the first string literal; on return [token] is the
   * first token after the last string literal.
   */
  String _parseUri() {
    var uri = token.value;
    token = tokenizer.next();
    while (token.kind == Token.STRING) {
      uri = '$uri${token.value}';
      token = tokenizer.next();
    }
    return uri;
  }

  /** Parse an import, with optional prefix and show/hide combinators. */
  void parseImport() {
    token = tokenizer.next();
    if (token.kind != Token.STRING) {
      // TODO(sigmund): add file name and span information here.
      messages.error('expected an import url, but found ${token}', null,
          file: file);
      return;
    }
    var uri = _parseUri();

    // Parse the optional prefix.
    var prefix;
    if (token.kind == Token.IDENTIFIER && token.value == 'as') {
      token = tokenizer.next();
      if (token.kind != Token.IDENTIFIER) {
        messages.error('expected an identifier as prefix, but found ${token}',
            null, file: file);
        return;
      }
      prefix = token.value;
      token = tokenizer.next();
    }

    // Parse the optional show/hide combinators. The lists stay null when the
    // corresponding combinator never appears.
    var hide;
    var show;
    while (token.kind == Token.IDENTIFIER) {
      if (token.value == 'hide') {
        if (hide == null) hide = <String>[];
        hide.addAll(parseIdentifierList());
      } else if (token.value == 'show') {
        if (show == null) show = <String>[];
        show.addAll(parseIdentifierList());
      } else {
        break;
      }
    }

    expectToken(Token.SEMICOLON);
    directives.add(new DartDirectiveInfo('import', uri, prefix, hide, show));
  }

  /** Parse an export, with optional show/hide combinators. */
  void parseExport() {
    token = tokenizer.next();
    if (token.kind != Token.STRING) {
      messages.error('expected an export url, but found ${token}', null,
          file: file);
      return;
    }
    var uri = _parseUri();

    // Parse the optional show/hide combinators. The lists stay null when the
    // corresponding combinator never appears.
    var hide;
    var show;
    while (token.kind == Token.IDENTIFIER) {
      if (token.value == 'hide') {
        if (hide == null) hide = <String>[];
        hide.addAll(parseIdentifierList());
      } else if (token.value == 'show') {
        if (show == null) show = <String>[];
        show.addAll(parseIdentifierList());
      } else {
        // Any other identifier is not a combinator. Leave the loop so the
        // semicolon check below reports it; without this the loop would spin
        // forever because the token is never advanced.
        break;
      }
    }

    expectToken(Token.SEMICOLON);
    directives.add(new DartDirectiveInfo('export', uri, null, hide, show));
  }

  /**
   * Parse a list of identifiers of the form `id1, id2, id3`.
   *
   * Starts by advancing past the current token (the `show`/`hide` keyword or
   * a comma). On return [token] is the first token after the list. If a
   * non-identifier is found where an identifier is expected, an error is
   * reported and the identifiers collected so far are returned.
   */
  List<String> parseIdentifierList() {
    var list = <String>[];
    do {
      token = tokenizer.next();
      if (!expectToken(Token.IDENTIFIER)) return list;
      list.add(token.value);
      token = tokenizer.next();
    } while (token.kind == Token.COMMA);
    return list;
  }

  /**
   * Report an error if the last token is not of the expected [kind].
   *
   * Returns whether the last token matched. Does not consume the token.
   */
  bool expectToken(int kind) {
    if (token.kind != kind) {
      messages.error(
          'expected <${Token.KIND_NAMES[kind]}>, but got ${token}', null,
          file: file);
      return false;
    }
    return true;
  }
}
230 | |
/**
 * A single token produced while scanning Dart code: its kind, its position in
 * the input, and (for some kinds) its text.
 */
class Token {
  // Token kinds. Each value is also the index of its name in [KIND_NAMES].
  static const int COMMENT = 0;
  static const int STRING = 1;
  static const int IDENTIFIER = 2;
  static const int SEMICOLON = 3;
  static const int DOT = 4;
  static const int COMMA = 5;
  static const int EOF = 6;

  /** Display names for the token kinds, indexed by kind. */
  static const List<String> KIND_NAMES = const [
    'comment',
    'string',
    'id',
    'semicolon',
    'dot',
    'comma',
    'eof'
  ];

  /** Kind of token, one of the constants above. */
  final int kind;

  /** Offset in the input string where the token starts. */
  final int start;

  /** Offset in the input string where the token ends. */
  final int end;

  /** Text carried by the token (filled only for identifiers and strings). */
  final String value;

  const Token(this.kind, this.start, this.end, [this.value]);

  /** Debug representation showing the kind name and the value. */
  String toString() {
    return '<#Token ${KIND_NAMES[kind]}, $value>';
  }
}
260 | |
/**
 * A simple tokenizer that understands comments, identifiers, strings,
 * separators, and practically nothing else.
 */
class _DirectiveTokenizer {
  /** Offset in [_data] of the next character to read. */
  int pos = 0;

  /** The text being tokenized. */
  String _data;

  _DirectiveTokenizer(this._data);

  /**
   * Return the next token.
   *
   * Leading whitespace is skipped. An EOF token is returned both at the end of
   * the input and when the next character does not start any token this
   * tokenizer understands; in the latter case [pos] is left on that character.
   */
  Token next() {
    while (true) {
      if (pos >= _data.length) return new Token(Token.EOF, pos, pos);
      if (!isWhiteSpace(peek())) break;
      nextChar();
    }

    var c = peek();
    switch (c) {
      case _SLASH:
        if (peek(1) == _SLASH) return lineComment();
        if (peek(1) == _STAR) return blockComment();
        break;
      case _SINGLE_QUOTE:
      case _DOUBLE_QUOTE:
        return string();
      case _SEMICOLON:
        pos++;
        return new Token(Token.SEMICOLON, pos - 1, pos);
      case _DOT:
        pos++;
        return new Token(Token.DOT, pos - 1, pos);
      case _COMMA:
        pos++;
        return new Token(Token.COMMA, pos - 1, pos);
      default:
        if (isIdentifierStart(c)) return identifier();
        break;
    }
    return new Token(Token.EOF, pos, pos);
  }

  /** Return the character code at [pos] and advance past it. */
  int nextChar() => _data.charCodeAt(pos++);

  /**
   * Return the character code [skip] positions ahead of [pos] without
   * consuming it, or [_NO_CHAR] when that position is past the end of the
   * input. The bounds check lets callers look ahead near the end of the input
   * (a trailing `/`, or an unterminated block comment) without throwing.
   */
  int peek([int skip = 0]) {
    var index = pos + skip;
    return index < _data.length ? _data.charCodeAt(index) : _NO_CHAR;
  }

  /**
   * Advance parsing until the end of a string (no triple quotes allowed).
   *
   * The token value is the text between the quotes, with escape sequences
   * left as written. An EOF token is returned if the input ends before the
   * closing quote.
   */
  Token string() {
    // TODO(sigmund): add support for multi-line strings, and raw strings.
    int start = pos;
    int startQuote = nextChar();
    bool escape = false;
    while (true) {
      if (pos >= _data.length) return new Token(Token.EOF, start, pos);
      int c = nextChar();
      if (c == startQuote && !escape) break;
      // A backslash escapes the next character unless it is itself escaped.
      escape = !escape && c == _BACKSLASH;
    }
    return new Token(Token.STRING, start, pos,
        _data.substring(start + 1, pos - 1));
  }

  /** Advance parsing until the end of an identifier. */
  Token identifier() {
    int start = pos;
    while (pos < _data.length && isIdentifierChar(peek())) pos++;
    return new Token(Token.IDENTIFIER, start, pos, _data.substring(start, pos));
  }

  /**
   * Advance parsing until the end of a line comment. The terminating line
   * feed, if any, is not consumed.
   */
  Token lineComment() {
    int start = pos;
    while (pos < _data.length && peek() != _LF) pos++;
    return new Token(Token.COMMENT, start, pos);
  }

  /**
   * Advance parsing until the end of a block comment (nesting is allowed).
   *
   * If the input ends before the comment is closed, the comment token extends
   * to the end of the input.
   */
  Token blockComment() {
    var start = pos;
    var commentNesting = 0;
    pos += 2; // Skip the opening "/*".
    while (pos < _data.length) {
      if (peek() == _STAR && peek(1) == _SLASH) {
        pos += 2;
        if (commentNesting == 0) break;
        commentNesting--;
      } else if (peek() == _SLASH && peek(1) == _STAR) {
        pos += 2;
        commentNesting++;
      } else {
        pos++;
      }
    }
    return new Token(Token.COMMENT, start, pos);
  }

  bool isWhiteSpace(int c) => c == _LF || c == _SPACE || c == _CR || c == _TAB;

  /** Whether [c] can start an identifier: a letter, `_` or `$`. */
  bool isIdentifierStart(int c) =>
      c == _UNDERSCORE || c == _DOLLAR || isLetter(c);

  bool isIdentifierChar(int c) => isIdentifierStart(c) || isNumber(c);
  bool isNumber(int c) => c >= _ZERO && c <= _NINE;
  bool isLetter(int c) =>
      (c >= _LOWER_A && c <= _LOWER_Z) ||
      (c >= _UPPER_A && c <= _UPPER_Z);


  // The following constant character values are used for tokenizing.

  /** Returned by [peek] when looking past the end of the input. */
  static const int _NO_CHAR = -1;

  static const int _TAB = 9;
  static const int _LF = 10;
  static const int _CR = 13;
  static const int _SPACE = 32;
  static const int _DOUBLE_QUOTE = 34;  // "
  static const int _DOLLAR = 36;        // $
  static const int _SINGLE_QUOTE = 39;  // '
  static const int _STAR = 42;          // *
  static const int _COMMA = 44;         // ,
  static const int _DOT = 46;           // .
  static const int _SLASH = 47;         // /
  static const int _ZERO = 48;          // 0
  static const int _NINE = 57;          // 9
  static const int _SEMICOLON = 59;     // ;
  static const int _UPPER_A = 65;       // A
  static const int _UPPER_Z = 90;       // Z
  static const int _BACKSLASH = 92;     // \
  static const int _UNDERSCORE = 95;    // _
  static const int _LOWER_A = 97;       // a
  static const int _LOWER_Z = 122;      // z
}
OLD | NEW |