OLD | NEW |
---|---|
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 /** | 5 /** |
6 * Translates a string of characters into a YAML serialization tree. | 6 * Translates a string of characters into a YAML serialization tree. |
7 * | 7 * |
8 * This parser is designed to closely follow the spec. All productions in the | 8 * This parser is designed to closely follow the spec. All productions in the |
9 * spec are numbered, and the corresponding methods in the parser have the same | 9 * spec are numbered, and the corresponding methods in the parser have the same |
10 * numbers. This is certainly not the most efficient way of parsing YAML, but it | 10 * numbers. This is certainly not the most efficient way of parsing YAML, but it |
(...skipping 126 matching lines...) | |
137 */ | 137 */ |
138 int farthestLine = 0; | 138 int farthestLine = 0; |
139 | 139 |
140 /** | 140 /** |
141 * The column number of the farthest position that has been parsed | 141 * The column number of the farthest position that has been parsed |
142 * successfully before backtracking. Used for error reporting. | 142 * successfully before backtracking. Used for error reporting. |
143 */ | 143 */ |
144 int farthestColumn = 0; | 144 int farthestColumn = 0; |
145 | 145 |
146 /** | 146 /** |
147 * The farthest position in the source string that has been parsed | |
148 * successfully before backtracking. Used for error reporting. | |
149 */ | |
150 int farthestPos = 0; | |
151 | |
152 /** | |
147 * The name of the context of the farthest position that has been parsed | 153 * The name of the context of the farthest position that has been parsed |
148 * successfully before backtracking. Used for error reporting. | 154 * successfully before backtracking. Used for error reporting. |
149 */ | 155 */ |
150 String farthestContext = "document"; | 156 String farthestContext = "document"; |
151 | 157 |
152 /** A stack of the names of parse contexts. Used for error reporting. */ | 158 /** A stack of the names of parse contexts. Used for error reporting. */ |
153 List<String> contextStack; | 159 List<String> contextStack; |
154 | 160 |
155 /** | 161 /** |
162 * Annotations attached to ranges of the source string that add extra | |
163 * information to any errors that occur in the annotated range. | |
164 */ | |
165 _RangeMap<String> errorAnnotations; | |
166 | |
167 /** | |
156 * The buffer containing the string currently being captured. | 168 * The buffer containing the string currently being captured. |
157 */ | 169 */ |
158 StringBuffer capturedString; | 170 StringBuffer capturedString; |
159 | 171 |
160 /** | 172 /** |
161 * The beginning of the current section of the captured string. | 173 * The beginning of the current section of the captured string. |
162 */ | 174 */ |
163 int captureStart; | 175 int captureStart; |
164 | 176 |
165 /** | 177 /** |
166 * Whether the current string capture is being overridden. | 178 * Whether the current string capture is being overridden. |
167 */ | 179 */ |
168 bool capturingAs = false; | 180 bool capturingAs = false; |
169 | 181 |
170 _Parser(String s) | 182 _Parser(String s) |
171 : this.s = s, | 183 : this.s = s, |
172 len = s.length, | 184 len = s.length, |
173 contextStack = <String>["document"]; | 185 contextStack = <String>["document"], |
186 errorAnnotations = new _RangeMap(); | |
174 | 187 |
175 /** | 188 /** |
176 * Return the character at the current position, then move that position | 189 * Return the character at the current position, then move that position |
177 * forward one character. Also updates the current line and column numbers. | 190 * forward one character. Also updates the current line and column numbers. |
178 */ | 191 */ |
179 int next() { | 192 int next() { |
180 if (pos == len) return -1; | 193 if (pos == len) return -1; |
181 var char = s.charCodeAt(pos++); | 194 var char = s.charCodeAt(pos++); |
182 if (isBreak(char)) { | 195 if (isBreak(char)) { |
183 line++; | 196 line++; |
184 column = 0; | 197 column = 0; |
185 } else { | 198 } else { |
186 column++; | 199 column++; |
187 } | 200 } |
188 | 201 |
189 if (farthestLine < line) { | 202 if (farthestLine < line) { |
190 farthestLine = line; | 203 farthestLine = line; |
191 farthestColumn = column; | 204 farthestColumn = column; |
192 farthestContext = contextStack.last(); | 205 farthestContext = contextStack.last(); |
193 } else if (farthestLine == line && farthestColumn < column) { | 206 } else if (farthestLine == line && farthestColumn < column) { |
194 farthestColumn = column; | 207 farthestColumn = column; |
195 farthestContext = contextStack.last(); | 208 farthestContext = contextStack.last(); |
196 } | 209 } |
210 farthestPos = pos; | |
197 | 211 |
198 return char; | 212 return char; |
199 } | 213 } |
200 | 214 |
201 /** | 215 /** |
202 * Returns the character at the current position, or the character [i] | 216 * Returns the character at the current position, or the character [i] |
203 * characters after the current position. | 217 * characters after the current position. |
204 * | 218 * |
205 * Returns -1 if this would return a character after the end or before the | 219 * Returns -1 if this would return a character after the end or before the |
206 * beginning of the input string. | 220 * beginning of the input string. |
(...skipping 202 matching lines...) | |
409 context(String name, fn()) { | 423 context(String name, fn()) { |
410 try { | 424 try { |
411 contextStack.add(name); | 425 contextStack.add(name); |
412 return fn(); | 426 return fn(); |
413 } finally { | 427 } finally { |
414 var popped = contextStack.removeLast(); | 428 var popped = contextStack.removeLast(); |
415 assert(popped == name); | 429 assert(popped == name); |
416 } | 430 } |
417 } | 431 } |
418 | 432 |
433 /** | |
434 * Adds [message] as extra information to any errors that occur between the | |
435 * current position and the position of the cursor after running [fn]. The | |
436 * cursor is reset after [fn] is run. | |
437 */ | |
438 annotateError(String message, fn()) { | |
439 var start = pos; | |
440 var end; | |
441 transaction(() { | |
442 fn(); | |
443 end = pos; | |
444 return false; | |
445 }); | |
446 errorAnnotations[new _Range(start, end)] = message; | |
447 } | |
448 | |
419 /** Throws an error with additional context information. */ | 449 /** Throws an error with additional context information. */ |
420 error(String message) { | 450 error(String message) { |
421 // Line and column should be one-based. | 451 // Line and column should be one-based. |
422 throw new SyntaxError(line + 1, column + 1, | 452 throw new SyntaxError(line + 1, column + 1, |
423 "$message (in $farthestContext)"); | 453 "$message (in $farthestContext)"); |
424 } | 454 } |
425 | 455 |
426 /** | 456 /** |
427 * If [result] is falsey, throws an error saying that [expected] was | 457 * If [result] is falsey, throws an error saying that [expected] was |
428 * expected. | 458 * expected. |
429 */ | 459 */ |
430 expect(result, String expected) { | 460 expect(result, String expected) { |
431 if (truth(result)) return result; | 461 if (truth(result)) return result; |
432 error("expected $expected"); | 462 error("expected $expected"); |
433 } | 463 } |
434 | 464 |
435 /** | 465 /** |
436 * Throws an error saying that the parse failed. Uses [farthestLine], | 466 * Throws an error saying that the parse failed. Uses [farthestLine], |
437 * [farthestColumn], and [farthestContext] to provide additional information. | 467 * [farthestColumn], and [farthestContext] to provide additional information. |
438 */ | 468 */ |
439 parseFailed() { | 469 parseFailed() { |
440 throw new SyntaxError(farthestLine + 1, farthestColumn + 1, | 470 var message = "invalid YAML in $farthestContext"; |
441 "invalid YAML in $farthestContext"); | 471 var extraError = errorAnnotations[farthestPos]; |
472 if (extraError != null) message = "$message ($extraError)"; | |
473 throw new SyntaxError(farthestLine + 1, farthestColumn + 1, message); | |
442 } | 474 } |
443 | 475 |
444 /** Returns the number of spaces after the current position. */ | 476 /** Returns the number of spaces after the current position. */ |
445 int countIndentation() { | 477 int countIndentation() { |
446 var i = 0; | 478 var i = 0; |
447 while (peek(i) == SP) i++; | 479 while (peek(i) == SP) i++; |
448 return i; | 480 return i; |
449 } | 481 } |
450 | 482 |
451 /** Returns the indentation for a block scalar. */ | 483 /** Returns the indentation for a block scalar. */ |
(...skipping 215 matching lines...) | |
667 ns_escNull, ns_escBell, ns_escBackspace, ns_escHorizontalTab, | 699 ns_escNull, ns_escBell, ns_escBackspace, ns_escHorizontalTab, |
668 ns_escLineFeed, ns_escVerticalTab, ns_escFormFeed, ns_escCarriageReturn, | 700 ns_escLineFeed, ns_escVerticalTab, ns_escFormFeed, ns_escCarriageReturn, |
669 ns_escEscape, ns_escSpace, ns_escDoubleQuote, ns_escSlash, | 701 ns_escEscape, ns_escSpace, ns_escDoubleQuote, ns_escSlash, |
670 ns_escBackslash, ns_escNextLine, ns_escNonBreakingSpace, | 702 ns_escBackslash, ns_escNextLine, ns_escNonBreakingSpace, |
671 ns_escLineSeparator, ns_escParagraphSeparator, ns_esc8Bit, ns_esc16Bit, | 703 ns_escLineSeparator, ns_escParagraphSeparator, ns_esc8Bit, ns_esc16Bit, |
672 ns_esc32Bit | 704 ns_esc32Bit |
673 ])); | 705 ])); |
674 })); | 706 })); |
675 | 707 |
676 // 63 | 708 // 63 |
677 bool s_indent(int indent) => nAtOnce(indent, (c, i) => c == SP); | 709 bool s_indent(int indent) { |
710 var result = nAtOnce(indent, (c, i) => c == SP); | |
711 if (peek() == TAB) { | |
712 annotateError("\\t is not allowed as indentation in YAML", | |
Bob Nystrom
2012/08/30 16:43:51
I would use "tab characters" instead of the escape
nweiz
2012/08/30 19:19:02
Done.
| |
713 () => zeroOrMore(() => consume(isSpace))); | |
714 } | |
715 return result; | |
716 } | |
678 | 717 |
679 // 64 | 718 // 64 |
680 bool s_indentLessThan(int indent) { | 719 bool s_indentLessThan(int indent) { |
681 for (int i = 0; i < indent - 1; i++) { | 720 for (int i = 0; i < indent - 1; i++) { |
682 if (!consumeChar(SP)) break; | 721 if (!consumeChar(SP)) { |
722 if (peek() == TAB) { | |
723 annotateError("\\t is not allowed as indentation in YAML", () { | |
Bob Nystrom
2012/08/30 16:43:51
Ditto.
nweiz
2012/08/30 19:19:02
Done.
| |
724 for (; i < indent - 1; i++) { | |
725 if (!consume(isSpace)) break; | |
726 } | |
727 }); | |
728 } | |
729 break; | |
730 } | |
683 } | 731 } |
684 return true; | 732 return true; |
685 } | 733 } |
686 | 734 |
687 // 65 | 735 // 65 |
688 bool s_indentLessThanOrEqualTo(int indent) => s_indentLessThan(indent + 1); | 736 bool s_indentLessThanOrEqualTo(int indent) => s_indentLessThan(indent + 1); |
689 | 737 |
690 // 66 | 738 // 66 |
691 bool s_separateInLine() => transaction(() { | 739 bool s_separateInLine() => transaction(() { |
692 return captureAs('', () => | 740 return captureAs('', () => |
(...skipping 1201 matching lines...) | |
1894 | 1942 |
1895 /** The information in the header for a block scalar. */ | 1943 /** The information in the header for a block scalar. */ |
1896 class _BlockHeader { | 1944 class _BlockHeader { |
1897 final int additionalIndent; | 1945 final int additionalIndent; |
1898 final int chomping; | 1946 final int chomping; |
1899 | 1947 |
1900 _BlockHeader(this.additionalIndent, this.chomping); | 1948 _BlockHeader(this.additionalIndent, this.chomping); |
1901 | 1949 |
1902 bool get autoDetectIndent => additionalIndent == null; | 1950 bool get autoDetectIndent => additionalIndent == null; |
1903 } | 1951 } |
1952 | |
1953 /** | |
1954 * A range of characters in the YAML document, from [start] to [end] (inclusive). | |
1955 */ | |
1956 class _Range { | |
1957 /** The first character in the range. */ | |
1958 final int start; | |
1959 | |
1960 /** The last character in the range. */ | |
1961 final int end; | |
1962 | |
1963 _Range(this.start, this.end); | |
1964 | |
1965 /** Returns whether or not [pos] lies within this range. */ | |
1966 bool contains(int pos) => pos >= start && pos <= end; | |
1967 } | |
1968 | |
1969 /** | |
1970 * A map that associates [E] values with [_Range]s. It's efficient to create new | |
1971 * associations, but finding the value associated with a position is more | |
1972 * expensive. | |
1973 */ | |
1974 class _RangeMap<E> { | |
1975 /** The ranges and their associated elements. */ | |
1976 final List<_Pair<_Range, E>> contents; | |
1977 | |
1978 _RangeMap() : this.contents = <_Pair<_Range, E>>[]; | |
1979 | |
1980 /** | |
1981 * Returns the value associated with the range in which [pos] lies, or null if | |
1982 * there is no such range. If there's more than one such range, the most | |
1983 * recently set one is used. | |
1984 */ | |
1985 E operator[](int pos) { | |
1986 // Iterate backwards through contents so the more recent range takes | |
1987 // precedence. TODO(nweiz): clean this up when issue 2804 is fixed. | |
1988 for (var i = contents.length - 1; i >= 0; i--) { | |
1989 var pair = contents[i]; | |
1990 if (pair.first.contains(pos)) return pair.last; | |
1991 } | |
1992 return null; | |
1993 } | |
1994 | |
1995 /** Associates [value] with [range]. */ | |
1996 operator[]=(_Range range, E value) => | |
1997 contents.add(new _Pair<_Range, E>(range, value)); | |
1998 } | |
OLD | NEW |