OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #library('classify'); | 5 #library('classify'); |
6 | 6 |
7 #import('frog/lang.dart'); | 7 #import('../compiler/implementation/scanner/scannerlib.dart'); |
8 #import('markdown.dart', prefix: 'md'); | 8 #import('markdown.dart', prefix: 'md'); |
9 | 9 |
10 /** | 10 /** |
11 * Kinds of tokens that we care to highlight differently. The values of the | 11 * Kinds of tokens that we care to highlight differently. The values of the |
12 * fields here will be used as CSS class names for the generated spans. | 12 * fields here will be used as CSS class names for the generated spans. |
13 */ | 13 */ |
14 class Classification { | 14 class Classification { |
15 static final NONE = null; | 15 static final NONE = null; |
16 static final ERROR = "e"; | 16 static final ERROR = "e"; |
17 static final COMMENT = "c"; | 17 static final COMMENT = "c"; |
18 static final IDENTIFIER = "i"; | 18 static final IDENTIFIER = "i"; |
19 static final KEYWORD = "k"; | 19 static final KEYWORD = "k"; |
20 static final OPERATOR = "o"; | 20 static final OPERATOR = "o"; |
21 static final STRING = "s"; | 21 static final STRING = "s"; |
22 static final NUMBER = "n"; | 22 static final NUMBER = "n"; |
23 static final PUNCTUATION = "p"; | 23 static final PUNCTUATION = "p"; |
24 | 24 |
25 // A few things that are nice to make different: | 25 // A few things that are nice to make different: |
26 static final TYPE_IDENTIFIER = "t"; | 26 static final TYPE_IDENTIFIER = "t"; |
27 | 27 |
28 // Between a keyword and an identifier | 28 // Between a keyword and an identifier |
29 static final SPECIAL_IDENTIFIER = "r"; | 29 static final SPECIAL_IDENTIFIER = "r"; |
30 | 30 |
31 static final ARROW_OPERATOR = "a"; | 31 static final ARROW_OPERATOR = "a"; |
32 | 32 |
33 static final STRING_INTERPOLATION = 'si'; | 33 static final STRING_INTERPOLATION = 'si'; |
34 } | 34 } |
35 | 35 |
36 String classifySource(SourceFile src) { | 36 String classifySource(String text) { |
37 var html = new StringBuffer(); | 37 var html = new StringBuffer(); |
38 var tokenizer = new Tokenizer(src, /*skipWhitespace:*/false); | 38 var tokenizer = new StringScanner(text, includeComments: true); |
39 | 39 |
40 var token; | 40 var whitespaceOffset = 0; |
| 41 var token = tokenizer.tokenize(); |
41 var inString = false; | 42 var inString = false; |
42 while ((token = tokenizer.next()).kind != TokenKind.END_OF_FILE) { | 43 while (token.kind != EOF_TOKEN) { |
| 44 html.add(text.substring(whitespaceOffset, token.charOffset)); |
| 45 whitespaceOffset = token.charOffset + token.slowCharCount; |
43 | 46 |
44 // Track whether or not we're in a string. | 47 // Track whether or not we're in a string. |
45 switch (token.kind) { | 48 switch (token.kind) { |
46 case TokenKind.STRING: | 49 case STRING_TOKEN: |
47 case TokenKind.STRING_PART: | 50 case STRING_INTERPOLATION_TOKEN: |
48 case TokenKind.INCOMPLETE_STRING: | |
49 case TokenKind.INCOMPLETE_MULTILINE_STRING_DQ: | |
50 case TokenKind.INCOMPLETE_MULTILINE_STRING_SQ: | |
51 inString = true; | 51 inString = true; |
52 break; | 52 break; |
53 } | 53 } |
54 | 54 |
55 final kind = classify(token); | 55 final kind = classify(token); |
56 final text = md.escapeHtml(token.text); | 56 final escapedText = md.escapeHtml(token.slowToString()); |
57 if (kind != null) { | 57 if (kind != null) { |
58 // Add a secondary class to tokens appearing within a string so that | 58 // Add a secondary class to tokens appearing within a string so that |
59 // we can highlight tokens in an interpolation specially. | 59 // we can highlight tokens in an interpolation specially. |
60 var stringClass = inString ? Classification.STRING_INTERPOLATION : ''; | 60 var stringClass = inString ? Classification.STRING_INTERPOLATION : ''; |
61 html.add('<span class="$kind $stringClass">$text</span>'); | 61 html.add('<span class="$kind $stringClass">$escapedText</span>'); |
62 } else { | 62 } else { |
63 html.add('<span>$text</span>'); | 63 html.add(escapedText); |
64 } | 64 } |
65 | 65 |
66 // Track whether or not we're in a string. | 66 // Track whether or not we're in a string. |
67 if (token.kind == TokenKind.STRING) { | 67 if (token.kind == STRING_TOKEN) { |
68 inString = false; | 68 inString = false; |
69 } | 69 } |
| 70 token = token.next; |
70 } | 71 } |
71 return html.toString(); | 72 return html.toString(); |
72 } | 73 } |
73 | 74 |
74 bool _looksLikeType(String name) { | 75 bool _looksLikeType(String name) { |
75 // If the name looks like an UppercaseName, assume it's a type. | 76 // If the name looks like an UppercaseName, assume it's a type. |
76 return _looksLikePublicType(name) || _looksLikePrivateType(name); | 77 return _looksLikePublicType(name) || _looksLikePrivateType(name); |
77 } | 78 } |
78 | 79 |
79 bool _looksLikePublicType(String name) { | 80 bool _looksLikePublicType(String name) { |
80 // If the name looks like an UppercaseName, assume it's a type. | 81 // If the name looks like an UppercaseName, assume it's a type. |
81 return name.length >= 2 && isUpper(name[0]) && isLower(name[1]); | 82 return name.length >= 2 && isUpper(name[0]) && isLower(name[1]); |
82 } | 83 } |
83 | 84 |
84 bool _looksLikePrivateType(String name) { | 85 bool _looksLikePrivateType(String name) { |
85 // If the name looks like an _UppercaseName, assume it's a type. | 86 // If the name looks like an _UppercaseName, assume it's a type. |
86 return (name.length >= 3 && name[0] == '_' && isUpper(name[1]) | 87 return (name.length >= 3 && name[0] == '_' && isUpper(name[1]) |
87 && isLower(name[2])); | 88 && isLower(name[2])); |
88 } | 89 } |
89 | 90 |
90 // These ensure that they don't return "true" if the string only has symbols. | 91 // These ensure that they don't return "true" if the string only has symbols. |
91 bool isUpper(String s) => s.toLowerCase() != s; | 92 bool isUpper(String s) => s.toLowerCase() != s; |
92 bool isLower(String s) => s.toUpperCase() != s; | 93 bool isLower(String s) => s.toUpperCase() != s; |
93 | 94 |
94 String classify(Token token) { | 95 String classify(Token token) { |
95 switch (token.kind) { | 96 switch (token.kind) { |
96 case TokenKind.ERROR: | 97 case UNKNOWN_TOKEN: |
97 return Classification.ERROR; | 98 return Classification.ERROR; |
98 | 99 |
99 case TokenKind.IDENTIFIER: | 100 case IDENTIFIER_TOKEN: |
100 // Special case for names that look like types. | 101 // Special case for names that look like types. |
101 if (_looksLikeType(token.text) | 102 final text = token.slowToString(); |
102 || token.text == 'num' | 103 if (_looksLikeType(text) |
103 || token.text == 'bool' | 104 || text == 'num' |
104 || token.text == 'int' | 105 || text == 'bool' |
105 || token.text == 'double') { | 106 || text == 'int' |
| 107 || text == 'double') { |
106 return Classification.TYPE_IDENTIFIER; | 108 return Classification.TYPE_IDENTIFIER; |
107 } | 109 } |
108 return Classification.IDENTIFIER; | 110 return Classification.IDENTIFIER; |
109 | 111 |
110 // Even though it's a reserved word, let's try coloring it like a type. | 112 case STRING_TOKEN: |
111 case TokenKind.VOID: | 113 case STRING_INTERPOLATION_TOKEN: |
112 return Classification.TYPE_IDENTIFIER; | |
113 | |
114 case TokenKind.THIS: | |
115 case TokenKind.SUPER: | |
116 return Classification.SPECIAL_IDENTIFIER; | |
117 | |
118 case TokenKind.STRING: | |
119 case TokenKind.STRING_PART: | |
120 case TokenKind.INCOMPLETE_STRING: | |
121 case TokenKind.INCOMPLETE_MULTILINE_STRING_DQ: | |
122 case TokenKind.INCOMPLETE_MULTILINE_STRING_SQ: | |
123 return Classification.STRING; | 114 return Classification.STRING; |
124 | 115 |
125 case TokenKind.INTEGER: | 116 case INT_TOKEN: |
126 case TokenKind.HEX_INTEGER: | 117 case HEXADECIMAL_TOKEN: |
127 case TokenKind.DOUBLE: | 118 case DOUBLE_TOKEN: |
128 return Classification.NUMBER; | 119 return Classification.NUMBER; |
129 | 120 |
130 case TokenKind.COMMENT: | 121 case COMMENT_TOKEN: |
131 case TokenKind.INCOMPLETE_COMMENT: | |
132 return Classification.COMMENT; | 122 return Classification.COMMENT; |
133 | 123 |
134 // => is so awesome it is in a class of its own. | 124 // => is so awesome it is in a class of its own. |
135 case TokenKind.ARROW: | 125 case FUNCTION_TOKEN: |
136 return Classification.ARROW_OPERATOR; | 126 return Classification.ARROW_OPERATOR; |
137 | 127 |
138 case TokenKind.HASHBANG: | 128 case OPEN_PAREN_TOKEN: |
139 case TokenKind.LPAREN: | 129 case CLOSE_PAREN_TOKEN: |
140 case TokenKind.RPAREN: | 130 case OPEN_SQUARE_BRACKET_TOKEN: |
141 case TokenKind.LBRACK: | 131 case CLOSE_SQUARE_BRACKET_TOKEN: |
142 case TokenKind.RBRACK: | 132 case OPEN_CURLY_BRACKET_TOKEN: |
143 case TokenKind.LBRACE: | 133 case CLOSE_CURLY_BRACKET_TOKEN: |
144 case TokenKind.RBRACE: | 134 case COLON_TOKEN: |
145 case TokenKind.COLON: | 135 case SEMICOLON_TOKEN: |
146 case TokenKind.SEMICOLON: | 136 case COMMA_TOKEN: |
147 case TokenKind.COMMA: | 137 case PERIOD_TOKEN: |
148 case TokenKind.DOT: | 138 case PERIOD_PERIOD_TOKEN: |
149 case TokenKind.ELLIPSIS: | |
150 return Classification.PUNCTUATION; | 139 return Classification.PUNCTUATION; |
151 | 140 |
152 case TokenKind.INCR: | 141 case PLUS_PLUS_TOKEN: |
153 case TokenKind.DECR: | 142 case MINUS_MINUS_TOKEN: |
154 case TokenKind.BIT_NOT: | 143 case TILDE_TOKEN: |
155 case TokenKind.NOT: | 144 case BANG_TOKEN: |
156 case TokenKind.ASSIGN: | 145 case EQ_TOKEN: |
157 case TokenKind.ASSIGN_OR: | 146 case BAR_EQ_TOKEN: |
158 case TokenKind.ASSIGN_XOR: | 147 case CARET_EQ_TOKEN: |
159 case TokenKind.ASSIGN_AND: | 148 case AMPERSAND_EQ_TOKEN: |
160 case TokenKind.ASSIGN_SHL: | 149 case LT_LT_EQ_TOKEN: |
161 case TokenKind.ASSIGN_SAR: | 150 case GT_GT_GT_EQ_TOKEN: |
162 case TokenKind.ASSIGN_SHR: | 151 case GT_GT_EQ_TOKEN: |
163 case TokenKind.ASSIGN_ADD: | 152 case PLUS_EQ_TOKEN: |
164 case TokenKind.ASSIGN_SUB: | 153 case MINUS_EQ_TOKEN: |
165 case TokenKind.ASSIGN_MUL: | 154 case STAR_EQ_TOKEN: |
166 case TokenKind.ASSIGN_DIV: | 155 case SLASH_EQ_TOKEN: |
167 case TokenKind.ASSIGN_TRUNCDIV: | 156 case TILDE_SLASH_EQ_TOKEN: |
168 case TokenKind.ASSIGN_MOD: | 157 case PERCENT_EQ_TOKEN: |
169 case TokenKind.CONDITIONAL: | 158 case QUESTION_TOKEN: |
170 case TokenKind.OR: | 159 case BAR_BAR_TOKEN: |
171 case TokenKind.AND: | 160 case AMPERSAND_AMPERSAND_TOKEN: |
172 case TokenKind.BIT_OR: | 161 case BAR_TOKEN: |
173 case TokenKind.BIT_XOR: | 162 case CARET_TOKEN: |
174 case TokenKind.BIT_AND: | 163 case AMPERSAND_TOKEN: |
175 case TokenKind.SHL: | 164 case LT_LT_TOKEN: |
176 case TokenKind.SAR: | 165 case GT_GT_GT_TOKEN: |
177 case TokenKind.SHR: | 166 case GT_GT_TOKEN: |
178 case TokenKind.ADD: | 167 case PLUS_TOKEN: |
179 case TokenKind.SUB: | 168 case MINUS_TOKEN: |
180 case TokenKind.MUL: | 169 case STAR_TOKEN: |
181 case TokenKind.DIV: | 170 case SLASH_TOKEN: |
182 case TokenKind.TRUNCDIV: | 171 case TILDE_SLASH_TOKEN: |
183 case TokenKind.MOD: | 172 case PERCENT_TOKEN: |
184 case TokenKind.EQ: | 173 case EQ_EQ_TOKEN: |
185 case TokenKind.NE: | 174 case BANG_EQ_TOKEN: |
186 case TokenKind.EQ_STRICT: | 175 case EQ_EQ_EQ_TOKEN: |
187 case TokenKind.NE_STRICT: | 176 case BANG_EQ_EQ_TOKEN: |
188 case TokenKind.LT: | 177 case LT_TOKEN: |
189 case TokenKind.GT: | 178 case GT_TOKEN: |
190 case TokenKind.LTE: | 179 case LT_EQ_TOKEN: |
191 case TokenKind.GTE: | 180 case GT_EQ_TOKEN: |
192 case TokenKind.INDEX: | 181 case INDEX_TOKEN: |
193 case TokenKind.SETINDEX: | 182 case INDEX_EQ_TOKEN: |
194 return Classification.OPERATOR; | 183 return Classification.OPERATOR; |
195 | 184 |
196 // Color this like a keyword | 185 // Color keyword token. Most are colored as keywords. |
197 case TokenKind.HASH: | 186 case HASH_TOKEN: |
198 | 187 case KEYWORD_TOKEN: |
199 case TokenKind.ABSTRACT: | 188 if (token.stringValue === 'void') { |
200 case TokenKind.ASSERT: | 189 // Color "void" as a type. |
201 case TokenKind.CLASS: | 190 return Classification.TYPE_IDENTIFIER; |
202 case TokenKind.EXTENDS: | 191 } |
203 case TokenKind.FACTORY: | 192 if (token.stringValue === 'this' || token.stringValue === 'super') { |
204 case TokenKind.GET: | 193 // Color "this" and "super" as identifiers. |
205 case TokenKind.IMPLEMENTS: | 194 return Classification.SPECIAL_IDENTIFIER; |
206 case TokenKind.IMPORT: | 195 } |
207 case TokenKind.INTERFACE: | |
208 case TokenKind.LIBRARY: | |
209 case TokenKind.NATIVE: | |
210 case TokenKind.NEGATE: | |
211 case TokenKind.OPERATOR: | |
212 case TokenKind.SET: | |
213 case TokenKind.SOURCE: | |
214 case TokenKind.STATIC: | |
215 case TokenKind.TYPEDEF: | |
216 case TokenKind.BREAK: | |
217 case TokenKind.CASE: | |
218 case TokenKind.CATCH: | |
219 case TokenKind.CONST: | |
220 case TokenKind.CONTINUE: | |
221 case TokenKind.DEFAULT: | |
222 case TokenKind.DO: | |
223 case TokenKind.ELSE: | |
224 case TokenKind.FALSE: | |
225 case TokenKind.FINALLY: | |
226 case TokenKind.FOR: | |
227 case TokenKind.IF: | |
228 case TokenKind.IN: | |
229 case TokenKind.IS: | |
230 case TokenKind.NEW: | |
231 case TokenKind.NULL: | |
232 case TokenKind.RETURN: | |
233 case TokenKind.SWITCH: | |
234 case TokenKind.THROW: | |
235 case TokenKind.TRUE: | |
236 case TokenKind.TRY: | |
237 case TokenKind.WHILE: | |
238 case TokenKind.VAR: | |
239 case TokenKind.FINAL: | |
240 return Classification.KEYWORD; | 196 return Classification.KEYWORD; |
241 | 197 |
242 case TokenKind.WHITESPACE: | 198 case EOF_TOKEN: |
243 case TokenKind.END_OF_FILE: | |
244 return Classification.NONE; | 199 return Classification.NONE; |
245 | 200 |
246 default: | 201 default: |
247 return Classification.NONE; | 202 return Classification.NONE; |
248 } | 203 } |
249 } | 204 } |
OLD | NEW |