| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #library('classify'); | 5 #library('classify'); |
| 6 | 6 |
| 7 #import('frog/lang.dart'); | 7 #import('../compiler/implementation/scanner/scannerlib.dart'); |
| 8 #import('markdown.dart', prefix: 'md'); | 8 #import('markdown.dart', prefix: 'md'); |
| 9 | 9 |
| 10 /** | 10 /** |
| 11 * Kinds of tokens that we care to highlight differently. The values of the | 11 * Kinds of tokens that we care to highlight differently. The values of the |
| 12 * fields here will be used as CSS class names for the generated spans. | 12 * fields here will be used as CSS class names for the generated spans. |
| 13 */ | 13 */ |
| 14 class Classification { | 14 class Classification { |
| 15 static final NONE = null; | 15 static final NONE = null; |
| 16 static final ERROR = "e"; | 16 static final ERROR = "e"; |
| 17 static final COMMENT = "c"; | 17 static final COMMENT = "c"; |
| 18 static final IDENTIFIER = "i"; | 18 static final IDENTIFIER = "i"; |
| 19 static final KEYWORD = "k"; | 19 static final KEYWORD = "k"; |
| 20 static final OPERATOR = "o"; | 20 static final OPERATOR = "o"; |
| 21 static final STRING = "s"; | 21 static final STRING = "s"; |
| 22 static final NUMBER = "n"; | 22 static final NUMBER = "n"; |
| 23 static final PUNCTUATION = "p"; | 23 static final PUNCTUATION = "p"; |
| 24 | 24 |
| 25 // A few things that are nice to make different: | 25 // A few things that are nice to make different: |
| 26 static final TYPE_IDENTIFIER = "t"; | 26 static final TYPE_IDENTIFIER = "t"; |
| 27 | 27 |
| 28 // Between a keyword and an identifier | 28 // Between a keyword and an identifier |
| 29 static final SPECIAL_IDENTIFIER = "r"; | 29 static final SPECIAL_IDENTIFIER = "r"; |
| 30 | 30 |
| 31 static final ARROW_OPERATOR = "a"; | 31 static final ARROW_OPERATOR = "a"; |
| 32 | 32 |
| 33 static final STRING_INTERPOLATION = 'si'; | 33 static final STRING_INTERPOLATION = 'si'; |
| 34 } | 34 } |
| 35 | 35 |
| 36 String classifySource(SourceFile src) { | 36 String classifySource(String text) { |
| 37 var html = new StringBuffer(); | 37 var html = new StringBuffer(); |
| 38 var tokenizer = new Tokenizer(src, /*skipWhitespace:*/false); | 38 var tokenizer = new StringScanner(text, includeComments: true); |
| 39 | 39 |
| 40 var token; | 40 var whitespaceOffset = 0; |
| 41 var token = tokenizer.tokenize(); |
| 41 var inString = false; | 42 var inString = false; |
| 42 while ((token = tokenizer.next()).kind != TokenKind.END_OF_FILE) { | 43 while (token.kind != EOF_TOKEN) { |
| 44 html.add(text.substring(whitespaceOffset, token.charOffset)); |
| 45 whitespaceOffset = token.charOffset + token.slowCharCount; |
| 43 | 46 |
| 44 // Track whether or not we're in a string. | 47 // Track whether or not we're in a string. |
| 45 switch (token.kind) { | 48 switch (token.kind) { |
| 46 case TokenKind.STRING: | 49 case STRING_TOKEN: |
| 47 case TokenKind.STRING_PART: | 50 case STRING_INTERPOLATION_TOKEN: |
| 48 case TokenKind.INCOMPLETE_STRING: | |
| 49 case TokenKind.INCOMPLETE_MULTILINE_STRING_DQ: | |
| 50 case TokenKind.INCOMPLETE_MULTILINE_STRING_SQ: | |
| 51 inString = true; | 51 inString = true; |
| 52 break; | 52 break; |
| 53 } | 53 } |
| 54 | 54 |
| 55 final kind = classify(token); | 55 final kind = classify(token); |
| 56 final text = md.escapeHtml(token.text); | 56 final escapedText = md.escapeHtml(token.slowToString()); |
| 57 if (kind != null) { | 57 if (kind != null) { |
| 58 // Add a secondary class to tokens appearing within a string so that | 58 // Add a secondary class to tokens appearing within a string so that |
| 59 // we can highlight tokens in an interpolation specially. | 59 // we can highlight tokens in an interpolation specially. |
| 60 var stringClass = inString ? Classification.STRING_INTERPOLATION : ''; | 60 var stringClass = inString ? Classification.STRING_INTERPOLATION : ''; |
| 61 html.add('<span class="$kind $stringClass">$text</span>'); | 61 html.add('<span class="$kind $stringClass">$escapedText</span>'); |
| 62 } else { | 62 } else { |
| 63 html.add('<span>$text</span>'); | 63 html.add(escapedText); |
| 64 } | 64 } |
| 65 | 65 |
| 66 // Track whether or not we're in a string. | 66 // Track whether or not we're in a string. |
| 67 if (token.kind == TokenKind.STRING) { | 67 if (token.kind == STRING_TOKEN) { |
| 68 inString = false; | 68 inString = false; |
| 69 } | 69 } |
| 70 token = token.next; |
| 70 } | 71 } |
| 71 return html.toString(); | 72 return html.toString(); |
| 72 } | 73 } |
| 73 | 74 |
| 74 bool _looksLikeType(String name) { | 75 bool _looksLikeType(String name) { |
| 75 // If the name looks like an UppercaseName, assume it's a type. | 76 // If the name looks like an UppercaseName, assume it's a type. |
| 76 return _looksLikePublicType(name) || _looksLikePrivateType(name); | 77 return _looksLikePublicType(name) || _looksLikePrivateType(name); |
| 77 } | 78 } |
| 78 | 79 |
| 79 bool _looksLikePublicType(String name) { | 80 bool _looksLikePublicType(String name) { |
| 80 // If the name looks like an UppercaseName, assume it's a type. | 81 // If the name looks like an UppercaseName, assume it's a type. |
| 81 return name.length >= 2 && isUpper(name[0]) && isLower(name[1]); | 82 return name.length >= 2 && isUpper(name[0]) && isLower(name[1]); |
| 82 } | 83 } |
| 83 | 84 |
| 84 bool _looksLikePrivateType(String name) { | 85 bool _looksLikePrivateType(String name) { |
| 85 // If the name looks like an _UppercaseName, assume it's a type. | 86 // If the name looks like an _UppercaseName, assume it's a type. |
| 86 return (name.length >= 3 && name[0] == '_' && isUpper(name[1]) | 87 return (name.length >= 3 && name[0] == '_' && isUpper(name[1]) |
| 87 && isLower(name[2])); | 88 && isLower(name[2])); |
| 88 } | 89 } |
| 89 | 90 |
| 90 // These ensure that they don't return "true" if the string only has symbols. | 91 // These ensure that they don't return "true" if the string only has symbols. |
| 91 bool isUpper(String s) => s.toLowerCase() != s; | 92 bool isUpper(String s) => s.toLowerCase() != s; |
| 92 bool isLower(String s) => s.toUpperCase() != s; | 93 bool isLower(String s) => s.toUpperCase() != s; |
| 93 | 94 |
| 94 String classify(Token token) { | 95 String classify(Token token) { |
| 95 switch (token.kind) { | 96 switch (token.kind) { |
| 96 case TokenKind.ERROR: | 97 case UNKNOWN_TOKEN: |
| 97 return Classification.ERROR; | 98 return Classification.ERROR; |
| 98 | 99 |
| 99 case TokenKind.IDENTIFIER: | 100 case IDENTIFIER_TOKEN: |
| 100 // Special case for names that look like types. | 101 // Special case for names that look like types. |
| 101 if (_looksLikeType(token.text) | 102 final text = token.slowToString(); |
| 102 || token.text == 'num' | 103 if (_looksLikeType(text) |
| 103 || token.text == 'bool' | 104 || text == 'num' |
| 104 || token.text == 'int' | 105 || text == 'bool' |
| 105 || token.text == 'double') { | 106 || text == 'int' |
| 107 || text == 'double') { |
| 106 return Classification.TYPE_IDENTIFIER; | 108 return Classification.TYPE_IDENTIFIER; |
| 107 } | 109 } |
| 108 return Classification.IDENTIFIER; | 110 return Classification.IDENTIFIER; |
| 109 | 111 |
| 110 // Even though it's a reserved word, let's try coloring it like a type. | 112 case STRING_TOKEN: |
| 111 case TokenKind.VOID: | 113 case STRING_INTERPOLATION_TOKEN: |
| 112 return Classification.TYPE_IDENTIFIER; | |
| 113 | |
| 114 case TokenKind.THIS: | |
| 115 case TokenKind.SUPER: | |
| 116 return Classification.SPECIAL_IDENTIFIER; | |
| 117 | |
| 118 case TokenKind.STRING: | |
| 119 case TokenKind.STRING_PART: | |
| 120 case TokenKind.INCOMPLETE_STRING: | |
| 121 case TokenKind.INCOMPLETE_MULTILINE_STRING_DQ: | |
| 122 case TokenKind.INCOMPLETE_MULTILINE_STRING_SQ: | |
| 123 return Classification.STRING; | 114 return Classification.STRING; |
| 124 | 115 |
| 125 case TokenKind.INTEGER: | 116 case INT_TOKEN: |
| 126 case TokenKind.HEX_INTEGER: | 117 case HEXADECIMAL_TOKEN: |
| 127 case TokenKind.DOUBLE: | 118 case DOUBLE_TOKEN: |
| 128 return Classification.NUMBER; | 119 return Classification.NUMBER; |
| 129 | 120 |
| 130 case TokenKind.COMMENT: | 121 case COMMENT_TOKEN: |
| 131 case TokenKind.INCOMPLETE_COMMENT: | |
| 132 return Classification.COMMENT; | 122 return Classification.COMMENT; |
| 133 | 123 |
| 134 // => is so awesome it is in a class of its own. | 124 // => is so awesome it is in a class of its own. |
| 135 case TokenKind.ARROW: | 125 case FUNCTION_TOKEN: |
| 136 return Classification.ARROW_OPERATOR; | 126 return Classification.ARROW_OPERATOR; |
| 137 | 127 |
| 138 case TokenKind.HASHBANG: | 128 case OPEN_PAREN_TOKEN: |
| 139 case TokenKind.LPAREN: | 129 case CLOSE_PAREN_TOKEN: |
| 140 case TokenKind.RPAREN: | 130 case OPEN_SQUARE_BRACKET_TOKEN: |
| 141 case TokenKind.LBRACK: | 131 case CLOSE_SQUARE_BRACKET_TOKEN: |
| 142 case TokenKind.RBRACK: | 132 case OPEN_CURLY_BRACKET_TOKEN: |
| 143 case TokenKind.LBRACE: | 133 case CLOSE_CURLY_BRACKET_TOKEN: |
| 144 case TokenKind.RBRACE: | 134 case COLON_TOKEN: |
| 145 case TokenKind.COLON: | 135 case SEMICOLON_TOKEN: |
| 146 case TokenKind.SEMICOLON: | 136 case COMMA_TOKEN: |
| 147 case TokenKind.COMMA: | 137 case PERIOD_TOKEN: |
| 148 case TokenKind.DOT: | 138 case PERIOD_PERIOD_TOKEN: |
| 149 case TokenKind.ELLIPSIS: | |
| 150 return Classification.PUNCTUATION; | 139 return Classification.PUNCTUATION; |
| 151 | 140 |
| 152 case TokenKind.INCR: | 141 case PLUS_PLUS_TOKEN: |
| 153 case TokenKind.DECR: | 142 case MINUS_MINUS_TOKEN: |
| 154 case TokenKind.BIT_NOT: | 143 case TILDE_TOKEN: |
| 155 case TokenKind.NOT: | 144 case BANG_TOKEN: |
| 156 case TokenKind.ASSIGN: | 145 case EQ_TOKEN: |
| 157 case TokenKind.ASSIGN_OR: | 146 case BAR_EQ_TOKEN: |
| 158 case TokenKind.ASSIGN_XOR: | 147 case CARET_EQ_TOKEN: |
| 159 case TokenKind.ASSIGN_AND: | 148 case AMPERSAND_EQ_TOKEN: |
| 160 case TokenKind.ASSIGN_SHL: | 149 case LT_LT_EQ_TOKEN: |
| 161 case TokenKind.ASSIGN_SAR: | 150 case GT_GT_GT_EQ_TOKEN: |
| 162 case TokenKind.ASSIGN_SHR: | 151 case GT_GT_EQ_TOKEN: |
| 163 case TokenKind.ASSIGN_ADD: | 152 case PLUS_EQ_TOKEN: |
| 164 case TokenKind.ASSIGN_SUB: | 153 case MINUS_EQ_TOKEN: |
| 165 case TokenKind.ASSIGN_MUL: | 154 case STAR_EQ_TOKEN: |
| 166 case TokenKind.ASSIGN_DIV: | 155 case SLASH_EQ_TOKEN: |
| 167 case TokenKind.ASSIGN_TRUNCDIV: | 156 case TILDE_SLASH_EQ_TOKEN: |
| 168 case TokenKind.ASSIGN_MOD: | 157 case PERCENT_EQ_TOKEN: |
| 169 case TokenKind.CONDITIONAL: | 158 case QUESTION_TOKEN: |
| 170 case TokenKind.OR: | 159 case BAR_BAR_TOKEN: |
| 171 case TokenKind.AND: | 160 case AMPERSAND_AMPERSAND_TOKEN: |
| 172 case TokenKind.BIT_OR: | 161 case BAR_TOKEN: |
| 173 case TokenKind.BIT_XOR: | 162 case CARET_TOKEN: |
| 174 case TokenKind.BIT_AND: | 163 case AMPERSAND_TOKEN: |
| 175 case TokenKind.SHL: | 164 case LT_LT_TOKEN: |
| 176 case TokenKind.SAR: | 165 case GT_GT_GT_TOKEN: |
| 177 case TokenKind.SHR: | 166 case GT_GT_TOKEN: |
| 178 case TokenKind.ADD: | 167 case PLUS_TOKEN: |
| 179 case TokenKind.SUB: | 168 case MINUS_TOKEN: |
| 180 case TokenKind.MUL: | 169 case STAR_TOKEN: |
| 181 case TokenKind.DIV: | 170 case SLASH_TOKEN: |
| 182 case TokenKind.TRUNCDIV: | 171 case TILDE_SLASH_TOKEN: |
| 183 case TokenKind.MOD: | 172 case PERCENT_TOKEN: |
| 184 case TokenKind.EQ: | 173 case EQ_EQ_TOKEN: |
| 185 case TokenKind.NE: | 174 case BANG_EQ_TOKEN: |
| 186 case TokenKind.EQ_STRICT: | 175 case EQ_EQ_EQ_TOKEN: |
| 187 case TokenKind.NE_STRICT: | 176 case BANG_EQ_EQ_TOKEN: |
| 188 case TokenKind.LT: | 177 case LT_TOKEN: |
| 189 case TokenKind.GT: | 178 case GT_TOKEN: |
| 190 case TokenKind.LTE: | 179 case LT_EQ_TOKEN: |
| 191 case TokenKind.GTE: | 180 case GT_EQ_TOKEN: |
| 192 case TokenKind.INDEX: | 181 case INDEX_TOKEN: |
| 193 case TokenKind.SETINDEX: | 182 case INDEX_EQ_TOKEN: |
| 194 return Classification.OPERATOR; | 183 return Classification.OPERATOR; |
| 195 | 184 |
| 196 // Color this like a keyword | 185 // Color keyword token. Most are colored as keywords. |
| 197 case TokenKind.HASH: | 186 case HASH_TOKEN: |
| 198 | 187 case KEYWORD_TOKEN: |
| 199 case TokenKind.ABSTRACT: | 188 if (token.stringValue === 'void') { |
| 200 case TokenKind.ASSERT: | 189 // Color "void" as a type. |
| 201 case TokenKind.CLASS: | 190 return Classification.TYPE_IDENTIFIER; |
| 202 case TokenKind.EXTENDS: | 191 } |
| 203 case TokenKind.FACTORY: | 192 if (token.stringValue === 'this' || token.stringValue === 'super') { |
| 204 case TokenKind.GET: | 193 // Color "this" and "super" as identifiers. |
| 205 case TokenKind.IMPLEMENTS: | 194 return Classification.SPECIAL_IDENTIFIER; |
| 206 case TokenKind.IMPORT: | 195 } |
| 207 case TokenKind.INTERFACE: | |
| 208 case TokenKind.LIBRARY: | |
| 209 case TokenKind.NATIVE: | |
| 210 case TokenKind.NEGATE: | |
| 211 case TokenKind.OPERATOR: | |
| 212 case TokenKind.SET: | |
| 213 case TokenKind.SOURCE: | |
| 214 case TokenKind.STATIC: | |
| 215 case TokenKind.TYPEDEF: | |
| 216 case TokenKind.BREAK: | |
| 217 case TokenKind.CASE: | |
| 218 case TokenKind.CATCH: | |
| 219 case TokenKind.CONST: | |
| 220 case TokenKind.CONTINUE: | |
| 221 case TokenKind.DEFAULT: | |
| 222 case TokenKind.DO: | |
| 223 case TokenKind.ELSE: | |
| 224 case TokenKind.FALSE: | |
| 225 case TokenKind.FINALLY: | |
| 226 case TokenKind.FOR: | |
| 227 case TokenKind.IF: | |
| 228 case TokenKind.IN: | |
| 229 case TokenKind.IS: | |
| 230 case TokenKind.NEW: | |
| 231 case TokenKind.NULL: | |
| 232 case TokenKind.RETURN: | |
| 233 case TokenKind.SWITCH: | |
| 234 case TokenKind.THROW: | |
| 235 case TokenKind.TRUE: | |
| 236 case TokenKind.TRY: | |
| 237 case TokenKind.WHILE: | |
| 238 case TokenKind.VAR: | |
| 239 case TokenKind.FINAL: | |
| 240 return Classification.KEYWORD; | 196 return Classification.KEYWORD; |
| 241 | 197 |
| 242 case TokenKind.WHITESPACE: | 198 case EOF_TOKEN: |
| 243 case TokenKind.END_OF_FILE: | |
| 244 return Classification.NONE; | 199 return Classification.NONE; |
| 245 | 200 |
| 246 default: | 201 default: |
| 247 return Classification.NONE; | 202 return Classification.NONE; |
| 248 } | 203 } |
| 249 } | 204 } |
| OLD | NEW |