| OLD | NEW |
| 1 #library('inputstream'); | 1 #library('inputstream'); |
| 2 | 2 |
| 3 #import('dart:io'); | 3 #import('dart:io'); |
| 4 #import('dart:utf'); | 4 #import('dart:utf'); |
| 5 #import('char_encodings.dart'); | 5 #import('char_encodings.dart'); |
| 6 #import('constants.dart'); | 6 #import('constants.dart'); |
| 7 #import('utils.dart'); | 7 #import('utils.dart'); |
| 8 #import('encoding_parser.dart'); | 8 #import('encoding_parser.dart'); |
| 9 #import('../treebuilders/simpletree.dart', prefix: 'tree'); // for Span | 9 #import('../dom.dart', prefix: 'dom'); // for Span |
| 10 | 10 |
| 11 /** | 11 /** |
| 12 * Provides a unicode stream of characters to the HTMLTokenizer. | 12 * Provides a unicode stream of characters to the HTMLTokenizer. |
| 13 * | 13 * |
| 14 * This class takes care of character encoding and removing or replacing | 14 * This class takes care of character encoding and removing or replacing |
| 15 * incorrect byte-sequences and also provides column and line tracking. | 15 * incorrect byte-sequences and also provides column and line tracking. |
| 16 */ | 16 */ |
| 17 class HTMLInputStream { | 17 class HTMLInputStream { |
| 18 | 18 |
| 19 const int _defaultChunkSize = 10240; | 19 const int _defaultChunkSize = 10240; |
| (...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 185 var parser = new EncodingParser(slice(rawBytes, 0, numBytesMeta)); | 185 var parser = new EncodingParser(slice(rawBytes, 0, numBytesMeta)); |
| 186 var encoding = parser.getEncoding(); | 186 var encoding = parser.getEncoding(); |
| 187 | 187 |
| 188 if (const ["utf-16", "utf-16-be", "utf-16-le"].indexOf(encoding) >= 0) { | 188 if (const ["utf-16", "utf-16-be", "utf-16-le"].indexOf(encoding) >= 0) { |
| 189 encoding = "utf-8"; | 189 encoding = "utf-8"; |
| 190 } | 190 } |
| 191 | 191 |
| 192 return encoding; | 192 return encoding; |
| 193 } | 193 } |
| 194 | 194 |
| 195 tree.Span _position(int offset) { | 195 dom.Span _position(int offset) { |
| 196 var nLines = 1; | 196 var nLines = 1; |
| 197 var lastLinePos = -1; | 197 var lastLinePos = -1; |
| 198 for (int i = 0; i < offset; i++) { | 198 for (int i = 0; i < offset; i++) { |
| 199 if (chunk.charCodeAt(i) == NEWLINE) { | 199 if (chunk.charCodeAt(i) == NEWLINE) { |
| 200 lastLinePos = i; | 200 lastLinePos = i; |
| 201 nLines++; | 201 nLines++; |
| 202 } | 202 } |
| 203 } | 203 } |
| 204 var positionLine = prevNumLines + nLines; | 204 var positionLine = prevNumLines + nLines; |
| 205 var positionColumn; | 205 var positionColumn; |
| 206 if (lastLinePos == -1) { | 206 if (lastLinePos == -1) { |
| 207 positionColumn = prevNumCols + offset; | 207 positionColumn = prevNumCols + offset; |
| 208 } else { | 208 } else { |
| 209 positionColumn = offset - (lastLinePos + 1); | 209 positionColumn = offset - (lastLinePos + 1); |
| 210 } | 210 } |
| 211 return new tree.Span(positionLine, positionColumn); | 211 return new dom.Span(positionLine, positionColumn); |
| 212 } | 212 } |
| 213 | 213 |
| 214 /** Returns (line, col) of the current position in the stream. */ | 214 /** Returns (line, col) of the current position in the stream. */ |
| 215 tree.Span position() => _position(chunkOffset); | 215 dom.Span position() => _position(chunkOffset); |
| 216 | 216 |
| 217 /** | 217 /** |
| 218 * Read one character from the stream or queue if available. Return | 218 * Read one character from the stream or queue if available. Return |
| 219 * EOF when EOF is reached. | 219 * EOF when EOF is reached. |
| 220 */ | 220 */ |
| 221 String char() { | 221 String char() { |
| 222 // Read a new chunk from the input stream if necessary | 222 // Read a new chunk from the input stream if necessary |
| 223 if (chunkOffset >= chunk.length) { | 223 if (chunkOffset >= chunk.length) { |
| 224 if (!readChunk()) { | 224 if (!readChunk()) { |
| 225 return EOF; | 225 return EOF; |
| (...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 424 * string doesn't correspond to a valid encoding. | 424 * string doesn't correspond to a valid encoding. |
| 425 */ | 425 */ |
| 426 String codecName(String encoding) { | 426 String codecName(String encoding) { |
| 427 final asciiPunctuation = const RegExp( | 427 final asciiPunctuation = const RegExp( |
| 428 "[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]"); | 428 "[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]"); |
| 429 | 429 |
| 430 if (encoding == null) return null; | 430 if (encoding == null) return null; |
| 431 var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase(); | 431 var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase(); |
| 432 return encodings[canonicalName]; | 432 return encodings[canonicalName]; |
| 433 } | 433 } |
| OLD | NEW |