Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(159)

Side by Side Diff: lib/src/inputstream.dart

Issue 11260039: Advance html5lib to newest breaking changes in core: getKeys -> keys, etc (Closed) Base URL: git@github.com:dart-lang/html5lib.git@master
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/parser.dart ('k') | lib/src/list_proxy.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 library inputstream; 1 library inputstream;
2 2
3 import 'dart:utf'; 3 import 'dart:utf';
4 import 'package:html5lib/dom_parsing.dart' show SourceFileInfo; 4 import 'package:html5lib/dom_parsing.dart' show SourceFileInfo;
5 import 'char_encodings.dart'; 5 import 'char_encodings.dart';
6 import 'constants.dart'; 6 import 'constants.dart';
7 import 'utils.dart'; 7 import 'utils.dart';
8 import 'encoding_parser.dart'; 8 import 'encoding_parser.dart';
9 9
10 /** Hooks to call into dart:io without directly referencing it. */ 10 /** Hooks to call into dart:io without directly referencing it. */
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
86 * element) 86 * element)
87 * 87 *
88 * [parseMeta] - Look for a <meta> element containing encoding information 88 * [parseMeta] - Look for a <meta> element containing encoding information
89 */ 89 */
90 HtmlInputStream(source, [String encoding, bool parseMeta = true, 90 HtmlInputStream(source, [String encoding, bool parseMeta = true,
91 this.generateSpans = false]) 91 this.generateSpans = false])
92 : charEncodingName = codecName(encoding) { 92 : charEncodingName = codecName(encoding) {
93 93
94 if (source is String) { 94 if (source is String) {
95 // TODO(jmesserly): if the data is already a string, we should just use 95 // TODO(jmesserly): if the data is already a string, we should just use
96 // the source.charCodes() instead of wasting time encoding/decoding. 96 // the source.charCodes instead of wasting time encoding/decoding.
97 rawBytes = encodeUtf8(source); 97 rawBytes = encodeUtf8(source);
98 charEncodingName = 'utf-8'; 98 charEncodingName = 'utf-8';
99 charEncodingCertain = true; 99 charEncodingCertain = true;
100 } else if (source is List<int>) { 100 } else if (source is List<int>) {
101 rawBytes = source; 101 rawBytes = source;
102 } else { 102 } else {
103 // TODO(jmesserly): it's unfortunate we need to read all bytes in advance, 103 // TODO(jmesserly): it's unfortunate we need to read all bytes in advance,
104 // but it's necessary because of how the UTF decoders work. 104 // but it's necessary because of how the UTF decoders work.
105 rawBytes = consoleSupport.bytesFromFile(source); 105 rawBytes = consoleSupport.bytesFromFile(source);
106 106
(...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after
299 var regexpKey = new Pair(characters, opposite ? 'opposite' : ''); 299 var regexpKey = new Pair(characters, opposite ? 'opposite' : '');
300 var chars = charsUntilRegEx[regexpKey]; 300 var chars = charsUntilRegEx[regexpKey];
301 301
302 if (chars == null) { 302 if (chars == null) {
303 escapeChar(c) { 303 escapeChar(c) {
304 assert(c < 128); 304 assert(c < 128);
305 var hex = c.toRadixString(16); 305 var hex = c.toRadixString(16);
306 hex = (hex.length == 1) ? "0$hex" : hex; 306 hex = (hex.length == 1) ? "0$hex" : hex;
307 return "\\u00$hex"; 307 return "\\u00$hex";
308 } 308 }
309 var regex = joinStr(characters.charCodes().map(escapeChar)); 309 var regex = joinStr(characters.charCodes.map(escapeChar));
310 if (!opposite) { 310 if (!opposite) {
311 regex = "^${regex}"; 311 regex = "^${regex}";
312 } 312 }
313 chars = charsUntilRegEx[regexpKey] = new RegExp("^[${regex}]+"); 313 chars = charsUntilRegEx[regexpKey] = new RegExp("^[${regex}]+");
314 } 314 }
315 315
316 var rv = []; 316 var rv = [];
317 while (true) { 317 while (true) {
318 // Find the longest matching prefix 318 // Find the longest matching prefix
319 // TODO(jmesserly): RegExp does not seem to offer a start offset? 319 // TODO(jmesserly): RegExp does not seem to offer a start offset?
320 var searchChunk = chunk.substring(chunkOffset); 320 var searchChunk = chunk.substring(chunkOffset);
321 var m = chars.firstMatch(searchChunk); 321 var m = chars.firstMatch(searchChunk);
322 if (m === null) { 322 if (m === null) {
323 // If nothing matched, and it wasn't because we ran out of chunk, 323 // If nothing matched, and it wasn't because we ran out of chunk,
324 // then stop 324 // then stop
325 if (chunkOffset != chunk.length) { 325 if (chunkOffset != chunk.length) {
326 break; 326 break;
327 } 327 }
328 } else { 328 } else {
329 assert(m.start() == 0); 329 assert(m.start == 0);
330 var end = m.end(); 330 var end = m.end;
331 // If not the whole chunk matched, return everything 331 // If not the whole chunk matched, return everything
332 // up to the part that didn't match 332 // up to the part that didn't match
333 if (end != chunk.length - chunkOffset) { 333 if (end != chunk.length - chunkOffset) {
334 rv.add(searchChunk.substring(0, end)); 334 rv.add(searchChunk.substring(0, end));
335 chunkOffset += end; 335 chunkOffset += end;
336 break; 336 break;
337 } 337 }
338 } 338 }
339 // If the whole remainder of the chunk matched, 339 // If the whole remainder of the chunk matched,
340 // use it all and read the next chunk 340 // use it all and read the next chunk
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
417 * string doesn't correspond to a valid encoding. 417 * string doesn't correspond to a valid encoding.
418 */ 418 */
419 String codecName(String encoding) { 419 String codecName(String encoding) {
420 final asciiPunctuation = const RegExp( 420 final asciiPunctuation = const RegExp(
421 "[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]"); 421 "[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]");
422 422
423 if (encoding == null) return null; 423 if (encoding == null) return null;
424 var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase(); 424 var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase();
425 return encodings[canonicalName]; 425 return encodings[canonicalName];
426 } 426 }
OLD | NEW
« no previous file with comments | « lib/parser.dart ('k') | lib/src/list_proxy.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698