lib/src/inputstream.dart - Issue 11260039: Advance html5lib to newest breaking changes in core: getKeys -> keys, etc

Side by Side Diff: lib/src/inputstream.dart

Issue 11260039: Advance html5lib to newest breaking changes in core: getKeys -> keys, etc (Closed) Base URL: git@github.com:dart-lang/html5lib.git@master

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 library inputstream;	1 library inputstream;

2	2

3 import 'dart:utf';	3 import 'dart:utf';

4 import 'package:html5lib/dom_parsing.dart' show SourceFileInfo;	4 import 'package:html5lib/dom_parsing.dart' show SourceFileInfo;

5 import 'char_encodings.dart';	5 import 'char_encodings.dart';

6 import 'constants.dart';	6 import 'constants.dart';

7 import 'utils.dart';	7 import 'utils.dart';

8 import 'encoding_parser.dart';	8 import 'encoding_parser.dart';

9	9

10 /** Hooks to call into dart:io without directly referencing it. */	10 /** Hooks to call into dart:io without directly referencing it. */

(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
86 * element)	86 * element)

87 *	87 *

88 * [parseMeta] - Look for a <meta> element containing encoding information	88 * [parseMeta] - Look for a <meta> element containing encoding information

89 */	89 */

90 HtmlInputStream(source, [String encoding, bool parseMeta = true,	90 HtmlInputStream(source, [String encoding, bool parseMeta = true,

91 this.generateSpans = false])	91 this.generateSpans = false])

92 : charEncodingName = codecName(encoding) {	92 : charEncodingName = codecName(encoding) {

93	93

94 if (source is String) {	94 if (source is String) {

95 // TODO(jmesserly): if the data is already a string, we should just use	95 // TODO(jmesserly): if the data is already a string, we should just use

96 // the source.charCodes() instead of wasting time encoding/decoding.	96 // the source.charCodes instead of wasting time encoding/decoding.

97 rawBytes = encodeUtf8(source);	97 rawBytes = encodeUtf8(source);

98 charEncodingName = 'utf-8';	98 charEncodingName = 'utf-8';

99 charEncodingCertain = true;	99 charEncodingCertain = true;

100 } else if (source is List<int>) {	100 } else if (source is List<int>) {

101 rawBytes = source;	101 rawBytes = source;

102 } else {	102 } else {

103 // TODO(jmesserly): it's unfortunate we need to read all bytes in advance,	103 // TODO(jmesserly): it's unfortunate we need to read all bytes in advance,

104 // but it's necessary because of how the UTF decoders work.	104 // but it's necessary because of how the UTF decoders work.

105 rawBytes = consoleSupport.bytesFromFile(source);	105 rawBytes = consoleSupport.bytesFromFile(source);

106	106

(...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after
299 var regexpKey = new Pair(characters, opposite ? 'opposite' : '');	299 var regexpKey = new Pair(characters, opposite ? 'opposite' : '');

300 var chars = charsUntilRegEx[regexpKey];	300 var chars = charsUntilRegEx[regexpKey];

301	301

302 if (chars == null) {	302 if (chars == null) {

303 escapeChar(c) {	303 escapeChar(c) {

304 assert(c < 128);	304 assert(c < 128);

305 var hex = c.toRadixString(16);	305 var hex = c.toRadixString(16);

306 hex = (hex.length == 1) ? "0$hex" : hex;	306 hex = (hex.length == 1) ? "0$hex" : hex;

307 return "\\u00$hex";	307 return "\\u00$hex";

308 }	308 }

309 var regex = joinStr(characters.charCodes().map(escapeChar));	309 var regex = joinStr(characters.charCodes.map(escapeChar));

310 if (!opposite) {	310 if (!opposite) {

311 regex = "^${regex}";	311 regex = "^${regex}";

312 }	312 }

313 chars = charsUntilRegEx[regexpKey] = new RegExp("^[${regex}]+");	313 chars = charsUntilRegEx[regexpKey] = new RegExp("^[${regex}]+");

314 }	314 }

315	315

316 var rv = [];	316 var rv = [];

317 while (true) {	317 while (true) {

318 // Find the longest matching prefix	318 // Find the longest matching prefix

319 // TODO(jmesserly): RegExp does not seem to offer a start offset?	319 // TODO(jmesserly): RegExp does not seem to offer a start offset?

320 var searchChunk = chunk.substring(chunkOffset);	320 var searchChunk = chunk.substring(chunkOffset);

321 var m = chars.firstMatch(searchChunk);	321 var m = chars.firstMatch(searchChunk);

322 if (m === null) {	322 if (m === null) {

323 // If nothing matched, and it wasn't because we ran out of chunk,	323 // If nothing matched, and it wasn't because we ran out of chunk,

324 // then stop	324 // then stop

325 if (chunkOffset != chunk.length) {	325 if (chunkOffset != chunk.length) {

326 break;	326 break;

327 }	327 }

328 } else {	328 } else {

329 assert(m.start() == 0);	329 assert(m.start == 0);

330 var end = m.end();	330 var end = m.end;

331 // If not the whole chunk matched, return everything	331 // If not the whole chunk matched, return everything

332 // up to the part that didn't match	332 // up to the part that didn't match

333 if (end != chunk.length - chunkOffset) {	333 if (end != chunk.length - chunkOffset) {

334 rv.add(searchChunk.substring(0, end));	334 rv.add(searchChunk.substring(0, end));

335 chunkOffset += end;	335 chunkOffset += end;

336 break;	336 break;

337 }	337 }

338 }	338 }

339 // If the whole remainder of the chunk matched,	339 // If the whole remainder of the chunk matched,

340 // use it all and read the next chunk	340 // use it all and read the next chunk

(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
417 * string doesn't correspond to a valid encoding.	417 * string doesn't correspond to a valid encoding.

418 */	418 */

419 String codecName(String encoding) {	419 String codecName(String encoding) {

420 final asciiPunctuation = const RegExp(	420 final asciiPunctuation = const RegExp(

421 "[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]");	421 "[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]");

422	422

423 if (encoding == null) return null;	423 if (encoding == null) return null;

424 var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase();	424 var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase();

425 return encodings[canonicalName];	425 return encodings[canonicalName];

426 }	426 }

OLD	NEW

« no previous file with comments | « lib/parser.dart ('k') | lib/src/list_proxy.dart » ('j') | no next file with comments »