lib/src/tokenizer.dart - Issue 11260039: Advance html5lib to newest breaking changes in core: getKeys -> keys, etc

Side by Side Diff: lib/src/tokenizer.dart

Issue 11260039: Advance html5lib to newest breaking changes in core: getKeys -> keys, etc (Closed) Base URL: git@github.com:dart-lang/html5lib.git@master

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 library tokenizer;	1 library tokenizer;

2	2

3 import 'dart:math';	3 import 'dart:math';

4 import 'package:html5lib/dom_parsing.dart' show SourceSpan;	4 import 'package:html5lib/dom_parsing.dart' show SourceSpan;

5 import 'package:html5lib/parser.dart' show HtmlParser;	5 import 'package:html5lib/parser.dart' show HtmlParser;

6 import 'constants.dart';	6 import 'constants.dart';

7 import 'inputstream.dart';	7 import 'inputstream.dart';

8 import 'token.dart';	8 import 'token.dart';

9 import 'utils.dart';	9 import 'utils.dart';

10	10

11 // Group entities by their first character, for faster lookups	11 // Group entities by their first character, for faster lookups

12	12

13 // TODO(jmesserly): we could use a better data structure here like a trie, if	13 // TODO(jmesserly): we could use a better data structure here like a trie, if

14 // we had it implemented in Dart.	14 // we had it implemented in Dart.

15 Map<String, List<String>> entitiesByFirstChar = (() {	15 Map<String, List<String>> entitiesByFirstChar = (() {

16 var result = {};	16 var result = {};

17 for (var k in entities.getKeys()) {	17 for (var k in entities.keys) {

18 result.putIfAbsent(k[0], () => []).add(k);	18 result.putIfAbsent(k[0], () => []).add(k);

19 }	19 }

20 return result;	20 return result;

21 })();	21 })();

22	22

23 // TODO(jmesserly): lots of ways to make this faster:	23 // TODO(jmesserly): lots of ways to make this faster:

24 // - use char codes everywhere instead of 1-char strings	24 // - use char codes everywhere instead of 1-char strings

25 // - use switch instead of contains, indexOf	25 // - use switch instead of contains, indexOf

26 // - use switch instead of the sequential if tests	26 // - use switch instead of the sequential if tests

27 // - avoid string concat	27 // - avoid string concat

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
64 HtmlTokenizer(doc,	64 HtmlTokenizer(doc,

65 [String encoding, bool parseMeta = true,	65 [String encoding, bool parseMeta = true,

66 this.lowercaseElementName = true, this.lowercaseAttrName = true,	66 this.lowercaseElementName = true, this.lowercaseAttrName = true,

67 bool generateSpans = false])	67 bool generateSpans = false])

68 : stream = new HtmlInputStream(doc, encoding, parseMeta, generateSpans),	68 : stream = new HtmlInputStream(doc, encoding, parseMeta, generateSpans),

69 tokenQueue = new Queue(),	69 tokenQueue = new Queue(),

70 generateSpans = generateSpans {	70 generateSpans = generateSpans {

71 reset();	71 reset();

72 }	72 }

73	73

74 get lastData => currentToken.data.last();	74 get lastData => currentToken.data.last;

75	75

76 TagToken get currentTagToken => currentToken;	76 TagToken get currentTagToken => currentToken;

77 DoctypeToken get currentDoctypeToken => currentToken;	77 DoctypeToken get currentDoctypeToken => currentToken;

78	78

79 bool get hasNext {	79 bool get hasNext {

80 if (stream.errors.length > 0) return true;	80 if (stream.errors.length > 0) return true;

81 if (tokenQueue.length > 0) return true;	81 if (tokenQueue.length > 0) return true;

82 // Start processing. When EOF is reached state will return false	82 // Start processing. When EOF is reached state will return false

83 // instead of true and the loop will terminate.	83 // instead of true and the loop will terminate.

84 do {	84 do {

(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
204 var output = "&";	204 var output = "&";

205	205

206 var charStack = [stream.char()];	206 var charStack = [stream.char()];

207 if (isWhitespace(charStack[0]) || charStack[0] == '<' || charStack[0] == '&'	207 if (isWhitespace(charStack[0]) || charStack[0] == '<' || charStack[0] == '&'

208 || charStack[0] == EOF || allowedChar == charStack[0]) {	208 || charStack[0] == EOF || allowedChar == charStack[0]) {

209 stream.unget(charStack[0]);	209 stream.unget(charStack[0]);

210 } else if (charStack[0] == "#") {	210 } else if (charStack[0] == "#") {

211 // Read the next character to see if it's hex or decimal	211 // Read the next character to see if it's hex or decimal

212 bool hex = false;	212 bool hex = false;

213 charStack.add(stream.char());	213 charStack.add(stream.char());

214 if (charStack.last() == 'x' || charStack.last() == 'X') {	214 if (charStack.last == 'x' || charStack.last == 'X') {

215 hex = true;	215 hex = true;

216 charStack.add(stream.char());	216 charStack.add(stream.char());

217 }	217 }

218	218

219 // charStack.last() should be the first digit	219 // charStack.last should be the first digit

220 if (hex && isHexDigit(charStack.last()) ||	220 if (hex && isHexDigit(charStack.last) ||

221 (!hex && isDigit(charStack.last()))) {	221 (!hex && isDigit(charStack.last))) {

222 // At least one digit found, so consume the whole number	222 // At least one digit found, so consume the whole number

223 stream.unget(charStack.last());	223 stream.unget(charStack.last);

224 output = consumeNumberEntity(hex);	224 output = consumeNumberEntity(hex);

225 } else {	225 } else {

226 // No digits found	226 // No digits found

227 _addToken(new ParseErrorToken("expected-numeric-entity"));	227 _addToken(new ParseErrorToken("expected-numeric-entity"));

228 stream.unget(charStack.removeLast());	228 stream.unget(charStack.removeLast());

229 output = "&${joinStr(charStack)}";	229 output = "&${joinStr(charStack)}";

230 }	230 }

231 } else {	231 } else {

232 // At this point in the process we might have a named entity. Entities	232 // At this point in the process we might have a named entity. Entities

233 // are stored in the global variable "entities".	233 // are stored in the global variable "entities".

234 //	234 //

235 // Consume characters and compare them to a substring of the	235 // Consume characters and compare them to a substring of the

236 // entity names in the list until the substring no longer matches.	236 // entity names in the list until the substring no longer matches.

237 var filteredEntityList = entitiesByFirstChar[charStack[0]];	237 var filteredEntityList = entitiesByFirstChar[charStack[0]];

238 if (filteredEntityList == null) filteredEntityList = const [];	238 if (filteredEntityList == null) filteredEntityList = const [];

239	239

240 while (charStack.last() !== EOF) {	240 while (charStack.last !== EOF) {

241 var name = joinStr(charStack);	241 var name = joinStr(charStack);

242 filteredEntityList = filteredEntityList.filter(	242 filteredEntityList = filteredEntityList.filter(

243 (e) => e.startsWith(name));	243 (e) => e.startsWith(name));

244	244

245 if (filteredEntityList.length == 0) {	245 if (filteredEntityList.length == 0) {

246 break;	246 break;

247 }	247 }

248 charStack.add(stream.char());	248 charStack.add(stream.char());

249 }	249 }

250	250

(...skipping 953 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1204	1204

1205 // Eat the character directly after the bogus comment which is either a	1205 // Eat the character directly after the bogus comment which is either a

1206 // ">" or an EOF.	1206 // ">" or an EOF.

1207 stream.char();	1207 stream.char();

1208 state = dataState;	1208 state = dataState;

1209 return true;	1209 return true;

1210 }	1210 }

1211	1211

1212 bool markupDeclarationOpenState() {	1212 bool markupDeclarationOpenState() {

1213 var charStack = [stream.char()];	1213 var charStack = [stream.char()];

1214 if (charStack.last() == "-") {	1214 if (charStack.last == "-") {

1215 charStack.add(stream.char());	1215 charStack.add(stream.char());

1216 if (charStack.last() == "-") {	1216 if (charStack.last == "-") {

1217 currentToken = new CommentToken("");	1217 currentToken = new CommentToken("");

1218 state = commentStartState;	1218 state = commentStartState;

1219 return true;	1219 return true;

1220 }	1220 }

1221 } else if (charStack.last() == 'd' || charStack.last() == 'D') {	1221 } else if (charStack.last == 'd' || charStack.last == 'D') {

1222 var matched = true;	1222 var matched = true;

1223 for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) {	1223 for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) {

1224 var char = stream.char();	1224 var char = stream.char();

1225 charStack.add(char);	1225 charStack.add(char);

1226 if (char == EOF || !expected.contains(char)) {	1226 if (char == EOF || !expected.contains(char)) {

1227 matched = false;	1227 matched = false;

1228 break;	1228 break;

1229 }	1229 }

1230 }	1230 }

1231 if (matched) {	1231 if (matched) {

1232 currentToken = new DoctypeToken(correct: true);	1232 currentToken = new DoctypeToken(correct: true);

1233 state = doctypeState;	1233 state = doctypeState;

1234 return true;	1234 return true;

1235 }	1235 }

1236 } else if (charStack.last() == "[" &&	1236 } else if (charStack.last == "[" &&

1237 parser !== null && parser.tree.openElements.length > 0 &&	1237 parser !== null && parser.tree.openElements.length > 0 &&

1238 parser.tree.openElements.last().namespace	1238 parser.tree.openElements.last.namespace

1239 != parser.tree.defaultNamespace) {	1239 != parser.tree.defaultNamespace) {

1240 var matched = true;	1240 var matched = true;

1241 for (var expected in const ["C", "D", "A", "T", "A", "["]) {	1241 for (var expected in const ["C", "D", "A", "T", "A", "["]) {

1242 charStack.add(stream.char());	1242 charStack.add(stream.char());

1243 if (charStack.last() != expected) {	1243 if (charStack.last != expected) {

1244 matched = false;	1244 matched = false;

1245 break;	1245 break;

1246 }	1246 }

1247 }	1247 }

1248 if (matched) {	1248 if (matched) {

1249 state = cdataSectionState;	1249 state = cdataSectionState;

1250 return true;	1250 return true;

1251 }	1251 }

1252 }	1252 }

1253	1253

(...skipping 572 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1826 }	1826 }

1827	1827

1828 if (data.length > 0) {	1828 if (data.length > 0) {

1829 _addToken(new CharactersToken(joinStr(data)));	1829 _addToken(new CharactersToken(joinStr(data)));

1830 }	1830 }

1831 state = dataState;	1831 state = dataState;

1832 return true;	1832 return true;

1833 }	1833 }

1834 }	1834 }

1835	1835

OLD	NEW

« no previous file with comments | « lib/src/list_proxy.dart ('k') | lib/src/treebuilder.dart » ('j') | no next file with comments »