Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1373)

Side by Side Diff: html5parser.dart

Issue 10916294: switch html5lib to new pkg layout (Closed) Base URL: https://github.com/dart-lang/html5lib.git@master
Patch Set: Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « README.md ('k') | lib/char_encodings.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #library('html5parser');
2
3 #import('dart:math');
4 #import('package:logging/logging.dart');
5 #import('treebuilders/base.dart'); // for Marker
6 #import('treebuilders/simpletree.dart');
7 #import('lib/constants.dart');
8 #import('lib/encoding_parser.dart');
9 #import('lib/token.dart');
10 #import('lib/utils.dart');
11 #import('tokenizer.dart');
12
13 // TODO(jmesserly): these APIs, as well as the HTMLParser constructor and
14 // HTMLParser.parse and parseFragment were changed a bit to avoid passing a
15 // first class type that is used for construction. It might be okay, but I'd
16 // like to find a good dependency-injection pattern for Dart rather than
17 // copy the Python API.
18 // TODO(jmesserly): Also some of the HTMLParser APIs are messed up to avoid
19 // editor shadowing warnings :\. Look for trailing underscores.
/**
 * Parses a complete html5 document and returns the resulting [Document].
 *
 * [doc] may be a [String], a [RandomAccessFile] or a [List<int>] of bytes.
 * [treebuilder] is forwarded to the [HTMLParser] constructor, which creates
 * its own builder when none is given.
 *
 * When the optional [encoding] string is supplied it names the encoding to
 * use, and it wins over any BOM or later declaration (such as in a meta
 * element).
 */
Document parse(doc, [TreeBuilder treebuilder, String encoding]) {
  // Build the tokenizer first, then the parser, then hand one to the other.
  final tokens = new HTMLTokenizer(doc, encoding: encoding);
  final parser = new HTMLParser(treebuilder);
  return parser.parse(tokens);
}
32
/**
 * Parses an html5 document fragment and returns the resulting
 * [DocumentFragment].
 *
 * [doc] may be a [String], a [RandomAccessFile] or a [List<int>] of bytes.
 * [container] names the element the fragment is parsed inside of; it defaults
 * to "div". [treebuilder] is forwarded to the [HTMLParser] constructor.
 *
 * When the optional [encoding] string is supplied it names the encoding to
 * use, and it wins over any BOM or later declaration (such as in a meta
 * element).
 */
DocumentFragment parseFragment(doc, [String container = "div",
    TreeBuilder treebuilder, String encoding]) {
  final tokens = new HTMLTokenizer(doc, encoding: encoding);
  final fragmentParser = new HTMLParser(treebuilder);
  return fragmentParser.parseFragment(tokens, container_: container);
}
48
49
/**
 * HTML parser. Generates a tree structure from a stream of (possibly malformed)
 * HTML.
 *
 * The parser owns one instance of every [Phase] subclass. [mainLoop] pulls
 * tokens from [tokenizer] and hands each one to whichever phase is current
 * (or to the foreign-content phase when [inForeignContent] says so).
 */
class HTMLParser {
  /** Raise an exception on the first error encountered. */
  bool strict;

  /** Tree builder shared with every phase object. */
  final TreeBuilder tree;

  /** Parse errors recorded by [parseError] since the last [reset]. */
  List<ParseError> errors;

  // TODO(jmesserly): would be faster not to use Map lookup.
  Map<String, Phase> phases;

  /** Whether the current parse was started through [parseFragment]. */
  bool innerHTMLMode;

  /** Container element name supplied to the current parse. */
  String container;

  /**
   * Set by [BeforeHtmlPhase] when the start tag that triggers creation of the
   * root element is itself "html"; cleared by [Phase.startTagHtml].
   */
  bool firstStartTag = false;

  // TODO(jmesserly): use enum?
  /** "quirks" / "limited quirks" / "no quirks" */
  String compatMode = "no quirks";

  /** innerHTML container when parsing document fragment. */
  String innerHTML;

  /** The phase that receives the next token. */
  Phase phase;

  /** Cleared by [reset]; not otherwise touched inside this class. */
  Phase lastPhase;

  /** The phase that was current when the parser switched to [_textPhase]. */
  Phase originalPhase;

  /** Cleared by [reset]; not otherwise touched inside this class. */
  Phase beforeRCDataPhase;

  /** Starts out true on every [reset]; phases flip it as they see content. */
  bool framesetOK;

  /** Token source for the parse currently in progress. */
  HTMLTokenizer tokenizer;

  // These fields hold the different phase singletons. At any given time one
  // of them will be active.
  InitialPhase _initialPhase;
  BeforeHtmlPhase _beforeHtmlPhase;
  BeforeHeadPhase _beforeHeadPhase;
  InHeadPhase _inHeadPhase;
  AfterHeadPhase _afterHeadPhase;
  InBodyPhase _inBodyPhase;
  TextPhase _textPhase;
  InTablePhase _inTablePhase;
  InTableTextPhase _inTableTextPhase;
  InCaptionPhase _inCaptionPhase;
  InColumnGroupPhase _inColumnGroupPhase;
  InTableBodyPhase _inTableBodyPhase;
  InRowPhase _inRowPhase;
  InCellPhase _inCellPhase;
  InSelectPhase _inSelectPhase;
  InSelectInTablePhase _inSelectInTablePhase;
  InForeignContentPhase _inForeignContentPhase;
  AfterBodyPhase _afterBodyPhase;
  InFramesetPhase _inFramesetPhase;
  AfterFramesetPhase _afterFramesetPhase;
  AfterAfterBodyPhase _afterAfterBodyPhase;
  AfterAfterFramesetPhase _afterAfterFramesetPhase;

  /**
   * Create a new HTMLParser and configure the [tree] builder and [strict] mode.
   *
   * When [tree] is null a `new TreeBuilder(true)` is used instead.
   */
  HTMLParser([TreeBuilder tree, this.strict = false])
      : tree = tree != null ? tree : new TreeBuilder(true),
        errors = <ParseError>[] {

    _initialPhase = new InitialPhase(this);
    _beforeHtmlPhase = new BeforeHtmlPhase(this);
    _beforeHeadPhase = new BeforeHeadPhase(this);
    _inHeadPhase = new InHeadPhase(this);
    // XXX "inHeadNoscript": new InHeadNoScriptPhase(this);
    _afterHeadPhase = new AfterHeadPhase(this);
    _inBodyPhase = new InBodyPhase(this);
    _textPhase = new TextPhase(this);
    _inTablePhase = new InTablePhase(this);
    _inTableTextPhase = new InTableTextPhase(this);
    _inCaptionPhase = new InCaptionPhase(this);
    _inColumnGroupPhase = new InColumnGroupPhase(this);
    _inTableBodyPhase = new InTableBodyPhase(this);
    _inRowPhase = new InRowPhase(this);
    _inCellPhase = new InCellPhase(this);
    _inSelectPhase = new InSelectPhase(this);
    _inSelectInTablePhase = new InSelectInTablePhase(this);
    _inForeignContentPhase = new InForeignContentPhase(this);
    _afterBodyPhase = new AfterBodyPhase(this);
    _inFramesetPhase = new InFramesetPhase(this);
    _afterFramesetPhase = new AfterFramesetPhase(this);
    _afterAfterBodyPhase = new AfterAfterBodyPhase(this);
    _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this);
    // XXX after after frameset
  }

  /**
   * Parse a HTML document into a well-formed tree
   *
   * [tokenizer_] - an object that provides a stream of tokens to the
   * treebuilder. This may be replaced for e.g. a sanitizer which converts some
   * tags to text. Otherwise, construct an instance of HTMLTokenizer with the
   * appropriate options.
   */
  Document parse(HTMLTokenizer tokenizer_) {
    _parse(tokenizer_, innerHTML_: false);
    return tree.getDocument();
  }

  /**
   * Parse a HTML fragment into a well-formed tree fragment.
   *
   * [container_] - name of the element we're setting the innerHTML property
   * if set to null, default to 'div'.
   *
   * [tokenizer_] - an object that provides a stream of tokens to the
   * treebuilder. This may be replaced for e.g. a sanitizer which converts some
   * tags to text. Otherwise, construct an instance of HTMLTokenizer with the
   * appropriate options.
   */
  DocumentFragment parseFragment(HTMLTokenizer tokenizer_,
      [String container_ = "div"]) {
    _parse(tokenizer_, innerHTML_: true, container_: container_);
    return tree.getFragment();
  }

  /**
   * Shared driver behind [parse] and [parseFragment].
   *
   * Stores the arguments on the parser, attaches this parser to
   * [tokenizer_], resets all state and runs [mainLoop]. Whenever the loop
   * throws a [ReparseException] the state is reset and the loop is run again.
   */
  void _parse(HTMLTokenizer tokenizer_, [bool innerHTML_ = false,
      String container_ = "div"]) {

    innerHTMLMode = innerHTML_;
    container = container_;
    tokenizer = tokenizer_;
    // TODO(jmesserly): this feels a little strange, but it's needed for CDATA.
    // Maybe we should change the API to having the parser create the tokenizer.
    tokenizer.parser = this;

    reset();

    while (true) {
      try {
        mainLoop();
        break;
      } on ReparseException catch (e) {
        reset();
      }
    }
  }

  /**
   * Puts the parser (and the tree builder) back into its starting state.
   *
   * For a fragment parse this also picks the tokenizer state that matches the
   * container element, inserts the root html element and selects the phase
   * via [resetInsertionMode]. For a document parse the initial phase is used.
   */
  void reset() {
    tree.reset();
    firstStartTag = false;
    errors = <ParseError>[];
    // "quirks" / "limited quirks" / "no quirks"
    compatMode = "no quirks";

    if (innerHTMLMode) {
      innerHTML = container.toLowerCase();

      if (cdataElements.indexOf(innerHTML) >= 0) {
        tokenizer.state = tokenizer.rcdataState;
      } else if (rcdataElements.indexOf(innerHTML) >= 0) {
        tokenizer.state = tokenizer.rawtextState;
      } else if (innerHTML == 'plaintext') {
        tokenizer.state = tokenizer.plaintextState;
      } else {
        // state already is data state
        // tokenizer.state = tokenizer.dataState;
      }
      phase = _beforeHtmlPhase;
      _beforeHtmlPhase.insertHtmlElement();
      resetInsertionMode();
    } else {
      innerHTML = null;
      phase = _initialPhase;
    }

    lastPhase = null;
    beforeRCDataPhase = null;
    framesetOK = true;
  }

  /**
   * Whether [element] is an HTML integration point: either a MathML
   * annotation-xml element whose "encoding" attribute is (case-insensitively)
   * "text/html" or "application/xhtml+xml", or an element listed in
   * [htmlIntegrationPointElements].
   */
  bool isHTMLIntegrationPoint(Node element) {
    if (element.tagName == "annotation-xml" &&
        element.namespace == Namespaces.mathml) {
      var enc = element.attributes["encoding"];
      if (enc != null) enc = asciiUpper2Lower(enc);
      return enc == "text/html" || enc == "application/xhtml+xml";
    } else {
      return htmlIntegrationPointElements.indexOf(
          new Pair(element.namespace, element.tagName)) >= 0;
    }
  }

  /**
   * Whether [element] is listed in [mathmlTextIntegrationPointElements].
   */
  bool isMathMLTextIntegrationPoint(Node element) {
    return mathmlTextIntegrationPointElements.indexOf(
        new Pair(element.namespace, element.tagName)) >= 0;
  }

  /**
   * Whether [token], whose kind is [type], has to be handled by the
   * foreign-content phase instead of the current phase.
   *
   * Returns false when there is no open element, when the current node is in
   * the tree's default namespace, or when the current node is an integration
   * point that sends this kind of token back to the regular phases.
   */
  bool inForeignContent(Token token, int type) {
    if (tree.openElements.length == 0) return false;

    var node = tree.openElements.last();
    if (node.namespace == tree.defaultNamespace) return false;

    if (isMathMLTextIntegrationPoint(node)) {
      if (type == TokenKind.startTag &&
          (token as StartTagToken).name != "mglyph" &&
          (token as StartTagToken).name != "malignmark")  {
        return false;
      }
      if (type == TokenKind.characters || type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    if (node.tagName == "annotation-xml" && type == TokenKind.startTag &&
        (token as StartTagToken).name == "svg") {
      return false;
    }

    if (isHTMLIntegrationPoint(node)) {
      if (type == TokenKind.startTag ||
          type == TokenKind.characters ||
          type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    return true;
  }

  /**
   * Drains the tokenizer, dispatching every token to a phase, then runs the
   * end-of-file handling.
   *
   * A phase handler may return a token; that token is dispatched again (to
   * whatever phase is then current) until a handler returns null.
   */
  void mainLoop() {
    while (tokenizer.hasNext()) {
      var token = normalizeToken(tokenizer.next());
      var newToken = token;
      int type;
      while (newToken !== null) {
        type = newToken.kind;

        // Note: avoid "is" test here, see http://dartbug.com/4795
        if (type == TokenKind.parseError) {
          ParseErrorToken error = newToken;
          parseError(error.data, error.messageParams);
          newToken = null;
        } else {
          Phase phase_ = phase;
          // Classify the token that is about to be dispatched. [type] was
          // read from newToken, so the same object must be passed along;
          // inForeignContent casts the token based on that type.
          if (inForeignContent(newToken, type)) {
            phase_ = _inForeignContentPhase;
          }

          switch (type) {
            case TokenKind.characters:
              newToken = phase_.processCharacters(newToken);
              break;
            case TokenKind.spaceCharacters:
              newToken = phase_.processSpaceCharacters(newToken);
              break;
            case TokenKind.startTag:
              newToken = phase_.processStartTag(newToken);
              break;
            case TokenKind.endTag:
              newToken = phase_.processEndTag(newToken);
              break;
            case TokenKind.comment:
              newToken = phase_.processComment(newToken);
              break;
            case TokenKind.doctype:
              newToken = phase_.processDoctype(newToken);
              break;
          }
        }
      }

      // A self-closing start tag that no handler acknowledged is an error.
      if (token is StartTagToken) {
        if (token.selfClosing && !token.selfClosingAcknowledged) {
          parseError("non-void-element-with-trailing-solidus",
              {"name": token.name});
        }
      }
    }

    // When the loop finishes it's EOF
    var reprocess = true;
    var reprocessPhases = [];
    while (reprocess) {
      reprocessPhases.add(phase);
      reprocess = phase.processEOF();
      if (reprocess) {
        // A phase asking for reprocessing must have moved to a new phase.
        assert(reprocessPhases.indexOf(phase) == -1);
      }
    }
  }

  /**
   * Records a [ParseError] for [errorcode] at the tokenizer's current stream
   * position, and throws it when [strict] is set.
   */
  void parseError([String errorcode = "XXX-undefined-error",
      Map datavars = const {}]) {
    // XXX The idea is to make errorcode mandatory.
    var position = tokenizer.stream.position();
    var err = new ParseError(errorcode, position, datavars);
    errors.add(err);
    if (strict) throw err;
  }

  /** HTML5 specific normalizations to the token stream. */
  Token normalizeToken(Token token) {
    if (token is StartTagToken) {
      token.data = makeDict(token.data);
    }
    return token;
  }

  /** Renames the "definitionurl" attribute of [token] to "definitionURL". */
  void adjustMathMLAttributes(StartTagToken token) {
    var orig = token.data.remove("definitionurl");
    if (orig != null) {
      token.data["definitionURL"] = orig;
    }
  }

  /**
   * Restores the mixed-case spelling of the SVG attributes on [token] that
   * appear (lower-cased) as keys in the table below.
   */
  void adjustSVGAttributes(Token token) {
    final replacements = const {
      "attributename":"attributeName",
      "attributetype":"attributeType",
      "basefrequency":"baseFrequency",
      "baseprofile":"baseProfile",
      "calcmode":"calcMode",
      "clippathunits":"clipPathUnits",
      "contentscripttype":"contentScriptType",
      "contentstyletype":"contentStyleType",
      "diffuseconstant":"diffuseConstant",
      "edgemode":"edgeMode",
      "externalresourcesrequired":"externalResourcesRequired",
      "filterres":"filterRes",
      "filterunits":"filterUnits",
      "glyphref":"glyphRef",
      "gradienttransform":"gradientTransform",
      "gradientunits":"gradientUnits",
      "kernelmatrix":"kernelMatrix",
      "kernelunitlength":"kernelUnitLength",
      "keypoints":"keyPoints",
      "keysplines":"keySplines",
      "keytimes":"keyTimes",
      "lengthadjust":"lengthAdjust",
      "limitingconeangle":"limitingConeAngle",
      "markerheight":"markerHeight",
      "markerunits":"markerUnits",
      "markerwidth":"markerWidth",
      "maskcontentunits":"maskContentUnits",
      "maskunits":"maskUnits",
      "numoctaves":"numOctaves",
      "pathlength":"pathLength",
      "patterncontentunits":"patternContentUnits",
      "patterntransform":"patternTransform",
      "patternunits":"patternUnits",
      "pointsatx":"pointsAtX",
      "pointsaty":"pointsAtY",
      "pointsatz":"pointsAtZ",
      "preservealpha":"preserveAlpha",
      "preserveaspectratio":"preserveAspectRatio",
      "primitiveunits":"primitiveUnits",
      "refx":"refX",
      "refy":"refY",
      "repeatcount":"repeatCount",
      "repeatdur":"repeatDur",
      "requiredextensions":"requiredExtensions",
      "requiredfeatures":"requiredFeatures",
      "specularconstant":"specularConstant",
      "specularexponent":"specularExponent",
      "spreadmethod":"spreadMethod",
      "startoffset":"startOffset",
      "stddeviation":"stdDeviation",
      "stitchtiles":"stitchTiles",
      "surfacescale":"surfaceScale",
      "systemlanguage":"systemLanguage",
      "tablevalues":"tableValues",
      "targetx":"targetX",
      "targety":"targetY",
      "textlength":"textLength",
      "viewbox":"viewBox",
      "viewtarget":"viewTarget",
      "xchannelselector":"xChannelSelector",
      "ychannelselector":"yChannelSelector",
      "zoomandpan":"zoomAndPan"
    };
    _renameAttributes(token, replacements);
  }

  /**
   * Replaces the string keys of the xlink/xml/xmlns attributes on [token] with
   * the matching [AttributeName] objects from the table below.
   */
  void adjustForeignAttributes(Token token) {
    // TODO(jmesserly): I don't like mixing non-string objects with strings in
    // the Node.attributes Map. Is there another solution?
    final replacements = const {
      "xlink:actuate": const AttributeName("xlink", "actuate",
            Namespaces.xlink),
      "xlink:arcrole": const AttributeName("xlink", "arcrole",
            Namespaces.xlink),
      "xlink:href": const AttributeName("xlink", "href", Namespaces.xlink),
      "xlink:role": const AttributeName("xlink", "role", Namespaces.xlink),
      "xlink:show": const AttributeName("xlink", "show", Namespaces.xlink),
      "xlink:title": const AttributeName("xlink", "title", Namespaces.xlink),
      "xlink:type": const AttributeName("xlink", "type", Namespaces.xlink),
      "xml:base": const AttributeName("xml", "base", Namespaces.xml),
      "xml:lang": const AttributeName("xml", "lang", Namespaces.xml),
      "xml:space": const AttributeName("xml", "space", Namespaces.xml),
      "xmlns": const AttributeName(null, "xmlns", Namespaces.xmlns),
      "xmlns:xlink": const AttributeName("xmlns", "xlink", Namespaces.xmlns)
    };
    _renameAttributes(token, replacements);
  }

  /**
   * Re-keys every attribute of [token] whose current name is a key of
   * [replacements], storing its value under the mapped name instead.
   */
  void _renameAttributes(Token token, Map replacements) {
    for (var originalName in token.data.getKeys()) {
      var newName = replacements[originalName];
      if (newName != null) {
        token.data[newName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Chooses [phase] from the stack of open elements, looking from the most
   * recently opened element downwards. Falls back to the in-body phase when
   * no element selects a phase.
   */
  void resetInsertionMode() {
    // The name of this method is mostly historical. (It's also used in the
    // specification.)
    for (Node node in reversed(tree.openElements)) {
      var nodeName = node.tagName;
      bool last = node == tree.openElements[0];
      if (last) {
        // The bottom of the stack stands in for the fragment's container.
        assert(innerHTMLMode);
        nodeName = innerHTML;
      }
      // Check for conditions that should only happen in the innerHTML
      // case
      switch (nodeName) {
        case "select": case "colgroup": case "head": case "html":
          assert(innerHTMLMode);
          break;
      }
      if (!last && node.namespace != tree.defaultNamespace) {
        continue;
      }
      switch (nodeName) {
        case "select": phase = _inSelectPhase; return;
        case "td": phase = _inCellPhase; return;
        case "th": phase = _inCellPhase; return;
        case "tr": phase = _inRowPhase; return;
        case "tbody": phase = _inTableBodyPhase; return;
        case "thead": phase = _inTableBodyPhase; return;
        case "tfoot": phase = _inTableBodyPhase; return;
        case "caption": phase = _inCaptionPhase; return;
        case "colgroup": phase = _inColumnGroupPhase; return;
        case "table": phase = _inTablePhase; return;
        case "head": phase = _inBodyPhase; return;
        case "body": phase = _inBodyPhase; return;
        case "frameset": phase = _inFramesetPhase; return;
        case "html": phase = _beforeHeadPhase; return;
      }
    }
    phase = _inBodyPhase;
  }

  /**
   * Generic RCDATA/RAWTEXT Parsing algorithm
   * [contentType] - RCDATA or RAWTEXT
   *
   * Inserts the element for [token], switches the tokenizer to the matching
   * state, remembers the current phase in [originalPhase] and makes the text
   * phase current.
   */
  void parseRCDataRawtext(Token token, String contentType) {
    assert(contentType == "RAWTEXT" || contentType == "RCDATA");

    tree.insertElement(token);

    if (contentType == "RAWTEXT") {
      tokenizer.state = tokenizer.rawtextState;
    } else {
      tokenizer.state = tokenizer.rcdataState;
    }

    originalPhase = phase;
    phase = _textPhase;
  }
}
529
530
/**
 * Common superclass of the objects that each implement one phase of
 * processing. Handlers that return a [Token] hand that token back to the
 * parser's main loop for another round of dispatch; returning null ends the
 * handling of the token.
 */
class Phase {
  // Order should be (they can be omitted):
  // * EOF
  // * Comment
  // * Doctype
  // * SpaceCharacters
  // * Characters
  // * StartTag
  //   - startTag* methods
  // * EndTag
  //   - endTag* methods

  /** The parser this phase belongs to. */
  final HTMLParser parser;

  /** The parser's tree builder, cached at construction time. */
  final TreeBuilder tree;

  Phase(HTMLParser parser) : parser = parser, tree = parser.tree;

  /** Not implemented here; always throws. */
  bool processEOF() {
    throw const NotImplementedException();
  }

  /** Appends the comment to the current (last open) element. */
  Token processComment(CommentToken token) {
    // For most phases the following is correct. Where it's not it will be
    // overridden.
    var currentNode = tree.openElements.last();
    tree.insertComment(token, currentNode);
    return null;
  }

  /** Reports the doctype as unexpected. */
  Token processDoctype(DoctypeToken token) {
    parser.parseError("unexpected-doctype");
    return null;
  }

  /** Inserts the token's text into the tree. */
  Token processCharacters(CharactersToken token) {
    var text = token.data;
    tree.insertText(text);
    return null;
  }

  /** Inserts the token's whitespace into the tree. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    var whitespace = token.data;
    tree.insertText(whitespace);
    return null;
  }

  /** Not implemented here; always throws. */
  Token processStartTag(StartTagToken token) {
    throw const NotImplementedException();
  }

  /**
   * Handles an html start tag: copies every attribute the root element does
   * not have yet from [token] onto it, and clears [HTMLParser.firstStartTag].
   */
  Token startTagHtml(StartTagToken token) {
    if (parser.firstStartTag == false && token.name == "html") {
      parser.parseError("non-html-root");
    }
    // XXX Need a check here to see if the first start tag token emitted is
    // this token... If it's not, invoke parser.parseError().
    token.data.forEach((attrName, attrValue) {
      var rootAttributes = tree.openElements[0].attributes;
      rootAttributes.putIfAbsent(attrName, () => attrValue);
    });
    parser.firstStartTag = false;
    return null;
  }

  /** Not implemented here; always throws. */
  Token processEndTag(EndTagToken token) {
    throw const NotImplementedException();
  }

  /**
   * Helper method for popping openElements: pops until an element whose tag
   * name is [name] has been popped.
   */
  void popOpenElementsUntil(String name) {
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.tagName != name);
  }
}
600
/**
 * First phase of a document parse: waits for the doctype, derives the
 * parser's compatibility mode from it and hands over to the before-html
 * phase. Anything other than whitespace, a comment or a doctype puts the
 * parser in "quirks" mode and is reprocessed by the next phase.
 */
class InitialPhase extends Phase {
  InitialPhase(parser) : super(parser);

  /** Whitespace before the doctype is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
  }

  /** Comments before the doctype are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
  }

  /**
   * Inserts the doctype, sets [HTMLParser.compatMode] to "quirks" or
   * "limited quirks" when the identifiers call for it, and switches to the
   * before-html phase.
   */
  Token processDoctype(DoctypeToken token) {
    var name = token.name;
    String publicId = token.publicId;
    var systemId = token.systemId;
    var correct = token.correct;

    if ((name != "html" || publicId != null ||
        systemId != null && systemId != "about:legacy-compat")) {
      parser.parseError("unknown-doctype");
    }

    if (publicId === null) {
      publicId = "";
    }

    tree.insertDoctype(token);

    // All comparisons below are made against lower-cased prefixes.
    if (publicId != "") {
      publicId = asciiUpper2Lower(publicId);
    }

    if (!correct || token.name != "html"
        || startsWithAny(publicId, const [
          "+//silmaril//dtd html pro v0r11 19970101//",
          "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
          "-//as//dtd html 3.0 aswedit + extensions//",
          "-//ietf//dtd html 2.0 level 1//",
          "-//ietf//dtd html 2.0 level 2//",
          "-//ietf//dtd html 2.0 strict level 1//",
          "-//ietf//dtd html 2.0 strict level 2//",
          "-//ietf//dtd html 2.0 strict//",
          "-//ietf//dtd html 2.0//",
          "-//ietf//dtd html 2.1e//",
          "-//ietf//dtd html 3.0//",
          "-//ietf//dtd html 3.2 final//",
          "-//ietf//dtd html 3.2//",
          "-//ietf//dtd html 3//",
          "-//ietf//dtd html level 0//",
          "-//ietf//dtd html level 1//",
          "-//ietf//dtd html level 2//",
          "-//ietf//dtd html level 3//",
          "-//ietf//dtd html strict level 0//",
          "-//ietf//dtd html strict level 1//",
          "-//ietf//dtd html strict level 2//",
          "-//ietf//dtd html strict level 3//",
          "-//ietf//dtd html strict//",
          "-//ietf//dtd html//",
          "-//metrius//dtd metrius presentational//",
          "-//microsoft//dtd internet explorer 2.0 html strict//",
          "-//microsoft//dtd internet explorer 2.0 html//",
          "-//microsoft//dtd internet explorer 2.0 tables//",
          "-//microsoft//dtd internet explorer 3.0 html strict//",
          "-//microsoft//dtd internet explorer 3.0 html//",
          "-//microsoft//dtd internet explorer 3.0 tables//",
          "-//netscape comm. corp.//dtd html//",
          "-//netscape comm. corp.//dtd strict html//",
          "-//o'reilly and associates//dtd html 2.0//",
          "-//o'reilly and associates//dtd html extended 1.0//",
          "-//o'reilly and associates//dtd html extended relaxed 1.0//",
          // The next two prefixes must contain no stray whitespace, or they
          // can never match a public identifier.
          "-//softquad software//dtd hotmetal pro 6.0::19990601::"
              "extensions to html 4.0//",
          "-//softquad//dtd hotmetal pro 4.0::19971010::"
              "extensions to html 4.0//",
          "-//spyglass//dtd html 2.0 extended//",
          "-//sq//dtd html 2.0 hotmetal + extensions//",
          "-//sun microsystems corp.//dtd hotjava html//",
          "-//sun microsystems corp.//dtd hotjava strict html//",
          "-//w3c//dtd html 3 1995-03-24//",
          "-//w3c//dtd html 3.2 draft//",
          "-//w3c//dtd html 3.2 final//",
          "-//w3c//dtd html 3.2//",
          "-//w3c//dtd html 3.2s draft//",
          "-//w3c//dtd html 4.0 frameset//",
          "-//w3c//dtd html 4.0 transitional//",
          "-//w3c//dtd html experimental 19960712//",
          "-//w3c//dtd html experimental 970421//",
          "-//w3c//dtd w3 html//",
          "-//w3o//dtd w3 html 3.0//",
          "-//webtechs//dtd mozilla html 2.0//",
          "-//webtechs//dtd mozilla html//"])
        || const ["-//w3o//dtd w3 html strict 3.0//en//",
           "-/w3c/dtd html 4.0 transitional/en",
           "html"].indexOf(publicId) >= 0
        || startsWithAny(publicId, const [
           "-//w3c//dtd html 4.01 frameset//",
           "-//w3c//dtd html 4.01 transitional//"]) && systemId == null
        || systemId != null && systemId.toLowerCase() ==
           "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {

      parser.compatMode = "quirks";
    } else if (startsWithAny(publicId, const [
          "-//w3c//dtd xhtml 1.0 frameset//",
          "-//w3c//dtd xhtml 1.0 transitional//"])
        || startsWithAny(publicId, const [
          "-//w3c//dtd html 4.01 frameset//",
          "-//w3c//dtd html 4.01 transitional//"]) &&
          systemId != null) {
      parser.compatMode = "limited quirks";
    }
    parser.phase = parser._beforeHtmlPhase;
  }

  /** No doctype was seen: use "quirks" mode and move to before-html. */
  void anythingElse() {
    parser.compatMode = "quirks";
    parser.phase = parser._beforeHtmlPhase;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-doctype-but-got-chars");
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processStartTag(StartTagToken token) {
    parser.parseError("expected-doctype-but-got-start-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processEndTag(EndTagToken token) {
    parser.parseError("expected-doctype-but-got-end-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /** Reports the missing doctype; returns true so EOF is reprocessed. */
  bool processEOF() {
    parser.parseError("expected-doctype-but-got-eof");
    anythingElse();
    return true;
  }
}
742
743
/**
 * Phase that is current until the root html element exists. Most tokens
 * cause the root to be created and are then returned for reprocessing.
 */
class BeforeHtmlPhase extends Phase {
  BeforeHtmlPhase(parser) : super(parser);

  // helper methods

  /** Inserts an attribute-less html root and moves to before-head. */
  void insertHtmlElement() {
    var rootTag = new StartTagToken("html", data: {});
    tree.insertRoot(rootTag);
    parser.phase = parser._beforeHeadPhase;
  }

  // other

  /** Creates the root; returns true so EOF is reprocessed. */
  bool processEOF() {
    insertHtmlElement();
    return true;
  }

  /** Comments seen here are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /** Whitespace before the root element is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /** Creates the root and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    insertHtmlElement();
    return token;
  }

  /**
   * Creates the root and returns [token] for reprocessing, noting on the
   * parser when the tag is itself "html".
   */
  Token processStartTag(StartTagToken token) {
    if (token.name == "html") {
      parser.firstStartTag = true;
    }
    insertHtmlElement();
    return token;
  }

  /**
   * The end tags head, body, html and br create the root and are
   * reprocessed; any other end tag is reported and dropped.
   */
  Token processEndTag(EndTagToken token) {
    var impliesRoot =
        const ["head", "body", "html", "br"].indexOf(token.name) >= 0;
    if (impliesRoot) {
      insertHtmlElement();
      return token;
    }
    parser.parseError("unexpected-end-tag-before-html",
        {"name": token.name});
    return null;
  }
}
791
792
/**
 * Phase that is current between the root element and the head element.
 * Apart from whitespace, html start tags and stray end tags, every token
 * causes a head element to be opened before the token is reprocessed.
 */
class BeforeHeadPhase extends Phase {
  BeforeHeadPhase(parser) : super(parser);

  /** Dispatches a start tag by name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == 'html') {
      return startTagHtml(token);
    } else if (tagName == 'head') {
      return startTagHead(token);
    } else {
      return startTagOther(token);
    }
  }

  /** Dispatches an end tag by name. */
  processEndTag(EndTagToken token) {
    var impliesHead =
        const ["head", "body", "html", "br"].indexOf(token.name) >= 0;
    if (impliesHead) {
      return endTagImplyHead(token);
    }
    return endTagOther(token);
  }

  /** Opens an implied head; returns true so EOF is reprocessed. */
  bool processEOF() {
    startTagHead(new StartTagToken("head", data: {}));
    return true;
  }

  /** Whitespace before the head element is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /** Opens an implied head and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    var impliedHead = new StartTagToken("head", data: {});
    startTagHead(impliedHead);
    return token;
  }

  /** html start tags are handled by the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    var inBody = parser._inBodyPhase;
    return inBody.processStartTag(token);
  }

  /**
   * Inserts the head element, records it as the tree's head pointer and
   * moves to the in-head phase.
   */
  void startTagHead(StartTagToken token) {
    tree.insertElement(token);
    var headElement = tree.openElements.last();
    tree.headPointer = headElement;
    parser.phase = parser._inHeadPhase;
  }

  /** Opens an implied head and returns [token] for reprocessing. */
  Token startTagOther(StartTagToken token) {
    var impliedHead = new StartTagToken("head", data: {});
    startTagHead(impliedHead);
    return token;
  }

  /** Opens an implied head and returns [token] for reprocessing. */
  Token endTagImplyHead(EndTagToken token) {
    var impliedHead = new StartTagToken("head", data: {});
    startTagHead(impliedHead);
    return token;
  }

  /** Any other end tag is reported and dropped. */
  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("end-tag-after-implied-root", details);
  }
}
850
/**
 * Phase that is current while the head element is open. Tokens that do not
 * belong in the head close it and are returned for reprocessing.
 */
class InHeadPhase extends Phase {
  InHeadPhase(parser) : super(parser);

  /** Dispatches a start tag by name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") {
      return startTagHtml(token);
    } else if (tagName == "title") {
      return startTagTitle(token);
    } else if (const ["noscript", "noframes", "style"]
        .indexOf(tagName) >= 0) {
      return startTagNoScriptNoFramesStyle(token);
    } else if (tagName == "script") {
      return startTagScript(token);
    } else if (const ["base", "basefont", "bgsound", "command", "link"]
        .indexOf(tagName) >= 0) {
      return startTagBaseLinkCommand(token);
    } else if (tagName == "meta") {
      return startTagMeta(token);
    } else if (tagName == "head") {
      return startTagHead(token);
    }
    return startTagOther(token);
  }

  /** Dispatches an end tag by name. */
  processEndTag(EndTagToken token) {
    var tagName = token.name;
    if (tagName == "head") {
      return endTagHead(token);
    }
    if (tagName == "br" || tagName == "html" || tagName == "body") {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  // the real thing

  /** Closes the head; returns true so EOF is reprocessed. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Closes the head and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** html start tags are handled by the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    var inBody = parser._inBodyPhase;
    return inBody.processStartTag(token);
  }

  /** A second head start tag is reported and ignored. */
  void startTagHead(StartTagToken token) {
    parser.parseError("two-heads-are-not-better-than-one");
  }

  /**
   * Inserts the element and immediately pops it again, acknowledging a
   * self-closing flag on [token].
   */
  void startTagBaseLinkCommand(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }

  /**
   * Inserts and pops the meta element. While the stream's encoding is not
   * yet certain, a "charset" attribute, or failing that the encoding parsed
   * out of a "content" attribute, is passed to the stream's changeEncoding.
   */
  void startTagMeta(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;

    var attributes = token.data;
    if (parser.tokenizer.stream.charEncodingCertain) return;

    var charset = attributes["charset"];
    var content = attributes["content"];
    if (charset != null) {
      parser.tokenizer.stream.changeEncoding(charset);
    } else if (content != null) {
      var contentBytes = new EncodingBytes(content);
      var codec = new ContentAttrParser(contentBytes).parse();
      parser.tokenizer.stream.changeEncoding(codec);
    }
  }

  /** title content is parsed as RCDATA. */
  void startTagTitle(StartTagToken token) {
    parser.parseRCDataRawtext(token, "RCDATA");
  }

  /** noscript, noframes and style content is parsed as RAWTEXT. */
  void startTagNoScriptNoFramesStyle(StartTagToken token) {
    // Need to decide whether to implement the scripting-disabled case
    parser.parseRCDataRawtext(token, "RAWTEXT");
  }

  /**
   * Inserts the script element, puts the tokenizer in its script-data state
   * and switches to the text phase, remembering the current phase first.
   */
  void startTagScript(StartTagToken token) {
    tree.insertElement(token);
    var tokenizer = parser.tokenizer;
    tokenizer.state = tokenizer.scriptDataState;
    parser.originalPhase = parser.phase;
    parser.phase = parser._textPhase;
  }

  /** Closes the head and returns [token] for reprocessing. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /** Pops the head element and moves to the after-head phase. */
  void endTagHead(EndTagToken token) {
    var popped = parser.tree.openElements.removeLast();
    assert(popped.tagName == "head");
    parser.phase = parser._afterHeadPhase;
  }

  /** Closes the head and returns [token] for reprocessing. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Any other end tag is reported and dropped. */
  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-end-tag", details);
  }

  /** Acts as if a head end tag had been seen. */
  void anythingElse() {
    var impliedEnd = new EndTagToken("head", data: {});
    endTagHead(impliedEnd);
  }
}
961
962
963 // XXX If we implement a parser for which scripting is disabled we need to
964 // implement this phase.
965 //
966 // class InHeadNoScriptPhase extends Phase {
967
/**
 * Phase that is current after the head has been closed and before a body or
 * frameset has been opened. Tokens not handled here open an implied body and
 * are returned for reprocessing.
 */
class AfterHeadPhase extends Phase {
  AfterHeadPhase(parser) : super(parser);

  /** Dispatches a start tag by name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") {
      return startTagHtml(token);
    } else if (tagName == "body") {
      return startTagBody(token);
    } else if (tagName == "frameset") {
      return startTagFrameset(token);
    } else if (const ["base", "basefont", "bgsound", "link", "meta",
        "noframes", "script", "style", "title"].indexOf(tagName) >= 0) {
      return startTagFromHead(token);
    } else if (tagName == "head") {
      return startTagHead(token);
    }
    return startTagOther(token);
  }

  /** Dispatches an end tag by name. */
  processEndTag(EndTagToken token) {
    var tagName = token.name;
    if (tagName == "body" || tagName == "html" || tagName == "br") {
      return endTagHtmlBodyBr(token);
    }
    return endTagOther(token);
  }

  /** Opens an implied body; returns true so EOF is reprocessed. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Opens an implied body and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** html start tags are handled by the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    var inBody = parser._inBodyPhase;
    return inBody.processStartTag(token);
  }

  /** Inserts the body element, clears framesetOK and moves to in-body. */
  void startTagBody(StartTagToken token) {
    parser.framesetOK = false;
    tree.insertElement(token);
    parser.phase = parser._inBodyPhase;
  }

  /** Inserts the frameset element and moves to the in-frameset phase. */
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
    parser.phase = parser._inFramesetPhase;
  }

  /**
   * Handles a head-only element that shows up after the head was closed:
   * reports it, temporarily pushes the head element back onto the stack of
   * open elements, lets the in-head phase process [token], then removes the
   * topmost "head" entry from the stack again.
   */
  void startTagFromHead(StartTagToken token) {
    parser.parseError("unexpected-start-tag-out-of-my-head",
        {"name": token.name});
    tree.openElements.add(tree.headPointer);
    parser._inHeadPhase.processStartTag(token);
    for (Node openNode in reversed(tree.openElements)) {
      if (openNode.tagName != "head") continue;
      removeFromList(tree.openElements, openNode);
      break;
    }
  }

  /** A head start tag at this point is reported and ignored. */
  void startTagHead(StartTagToken token) {
    var details = {"name":token.name};
    parser.parseError("unexpected-start-tag", details);
  }

  /** Opens an implied body and returns [token] for reprocessing. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /** Opens an implied body and returns [token] for reprocessing. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Any other end tag is reported and dropped. */
  void endTagOther(EndTagToken token) {
    var details = {"name":token.name};
    parser.parseError("unexpected-end-tag", details);
  }

  /** Inserts an attribute-less body, moves to in-body, sets framesetOK. */
  void anythingElse() {
    var impliedBody = new StartTagToken("body", data: {});
    tree.insertElement(impliedBody);
    parser.phase = parser._inBodyPhase;
    parser.framesetOK = true;
  }
}
1054
/**
 * Signature of a handler that takes a token and returns a [Token]; the
 * handlers assigned to it in this file may also return null.
 * Note that "Proccessor" is the spelling of the declared name.
 */
typedef Token TokenProccessor(Token token);
1056
1057 class InBodyPhase extends Phase {
1058 TokenProccessor processSpaceCharactersFunc;
1059
// http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
// the really-really-really-very crazy mode
InBodyPhase(parser) : super(parser) {
  // Keep a reference to the whitespace handler so it can be swapped for
  // the newline-dropping variant while inside <pre>, <listing> and
  // <textarea>.
  processSpaceCharactersFunc = processSpaceCharactersNonPre;
}
1066
/**
 * Routes a start tag to its in-body handler based on the tag name.
 *
 * Each tag name appears in exactly one case group.  "noframes" belongs to
 * the in-head group, so it is handled by [startTagProcessInHead].
 */
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);
    case "base": case "basefont": case "bgsound": case "command": case "link":
    case "meta": case "noframes": case "script": case "style": case "title":
      return startTagProcessInHead(token);
    case "body":
      return startTagBody(token);
    case "frameset":
      return startTagFrameset(token);
    case "address": case "article": case "aside": case "blockquote":
    case "center": case "details": case "dir": case "div":
    case "dl": case "fieldset": case "figcaption": case "figure":
    case "footer": case "header": case "hgroup": case "menu": case "nav":
    case "ol": case "p": case "section": case "summary": case "ul":
      return startTagCloseP(token);
    // headingElements
    case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
      return startTagHeading(token);
    case "pre": case "listing":
      return startTagPreListing(token);
    case "form":
      return startTagForm(token);
    case "li": case "dd": case "dt":
      return startTagListItem(token);
    case "plaintext":
      return startTagPlaintext(token);
    case "a":
      return startTagA(token);
    case "b": case "big": case "code": case "em": case "font": case "i":
    case "s": case "small": case "strike": case "strong": case "tt": case "u":
      return startTagFormatting(token);
    case "nobr":
      return startTagNobr(token);
    case "button":
      return startTagButton(token);
    case "applet": case "marquee": case "object":
      return startTagAppletMarqueeObject(token);
    case "xmp":
      return startTagXmp(token);
    case "table":
      return startTagTable(token);
    case "area": case "br": case "embed": case "img": case "keygen":
    case "wbr":
      return startTagVoidFormatting(token);
    case "param": case "source": case "track":
      return startTagParamSource(token);
    case "input":
      return startTagInput(token);
    case "hr":
      return startTagHr(token);
    case "image":
      return startTagImage(token);
    case "isindex":
      return startTagIsIndex(token);
    case "textarea":
      return startTagTextarea(token);
    case "iframe":
      return startTagIFrame(token);
    // "noframes" is deliberately absent here: it is matched by the in-head
    // group above and never reached this case.
    case "noembed": case "noscript":
      return startTagRawtext(token);
    case "select":
      return startTagSelect(token);
    case "rp": case "rt":
      return startTagRpRt(token);
    case "option": case "optgroup":
      return startTagOpt(token);
    case "math":
      return startTagMath(token);
    case "svg":
      return startTagSvg(token);
    case "caption": case "col": case "colgroup": case "frame": case "head":
    case "tbody": case "td": case "tfoot": case "th": case "thead": case "tr":
      return startTagMisplaced(token);
    default:
      return startTagOther(token);
  }
}
1144
/** Routes an end tag to its in-body handler based on the tag name. */
processEndTag(EndTagToken token) {
  switch (token.name) {
    case "body":
      return endTagBody(token);
    case "html":
      return endTagHtml(token);
    case "form":
      return endTagForm(token);
    case "p":
      return endTagP(token);
    case "br":
      return endTagBr(token);
    case "dd":
    case "dt":
    case "li":
      return endTagListItem(token);
    // headingElements
    case "h1":
    case "h2":
    case "h3":
    case "h4":
    case "h5":
    case "h6":
      return endTagHeading(token);
    case "applet":
    case "marquee":
    case "object":
      return endTagAppletMarqueeObject(token);
    // Formatting elements go through the adoption agency algorithm.
    case "a": case "b": case "big": case "code": case "em": case "font":
    case "i": case "nobr": case "s": case "small": case "strike":
    case "strong": case "tt": case "u":
      return endTagFormatting(token);
    // Block-level elements.
    case "address": case "article": case "aside": case "blockquote":
    case "center": case "details": case "dir": case "div": case "dl":
    case "fieldset": case "figcaption": case "figure": case "footer":
    case "header": case "hgroup": case "listing": case "menu": case "nav":
    case "ol": case "pre": case "section": case "summary": case "ul":
      return endTagBlock(token);
    default:
      return endTagOther(token);
  }
}
1171
/**
 * Returns true when [node1] and [node2] have the same tag name, the same
 * namespace, the same number of attributes, and every attribute of
 * [node1] has an equal value in [node2].
 */
bool isMatchingFormattingElement(Node node1, Node node2) {
  if (node1.tagName != node2.tagName) return false;
  if (node1.namespace != node2.namespace) return false;
  if (node1.attributes.length != node2.attributes.length) return false;

  for (var name in node1.attributes.getKeys()) {
    if (node1.attributes[name] != node2.attributes[name]) return false;
  }
  return true;
}
1186
1187 // helper
/**
 * Inserts the element for [token] and appends it to the list of active
 * formatting elements.  If three matching entries already exist after the
 * last marker, one of them is removed first.
 */
void addFormattingElement(token) {
  tree.insertElement(token);
  var inserted = tree.openElements.last();

  // Walk the list from its end, stopping at the first marker, and collect
  // the entries that match the new element.
  var duplicates = [];
  for (Node entry in reversed(tree.activeFormattingElements)) {
    if (entry === Marker) break;
    if (isMatchingFormattingElement(entry, inserted)) {
      duplicates.add(entry);
    }
  }

  assert(duplicates.length <= 3);
  if (duplicates.length == 3) {
    // The last one collected is the one nearest the marker / list start.
    removeFromList(tree.activeFormattingElements, duplicates.last());
  }
  tree.activeFormattingElements.add(inserted);
}
1207
1208 // the real deal
/**
 * Reports a single parse error if any open element is not one of the
 * elements allowed to remain open at end of file, then returns false to
 * stop parsing.
 */
bool processEOF() {
  final mayStayOpen = const ["dd", "dt", "li", "p", "tbody", "td", "tfoot",
      "th", "thead", "tr", "body", "html"];
  for (Node open in reversed(tree.openElements)) {
    if (mayStayOpen.indexOf(open.tagName) >= 0) continue;
    parser.parseError("expected-closing-tag-but-got-eof");
    break;
  }
  //Stop parsing
  return false;
}
1223
/**
 * Whitespace handler used for the first whitespace token after the start
 * of <pre>, <listing> and <textarea>: drops one leading newline when the
 * current element is one of those and still has no content.
 *
 * It always resets the whitespace handler to the normal one, so it runs
 * for at most one token.
 */
Token processSpaceCharactersDropNewline(token) {
  var text = token.data;
  processSpaceCharactersFunc = processSpaceCharactersNonPre;

  if (text.startsWith("\n")) {
    var current = tree.openElements.last();
    var dropsNewline =
        const ["pre", "listing", "textarea"].indexOf(current.tagName) >= 0;
    if (dropsNewline && !current.hasContent()) {
      text = text.substring(1);
    }
  }

  // Nothing to insert if the token was just the dropped newline.
  if (text.length > 0) {
    tree.reconstructActiveFormattingElements();
    tree.insertText(text);
  }
}
1241
/**
 * Inserts character data into the tree.  A token consisting of a single
 * NUL character is ignored.  Any text that is not all whitespace clears
 * [:parser.framesetOK:].
 */
Token processCharacters(CharactersToken token) {
  var text = token.data;
  //The tokenizer should always emit null on its own
  if (text == "\u0000") return null;

  tree.reconstructActiveFormattingElements();
  tree.insertText(text);
  if (parser.framesetOK && !allWhitespace(text)) {
    parser.framesetOK = false;
  }
}
1253
/**
 * Default whitespace handler: reconstructs the active formatting elements
 * and inserts the token's text as-is.  Returns nothing (null).
 */
Token processSpaceCharactersNonPre(token) {
  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data);
}
1258
/**
 * Forwards whitespace tokens to whichever handler is currently stored in
 * [processSpaceCharactersFunc].
 */
Token processSpaceCharacters(token) {
  return processSpaceCharactersFunc(token);
}
1260
/** Delegates the start tag to the in-head phase and returns its result. */
Token startTagProcessInHead(StartTagToken token) =>
    parser._inHeadPhase.processStartTag(token);
1264
/**
 * Handles a second "body" start tag: always a parse error.  When the
 * second open element is a body, attributes from [token] that the body
 * does not already have are copied onto it.
 */
void startTagBody(StartTagToken token) {
  parser.parseError("unexpected-start-tag", {"name": "body"});
  var stack = tree.openElements;
  if (stack.length == 1 || stack[1].tagName != "body") {
    assert(parser.innerHTMLMode);
    return;
  }

  parser.framesetOK = false;
  token.data.forEach((name, value) {
    // Existing attributes win; only missing ones are added.
    stack[1].attributes.putIfAbsent(name, () => value);
  });
}
1277
/**
 * Handles a "frameset" start tag in the body: always a parse error.  If
 * a body is the second open element and frameset is still allowed, the
 * body is detached, the stack is popped back to "html", and the frameset
 * is inserted.
 */
void startTagFrameset(StartTagToken token) {
  parser.parseError("unexpected-start-tag", {"name": "frameset"});
  var stack = tree.openElements;
  if (stack.length == 1 || stack[1].tagName != "body") {
    assert(parser.innerHTMLMode);
    return;
  }
  if (!parser.framesetOK) return;

  var body = stack[1];
  if (body.parent != null) {
    body.parent.$dom_removeChild(body);
  }
  while (stack.last().tagName != "html") {
    stack.removeLast();
  }
  tree.insertElement(token);
  parser.phase = parser._inFramesetPhase;
}
1294
/**
 * Closes an open <p> (if one is in button scope) and then inserts the
 * element for [token].
 */
void startTagCloseP(StartTagToken token) {
  var pIsOpen = tree.elementInScope("p", variant: "button");
  if (pIsOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
}
1301
/**
 * Closes an open <p>, inserts the element, clears framesetOK and arms the
 * newline-dropping whitespace handler for the next whitespace token.
 */
void startTagPreListing(StartTagToken token) {
  var pIsOpen = tree.elementInScope("p", variant: "button");
  if (pIsOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  processSpaceCharactersFunc = processSpaceCharactersDropNewline;
}
1310
/**
 * Opens a form unless one is already tracked by [:tree.formPointer:], in
 * which case the tag is reported as a parse error and ignored.
 */
void startTagForm(StartTagToken token) {
  if (tree.formPointer != null) {
    parser.parseError("unexpected-start-tag", {"name": "form"});
    return;
  }

  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  // The element just inserted becomes the current form.
  tree.formPointer = tree.openElements.last();
}
1322
/**
 * Handles "li", "dd" and "dt" start tags.  Walks the open elements from
 * the top; the first one that is an item of the same family gets an
 * implied end tag.  The walk stops early at any special element other
 * than address, div or p.  An open <p> is then closed and the new element
 * inserted.
 */
void startTagListItem(StartTagToken token) {
  parser.framesetOK = false;

  // Which open elements an incoming tag implicitly closes.
  final impliedClosers = const {"li": const ["li"],
                                "dt": const ["dt", "dd"],
                                "dd": const ["dt", "dd"]};
  var closeOn = impliedClosers[token.name];
  for (Node open in reversed(tree.openElements)) {
    var tag = open.tagName;
    if (closeOn.indexOf(tag) >= 0) {
      parser.phase.processEndTag(new EndTagToken(tag, data: {}));
      break;
    }
    var isSpecial = specialElements.indexOf(open.nameTuple) >= 0;
    if (isSpecial && const ["address", "div", "p"].indexOf(tag) == -1) {
      break;
    }
  }

  if (tree.elementInScope("p", variant: "button")) {
    parser.phase.processEndTag(new EndTagToken("p", data: {}));
  }

  tree.insertElement(token);
}
1347
/**
 * Closes an open <p>, inserts the element and switches the tokenizer to
 * its plaintext state.
 */
void startTagPlaintext(StartTagToken token) {
  var pIsOpen = tree.elementInScope("p", variant: "button");
  if (pIsOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.plaintextState;
}
1355
/**
 * Handles h1-h6 start tags: closes an open <p>, and if the current
 * element is itself a heading, reports a parse error and pops it before
 * inserting the new heading.
 */
void startTagHeading(StartTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p", data: {}));
  }
  var currentTag = tree.openElements.last().tagName;
  var nestedInHeading = headingElements.indexOf(currentTag) >= 0;
  if (nestedInHeading) {
    parser.parseError("unexpected-start-tag", {"name": token.name});
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
}
1366
/**
 * Handles an "a" start tag.  If an "a" is already among the active
 * formatting elements it is closed first (parse error) and removed from
 * both the open-element stack and the active formatting list.
 */
void startTagA(StartTagToken token) {
  var openAnchor = tree.elementInActiveFormattingElements("a");
  if (openAnchor != null) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "a", "endName": "a"});
    endTagFormatting(new EndTagToken("a", data: {}));
    // Remove explicitly as well; what endTagFormatting removed depends on
    // which of its branches ran.
    removeFromList(tree.openElements, openAnchor);
    removeFromList(tree.activeFormattingElements, openAnchor);
  }
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
1379
/**
 * Handles the generic formatting start tags: reconstructs the active
 * formatting elements, then inserts and records the new element via
 * [addFormattingElement].
 */
void startTagFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
1384
/**
 * Handles a "nobr" start tag.  A "nobr" already in scope is closed first
 * (parse error) before the new one is added as a formatting element.
 */
void startTagNobr(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  var alreadyOpen = tree.elementInScope("nobr");
  if (alreadyOpen) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "nobr", "endName": "nobr"});
    processEndTag(new EndTagToken("nobr", data: {}));
    // XXX Need tests that trigger the following
    tree.reconstructActiveFormattingElements();
  }
  addFormattingElement(token);
}
1396
/**
 * Handles a "button" start tag.  When a button is already in scope, it is
 * closed (parse error) and [token] is returned; otherwise the button is
 * inserted and null is returned.
 */
Token startTagButton(StartTagToken token) {
  if (!tree.elementInScope("button")) {
    tree.reconstructActiveFormattingElements();
    tree.insertElement(token);
    parser.framesetOK = false;
    return null;
  }

  parser.parseError("unexpected-start-tag-implies-end-tag",
      {"startName": "button", "endName": "button"});
  processEndTag(new EndTagToken("button", data: {}));
  return token;
}
1409
/**
 * Inserts the element and pushes a [Marker] onto the active formatting
 * elements list; also clears framesetOK.
 */
void startTagAppletMarqueeObject(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  tree.activeFormattingElements.add(Marker);
  parser.framesetOK = false;
}
1416
/**
 * Closes an open <p>, reconstructs the active formatting elements, clears
 * framesetOK and parses the element's content as RAWTEXT.
 */
void startTagXmp(StartTagToken token) {
  var pIsOpen = tree.elementInScope("p", variant: "button");
  if (pIsOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.reconstructActiveFormattingElements();
  parser.framesetOK = false;
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
1425
/**
 * Inserts a table and switches to the in-table phase.  Outside quirks
 * mode an open <p> is closed first.
 */
void startTagTable(StartTagToken token) {
  if (parser.compatMode != "quirks" &&
      tree.elementInScope("p", variant: "button")) {
    processEndTag(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  parser.phase = parser._inTablePhase;
}
1436
/**
 * Inserts a void element: the element is inserted and immediately popped
 * off the open-element stack, the self-closing flag is acknowledged, and
 * framesetOK is cleared.
 */
void startTagVoidFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  // Void elements never stay open.
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
1444
/**
 * Inserts an "input" element like any other void element, except that an
 * input whose type is "hidden" (ASCII case-insensitive) leaves
 * [:parser.framesetOK:] as it was.
 */
void startTagInput(StartTagToken token) {
  var savedFramesetOK = parser.framesetOK;
  startTagVoidFormatting(token);
  // The type attribute is optional; do not hand a missing (null) value to
  // the case-folding helper.
  var type = token.data["type"];
  if (type != null && asciiUpper2Lower(type) == "hidden") {
    //input type=hidden doesn't change framesetOK
    parser.framesetOK = savedFramesetOK;
  }
}
1453
/**
 * Inserts the element and pops it straight off the open-element stack,
 * acknowledging the self-closing flag.  Unlike [startTagVoidFormatting]
 * this neither reconstructs formatting elements nor touches framesetOK.
 */
void startTagParamSource(StartTagToken token) {
  tree.insertElement(token);
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1459
/**
 * Closes an open <p>, then inserts "hr" as a void element (inserted and
 * popped at once) and clears framesetOK.
 */
void startTagHr(StartTagToken token) {
  var pIsOpen = tree.elementInScope("p", variant: "button");
  if (pIsOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
1469
/**
 * Treats <image> as <img>: reports a parse error and reprocesses an "img"
 * start tag carrying the same attributes and self-closing flag.
 */
void startTagImage(StartTagToken token) {
  // No really...
  parser.parseError("unexpected-start-tag-treated-as",
      {"originalName": "image", "newName": "img"});
  var imgToken = new StartTagToken("img", data: token.data,
      selfClosing: token.selfClosing);
  processStartTag(imgToken);
}
1477
/**
 * Expands the deprecated <isindex> into form / hr / label / prompt text /
 * input / hr, unless a form is already open, in which case only the parse
 * error is reported.
 *
 * The "action" attribute moves to the form, "prompt" becomes the label
 * text, and the remaining attributes go to the input, whose name is
 * forced to "isindex".
 */
void startTagIsIndex(StartTagToken token) {
  parser.parseError("deprecated-tag", {"name": "isindex"});
  if (tree.formPointer != null) return;

  var formAttrs = {};
  var action = token.data["action"];
  if (action != null) {
    formAttrs["action"] = action;
  }
  processStartTag(new StartTagToken("form", data: formAttrs));
  processStartTag(new StartTagToken("hr", data: {}));
  processStartTag(new StartTagToken("label", data: {}));

  // XXX Localization ...
  var promptText = token.data["prompt"];
  if (promptText == null) {
    promptText = "This is a searchable index. Enter search keywords: ";
  }
  processCharacters(new CharactersToken(promptText));

  var inputAttrs = new Map.from(token.data);
  inputAttrs.remove('action');
  inputAttrs.remove('prompt');
  inputAttrs["name"] = "isindex";
  processStartTag(new StartTagToken("input",
      data: inputAttrs, selfClosing: token.selfClosing));

  processEndTag(new EndTagToken("label", data: {}));
  processStartTag(new StartTagToken("hr", data: {}));
  processEndTag(new EndTagToken("form", data: {}));
}
1507
/**
 * Inserts the textarea, switches the tokenizer to its RCDATA state, arms
 * the newline-dropping whitespace handler and clears framesetOK.
 */
void startTagTextarea(StartTagToken token) {
  tree.insertElement(token);
  parser.tokenizer.state = parser.tokenizer.rcdataState;
  // A newline right after the start tag is dropped by this handler.
  processSpaceCharactersFunc = processSpaceCharactersDropNewline;
  parser.framesetOK = false;
}
1514
/** Clears framesetOK, then handles the iframe as a RAWTEXT element. */
void startTagIFrame(StartTagToken token) {
  parser.framesetOK = false;
  startTagRawtext(token);
}
1519
/**
 * Parses the element's content as RAWTEXT.  Used for iframe, noembed,
 * noframes and noscript (if scripting is enabled).
 */
void startTagRawtext(StartTagToken token) {
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
1524
/**
 * Handles "option" and "optgroup" start tags: an option that is the
 * current element is closed first, then the new element is inserted.
 */
void startTagOpt(StartTagToken token) {
  var currentTag = tree.openElements.last().tagName;
  if (currentTag == "option") {
    parser.phase.processEndTag(new EndTagToken("option", data: {}));
  }
  tree.reconstructActiveFormattingElements();
  parser.tree.insertElement(token);
}
1532
/**
 * Inserts a select and picks the follow-up phase: "in select in table"
 * when the parser is currently in one of the table-related phases,
 * plain "in select" otherwise.
 */
void startTagSelect(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;

  var insideTable = parser._inTablePhase == parser.phase ||
      parser._inCaptionPhase == parser.phase ||
      parser._inColumnGroupPhase == parser.phase ||
      parser._inTableBodyPhase == parser.phase ||
      parser._inRowPhase == parser.phase ||
      parser._inCellPhase == parser.phase;
  parser.phase = insideTable
      ? parser._inSelectInTablePhase
      : parser._inSelectPhase;
}
1549
/**
 * Handles "rp" and "rt" start tags.  Inside a ruby, implied end tags are
 * generated and a parse error is reported if the current element is then
 * still not the ruby.  The element is inserted in every case.
 */
void startTagRpRt(StartTagToken token) {
  var insideRuby = tree.elementInScope("ruby");
  if (insideRuby) {
    tree.generateImpliedEndTags();
    var current = tree.openElements.last();
    if (current.tagName != "ruby") {
      parser.parseError();
    }
  }
  tree.insertElement(token);
}
1559
/**
 * Inserts a "math" element in the MathML namespace after adjusting its
 * attributes.  A self-closing tag is popped again immediately.
 */
void startTagMath(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser.adjustMathMLAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.mathml;
  tree.insertElement(token);
  //Need to get the parse error right for the case where the token
  //has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1573
/**
 * Inserts an "svg" element in the SVG namespace after adjusting its
 * attributes.  A self-closing tag is popped again immediately.
 */
void startTagSvg(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser.adjustSVGAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.svg;
  tree.insertElement(token);
  //Need to get the parse error right for the case where the token
  //has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
1587
/**
 * Ignores, with a parse error, start tags for elements that only make
 * sense as children of elements handled in a different insertion mode:
 * "caption", "col", "colgroup", "frame", "frameset", "head",
 * "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
 * "tr", "noscript"
 */
void startTagMisplaced(StartTagToken token) {
  var details = {"name": token.name};
  parser.parseError("unexpected-start-tag-ignored", details);
}
1599
/**
 * Fallback for start tags without a dedicated handler: reconstructs the
 * active formatting elements and inserts the element.  Returns nothing
 * (null).
 */
Token startTagOther(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
}
1604
/**
 * Closes the nearest <p> in button scope.  If none is open, an empty <p>
 * is first created (with a parse error) and then closed by a recursive
 * call.
 */
void endTagP(EndTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    tree.generateImpliedEndTags("p");
    if (tree.openElements.last().tagName != "p") {
      parser.parseError("unexpected-end-tag", {"name": "p"});
    }
    popOpenElementsUntil("p");
    return;
  }

  // No <p> to close: open one, complain, then close it.
  startTagCloseP(new StartTagToken("p", data: {}));
  parser.parseError("unexpected-end-tag", {"name": "p"});
  endTagP(new EndTagToken("p", data: {}));
}
1618
/**
 * Handles the "body" end tag.  Without a body in scope this is a parse
 * error and the tag is ignored.  Otherwise, if the current element is not
 * the body, the first open element (from index 2 on) that may not be left
 * open is reported, and the parser moves to the after-body phase.
 */
void endTagBody(EndTagToken token) {
  if (!tree.elementInScope("body")) {
    parser.parseError();
    return;
  }

  if (tree.openElements.last().tagName != "body") {
    final mayStayOpen = const ["dd", "dt", "li", "optgroup", "option",
        "p", "rp", "rt", "tbody", "td", "tfoot",
        "th", "thead", "tr", "body", "html"];
    for (Node open in slice(tree.openElements, 2)) {
      if (mayStayOpen.indexOf(open.tagName) >= 0) continue;
      // Not sure this is the correct name for the parse error
      parser.parseError("expected-one-end-tag-but-got-another",
          {"expectedName": "body", "gotName": open.tagName});
      break;
    }
  }
  parser.phase = parser._afterBodyPhase;
}
1639
/**
 * Handles the "html" end tag: when a body is in scope, acts as if a
 * "body" end tag had been seen and returns [token]; otherwise does
 * nothing and returns null.
 */
Token endTagHtml(EndTagToken token) {
  //We repeat the test for the body end tag token being ignored here
  if (!tree.elementInScope("body")) return null;

  endTagBody(new EndTagToken("body", data: {}));
  return token;
}
1647
/**
 * Closes a block-level element.  If the element is in scope, implied end
 * tags are generated and the stack is popped down to it; a parse error is
 * reported whenever the current element does not match the tag.
 */
void endTagBlock(EndTagToken token) {
  var name = token.name;
  //Put us back in the right whitespace handling mode
  if (name == "pre") {
    processSpaceCharactersFunc = processSpaceCharactersNonPre;
  }

  // Scope is checked once, before implied end tags are generated.
  var isOpen = tree.elementInScope(name);
  if (isOpen) {
    tree.generateImpliedEndTags();
  }
  if (tree.openElements.last().tagName != name) {
    parser.parseError("end-tag-too-early", {"name": name});
  }
  if (isOpen) {
    popOpenElementsUntil(name);
  }
}
1664
/**
 * Closes the current form.  The form pointer is always cleared; the form
 * element is only removed from the open elements when it is in scope,
 * otherwise a parse error is reported.
 */
void endTagForm(EndTagToken token) {
  var form = tree.formPointer;
  tree.formPointer = null;

  if (form === null || !tree.elementInScope(form)) {
    parser.parseError("unexpected-end-tag", {"name": "form"});
    return;
  }

  tree.generateImpliedEndTags();
  if (tree.openElements.last() != form) {
    parser.parseError("end-tag-too-early-ignored", {"name": "form"});
  }
  // Only the form itself is removed; anything above it stays open.
  removeFromList(tree.openElements, form);
}
1678
/**
 * Closes "li", "dd" or "dt".  "li" is looked up with the "list" scope
 * variant, the others with the default scope.  If the element is not in
 * scope the tag is reported and ignored.
 */
void endTagListItem(EndTagToken token) {
  var name = token.name;
  var variant = (name == "li") ? "list" : null;

  if (!tree.elementInScope(name, variant: variant)) {
    parser.parseError("unexpected-end-tag", {"name": name});
    return;
  }

  tree.generateImpliedEndTags(exclude: name);
  if (tree.openElements.last().tagName != name) {
    parser.parseError("end-tag-too-early", {"name": name});
  }
  popOpenElementsUntil(name);
}
1696
/**
 * Closes a heading.  If any heading (h1-h6) is in scope, implied end tags
 * are generated and the stack is popped until a heading element has been
 * removed -- not necessarily the one named by [token].  A mismatch with
 * the current element is reported as a parse error.
 */
void endTagHeading(EndTagToken token) {
  for (var heading in headingElements) {
    if (!tree.elementInScope(heading)) continue;
    tree.generateImpliedEndTags();
    break;
  }

  if (tree.openElements.last().tagName != token.name) {
    parser.parseError("end-tag-too-early", {"name": token.name});
  }

  for (var heading in headingElements) {
    if (!tree.elementInScope(heading)) continue;
    // Pop until the element just removed is itself a heading.
    var popped = tree.openElements.removeLast();
    while (headingElements.indexOf(popped.tagName) == -1) {
      popped = tree.openElements.removeLast();
    }
    break;
  }
}
1718
/**
 * The much-feared adoption agency algorithm.
 *
 * Handles the end tag of a formatting element.  The outer loop runs at
 * most 8 times and the inner loop at most 3 times per outer iteration.
 * The method returns early when there is no matching formatting element,
 * when that element is no longer open or in scope, or when no special
 * element is found from it upwards in the stack of open elements.
 */
endTagFormatting(EndTagToken token) {
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency
  // TODO(jmesserly): the comments here don't match the numbered steps in the
  // updated spec. This needs a pass over it to verify that it still matches.
  // In particular the html5lib Python code skipped "step 4", I'm not sure why.
  // XXX Better parseError messages appreciated.
  int outerLoopCounter = 0;
  while (outerLoopCounter < 8) {
    outerLoopCounter += 1;

    // Step 1 paragraph 1
    // No such formatting element, or it is open but not in scope: report
    // and give up.
    var formattingElement = tree.elementInActiveFormattingElements(
        token.name);
    if (formattingElement == null ||
        (tree.openElements.indexOf(formattingElement) >= 0 &&
         !tree.elementInScope(formattingElement.tagName))) {
      parser.parseError("adoption-agency-1.1", {"name": token.name});
      return;
    // Step 1 paragraph 2
    // Listed as active but no longer open: just drop it from the list.
    } else if (tree.openElements.indexOf(formattingElement) == -1) {
      parser.parseError("adoption-agency-1.2", {"name": token.name});
      removeFromList(tree.activeFormattingElements, formattingElement);
      return;
    }

    // Step 1 paragraph 3
    if (formattingElement != tree.openElements.last()) {
      parser.parseError("adoption-agency-1.3", {"name": token.name});
    }

    // Step 2
    // Start of the adoption agency algorithm proper
    // The furthest block is the first special element found when scanning
    // the open elements from the formatting element's position upwards.
    var afeIndex = tree.openElements.indexOf(formattingElement);
    Node furthestBlock = null;
    for (Node element in slice(tree.openElements, afeIndex)) {
      if (specialElements.indexOf(element.nameTuple) >= 0) {
        furthestBlock = element;
        break;
      }
    }
    // Step 3
    // No furthest block: pop up to and including the formatting element
    // and remove it from the active formatting list.
    if (furthestBlock === null) {
      var element = tree.openElements.removeLast();
      while (element != formattingElement) {
        element = tree.openElements.removeLast();
      }
      removeFromList(tree.activeFormattingElements, element);
      return;
    }

    // The element immediately below the formatting element in the stack.
    var commonAncestor = tree.openElements[afeIndex - 1];

    // Step 5
    // The bookmark is supposed to help us identify where to reinsert
    // nodes in step 12. We have to ensure that we reinsert nodes after
    // the node before the active formatting element. Note the bookmark
    // can move in step 6.4
    var bookmark = tree.activeFormattingElements.indexOf(formattingElement);

    // Step 6
    Node lastNode = furthestBlock;
    var node = furthestBlock;
    int innerLoopCounter = 0;

    var index = tree.openElements.indexOf(node);
    while (innerLoopCounter < 3) {
      innerLoopCounter += 1;

      // Node is element before node in open elements
      index -= 1;
      node = tree.openElements[index];
      // Elements that are not active formatting elements are simply
      // dropped from the stack of open elements.
      if (tree.activeFormattingElements.indexOf(node) == -1) {
        removeFromList(tree.openElements, node);
        continue;
      }
      // Step 6.3
      if (node == formattingElement) {
        break;
      }
      // Step 6.4
      if (lastNode == furthestBlock) {
        bookmark = (tree.activeFormattingElements.indexOf(node) + 1);
      }
      // Step 6.5
      //cite = node.parent
      var clone = node.clone();
      // Replace node with clone
      tree.activeFormattingElements[
          tree.activeFormattingElements.indexOf(node)] = clone;
      tree.openElements[tree.openElements.indexOf(node)] = clone;
      node = clone;

      // Step 6.6
      // Remove lastNode from its parents, if any
      if (lastNode.parent != null) {
        lastNode.parent.$dom_removeChild(lastNode);
      }
      node.$dom_appendChild(lastNode);
      // Step 6.7
      lastNode = node;
      // End of inner loop
    }

    // Step 7
    // Foster parent lastNode if commonAncestor is a
    // table, tbody, tfoot, thead, or tr we need to foster parent the
    // lastNode
    if (lastNode.parent != null) {
      lastNode.parent.$dom_removeChild(lastNode);
    }

    if (const ["table", "tbody", "tfoot", "thead", "tr"].indexOf(
        commonAncestor.tagName) >= 0) {
      var nodePos = tree.getTableMisnestedNodePosition();
      nodePos[0].insertBefore(lastNode, nodePos[1]);
    } else {
      commonAncestor.$dom_appendChild(lastNode);
    }

    // Step 8
    var clone = formattingElement.clone();

    // Step 9
    furthestBlock.reparentChildren(clone);

    // Step 10
    furthestBlock.$dom_appendChild(clone);

    // Step 11
    // The bookmark is clamped to the list length because the list has
    // just shrunk by one.
    removeFromList(tree.activeFormattingElements, formattingElement);
    tree.activeFormattingElements.insertRange(
        min(bookmark, tree.activeFormattingElements.length), 1, clone);

    // Step 12
    removeFromList(tree.openElements, formattingElement);
    tree.openElements.insertRange(
        tree.openElements.indexOf(furthestBlock) + 1, 1, clone);
  }
}
1859
/**
 * Closes applet, marquee or object.  When the element is in scope the
 * stack is popped down to it and the active formatting elements are
 * cleared; a mismatch with the current element is a parse error.
 */
void endTagAppletMarqueeObject(EndTagToken token) {
  var name = token.name;
  if (tree.elementInScope(name)) {
    tree.generateImpliedEndTags();
  }
  var currentTag = tree.openElements.last().tagName;
  if (currentTag != name) {
    parser.parseError("end-tag-too-early", {"name": name});
  }
  // Scope is checked again after implied end tags have been generated.
  if (tree.elementInScope(name)) {
    popOpenElementsUntil(name);
    tree.clearActiveFormattingElements();
  }
}
1872
/**
 * Treats </br> as a <br> start tag without attributes: reports a parse
 * error, then inserts a br element and pops it straight away.
 */
void endTagBr(EndTagToken token) {
  parser.parseError("unexpected-end-tag-treated-as",
      {"originalName": "br", "newName": "br element"});
  tree.reconstructActiveFormattingElements();
  var impliedBr = new StartTagToken("br", data: {});
  tree.insertElement(impliedBr);
  tree.openElements.removeLast();
}
1880
/**
 * Fallback for all other end tags.  Walks the open elements from the top:
 * a matching element is closed together with everything above it; hitting
 * a special element first makes the tag a parse error that is ignored.
 */
void endTagOther(EndTagToken token) {
  for (Node open in reversed(tree.openElements)) {
    if (open.tagName != token.name) {
      if (specialElements.indexOf(open.nameTuple) >= 0) {
        parser.parseError("unexpected-end-tag", {"name": token.name});
        break;
      }
      continue;
    }

    tree.generateImpliedEndTags(exclude: token.name);
    if (tree.openElements.last().tagName != token.name) {
      parser.parseError("unexpected-end-tag", {"name": token.name});
    }
    // Pop up to and including the matched element.
    while (tree.openElements.removeLast() != open) {}
    break;
  }
}
1898 }
1899
1900
/**
 * Phase used while a text-only element is being read.  Other phases in
 * this file enter it for <script>, after storing the phase to resume in
 * [:parser.originalPhase:]; every way out of this phase restores that
 * phase.
 */
class TextPhase extends Phase {
  TextPhase(parser) : super(parser);

  // "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token.name
  /** Start tags are not expected in this phase. */
  processStartTag(StartTagToken token) { assert(false); }

  /** Routes "script" to [endTagScript] and everything else to [endTagOther]. */
  processEndTag(EndTagToken token) {
    if (token.name == 'script') return endTagScript(token);
    return endTagOther(token);
  }

  /** Appends the text to the tree.  Returns nothing (null). */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data);
  }

  /**
   * End of file inside the element: reports a parse error naming the
   * current element, pops it, restores the original phase and returns
   * true.
   */
  bool processEOF() {
    parser.parseError("expected-named-closing-tag-but-got-eof",
        {'name': tree.openElements.last().tagName});
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
    return true;
  }

  /** Pops the script element and restores the original phase. */
  void endTagScript(EndTagToken token) {
    var node = tree.openElements.removeLast();
    assert(node.tagName == "script");
    parser.phase = parser.originalPhase;
    //The rest of this method is all stuff that only happens if
    //document.write works
  }

  /** Pops the current element and restores the original phase. */
  void endTagOther(EndTagToken token) {
    // The popped element itself is not needed.
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
  }
}
1937
1938 class InTablePhase extends Phase {
// http://www.whatwg.org/specs/web-apps/current-work/#in-table
InTablePhase(parser) : super(parser);
1941
/** Routes a start tag to its in-table handler based on the tag name. */
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);
    case "caption":
      return startTagCaption(token);
    case "colgroup":
      return startTagColgroup(token);
    case "col":
      return startTagCol(token);
    case "table":
      return startTagTable(token);
    case "input":
      return startTagInput(token);
    case "form":
      return startTagForm(token);
    case "tbody":
    case "tfoot":
    case "thead":
      return startTagRowGroup(token);
    case "td":
    case "th":
    case "tr":
      return startTagImplyTbody(token);
    case "style":
    case "script":
      return startTagStyleScript(token);
    default:
      return startTagOther(token);
  }
}
1957
1958 processEndTag(EndTagToken token) {
1959 switch (token.name) {
1960 case "table": return endTagTable(token);
1961 case "body": case "caption": case "col": case "colgroup": case "html":
1962 case "tbody": case "td": case "tfoot": case "th": case "thead": case "tr":
1963 return endTagIgnore(token);
1964 default: return endTagOther(token);
1965 }
1966 }
1967
1968 // helper methods
1969 void clearStackToTableContext() {
1970 // "clear the stack back to a table context"
1971 while (tree.openElements.last().tagName != "table" &&
1972 tree.openElements.last().tagName != "html") {
1973 //parser.parseError("unexpected-implied-end-tag-in-table",
1974 // {"name": tree.openElements.last().name})
1975 tree.openElements.removeLast();
1976 }
1977 // When the current node is <html> it's an innerHTML case
1978 }
1979
1980 // processing methods
1981 bool processEOF() {
1982 if (tree.openElements.last().tagName != "html") {
1983 parser.parseError("eof-in-table");
1984 } else {
1985 assert(parser.innerHTMLMode);
1986 }
1987 //Stop parsing
1988 return false;
1989 }
1990
1991 Token processSpaceCharacters(SpaceCharactersToken token) {
1992 var originalPhase = parser.phase;
1993 parser.phase = parser._inTableTextPhase;
1994 parser._inTableTextPhase.originalPhase = originalPhase;
1995 parser.phase.processSpaceCharacters(token);
1996 }
1997
1998 Token processCharacters(CharactersToken token) {
1999 var originalPhase = parser.phase;
2000 parser.phase = parser._inTableTextPhase;
2001 parser._inTableTextPhase.originalPhase = originalPhase;
2002 parser.phase.processCharacters(token);
2003 }
2004
2005 void insertText(CharactersToken token) {
2006 // If we get here there must be at least one non-whitespace character
2007 // Do the table magic!
2008 tree.insertFromTable = true;
2009 parser._inBodyPhase.processCharacters(token);
2010 tree.insertFromTable = false;
2011 }
2012
2013 void startTagCaption(StartTagToken token) {
2014 clearStackToTableContext();
2015 tree.activeFormattingElements.add(Marker);
2016 tree.insertElement(token);
2017 parser.phase = parser._inCaptionPhase;
2018 }
2019
2020 void startTagColgroup(StartTagToken token) {
2021 clearStackToTableContext();
2022 tree.insertElement(token);
2023 parser.phase = parser._inColumnGroupPhase;
2024 }
2025
2026 Token startTagCol(StartTagToken token) {
2027 startTagColgroup(new StartTagToken("colgroup", data: {}));
2028 return token;
2029 }
2030
2031 void startTagRowGroup(StartTagToken token) {
2032 clearStackToTableContext();
2033 tree.insertElement(token);
2034 parser.phase = parser._inTableBodyPhase;
2035 }
2036
2037 Token startTagImplyTbody(StartTagToken token) {
2038 startTagRowGroup(new StartTagToken("tbody", data: {}));
2039 return token;
2040 }
2041
2042 Token startTagTable(StartTagToken token) {
2043 parser.parseError("unexpected-start-tag-implies-end-tag",
2044 {"startName": "table", "endName": "table"});
2045 parser.phase.processEndTag(new EndTagToken("table", data: {}));
2046 if (!parser.innerHTMLMode) {
2047 return token;
2048 }
2049 }
2050
2051 Token startTagStyleScript(StartTagToken token) {
2052 return parser._inHeadPhase.processStartTag(token);
2053 }
2054
2055 void startTagInput(StartTagToken token) {
2056 if (asciiUpper2Lower(token.data["type"]) == "hidden") {
2057 parser.parseError("unexpected-hidden-input-in-table");
2058 tree.insertElement(token);
2059 // XXX associate with form
2060 tree.openElements.removeLast();
2061 } else {
2062 startTagOther(token);
2063 }
2064 }
2065
2066 void startTagForm(StartTagToken token) {
2067 parser.parseError("unexpected-form-in-table");
2068 if (tree.formPointer === null) {
2069 tree.insertElement(token);
2070 tree.formPointer = tree.openElements.last();
2071 tree.openElements.removeLast();
2072 }
2073 }
2074
2075 void startTagOther(StartTagToken token) {
2076 parser.parseError("unexpected-start-tag-implies-table-voodoo",
2077 {"name": token.name});
2078 // Do the table magic!
2079 tree.insertFromTable = true;
2080 parser._inBodyPhase.processStartTag(token);
2081 tree.insertFromTable = false;
2082 }
2083
2084 void endTagTable(EndTagToken token) {
2085 if (tree.elementInScope("table", variant: "table")) {
2086 tree.generateImpliedEndTags();
2087 if (tree.openElements.last().tagName != "table") {
2088 parser.parseError("end-tag-too-early-named", {"gotName": "table",
2089 "expectedName": tree.openElements.last().tagName});
2090 }
2091 while (tree.openElements.last().tagName != "table") {
2092 tree.openElements.removeLast();
2093 }
2094 tree.openElements.removeLast();
2095 parser.resetInsertionMode();
2096 } else {
2097 // innerHTML case
2098 assert(parser.innerHTMLMode);
2099 parser.parseError();
2100 }
2101 }
2102
2103 void endTagIgnore(EndTagToken token) {
2104 parser.parseError("unexpected-end-tag", {"name": token.name});
2105 }
2106
2107 void endTagOther(EndTagToken token) {
2108 parser.parseError("unexpected-end-tag-implies-table-voodoo",
2109 {"name": token.name});
2110 // Do the table magic!
2111 tree.insertFromTable = true;
2112 parser._inBodyPhase.processEndTag(token);
2113 tree.insertFromTable = false;
2114 }
2115 }
2116
/**
 * Phase that buffers character tokens seen inside a table and flushes them
 * in one go when any other kind of token (or end of input) arrives.
 */
class InTableTextPhase extends Phase {
  /** Phase to switch back to once the buffered text has been flushed. */
  Phase originalPhase;

  /** Character tokens collected since the last flush. */
  List<StringToken> characterTokens;

  InTableTextPhase(parser)
      : characterTokens = <StringToken>[],
        super(parser);

  /**
   * Joins the buffered tokens and inserts the result: whitespace-only text
   * goes straight into the tree (if non-empty), anything else is handed to
   * the in-table phase. The buffer is reset afterwards.
   */
  void flushCharacters() {
    var text = joinStr(characterTokens.map((t) => t.data));
    if (allWhitespace(text)) {
      if (text.length > 0) {
        tree.insertText(text);
      }
    } else {
      parser._inTablePhase.insertText(new CharactersToken(text));
    }
    characterTokens = <StringToken>[];
  }

  /** Flushes pending text, then makes [originalPhase] current again. */
  void _flushAndRestorePhase() {
    flushCharacters();
    parser.phase = originalPhase;
  }

  /** Ends text buffering and hands the comment token back. */
  Token processComment(CommentToken token) {
    _flushAndRestorePhase();
    return token;
  }

  /** Ends text buffering at end of input. Always returns true. */
  bool processEOF() {
    _flushAndRestorePhase();
    return true;
  }

  /** Buffers the token unless it is a lone U+0000, which is dropped. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      characterTokens.add(token);
    }
    return null;
  }

  /** Buffers a whitespace token. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    //pretty sure we should never reach here
    characterTokens.add(token);
    // XXX assert(false);
    return null;
  }

  /** Ends text buffering and hands the start tag back. */
  Token processStartTag(StartTagToken token) {
    _flushAndRestorePhase();
    return token;
  }

  /** Ends text buffering and hands the end tag back. */
  Token processEndTag(EndTagToken token) {
    _flushAndRestorePhase();
    return token;
  }
}
2172
2173
/**
 * Tree-construction phase used while inside a table `<caption>`.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-caption
 */
class InCaptionPhase extends Phase {
  InCaptionPhase(parser) : super(parser);

  /** Dispatches a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (const ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
        "thead", "tr"].indexOf(name) >= 0) {
      return startTagTableElement(token);
    }
    return startTagOther(token);
  }

  /** Dispatches an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "caption") return endTagCaption(token);
    if (name == "table") return endTagTable(token);
    if (const ["body", "col", "colgroup", "html", "tbody", "td", "tfoot",
        "th", "thead", "tr"].indexOf(name) >= 0) {
      return endTagIgnore(token);
    }
    return endTagOther(token);
  }

  /** Whether a `</caption>` would be ignored (no caption in table scope). */
  bool ignoreEndTagCaption() {
    var captionInScope = tree.elementInScope("caption", variant: "table");
    return !captionInScope;
  }

  /** Runs the in-body EOF handling. Always returns false. */
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  /** Character data is handled by the in-body rules. */
  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  /**
   * Reports a parse error, processes an implied `</caption>` in the current
   * phase and hands [token] back unless that end tag was ignored.
   */
  Token _impliedEndCaption(Token token) {
    parser.parseError();
    // The ignore check has to be sampled before the end tag is processed.
    var ignoreEndTag = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption", data: {}));
    return ignoreEndTag ? null : token;
  }

  /** Handles table-structure start tags by implying `</caption>` first. */
  Token startTagTableElement(StartTagToken token) {
    //XXX Have to duplicate logic here to find out if the tag is ignored
    return _impliedEndCaption(token);
  }

  /** Any other start tag is handled by the in-body rules. */
  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Handles `</caption>`: pops the stack through the `caption` element,
   * clears the active formatting elements and returns to the in-table phase.
   */
  void endTagCaption(EndTagToken token) {
    if (ignoreEndTagCaption()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return;
    }

    // AT this code is quite similar to endTagTable in "InTable"
    tree.generateImpliedEndTags();
    var currentName = tree.openElements.last().tagName;
    if (currentName != "caption") {
      parser.parseError("expected-one-end-tag-but-got-another",
          {"gotName": "caption", "expectedName": currentName});
    }
    while (tree.openElements.last().tagName != "caption") {
      tree.openElements.removeLast();
    }
    tree.openElements.removeLast();
    tree.clearActiveFormattingElements();
    parser.phase = parser._inTablePhase;
  }

  /** Handles `</table>` by implying `</caption>` first. */
  Token endTagTable(EndTagToken token) {
    return _impliedEndCaption(token);
  }

  /** Reports an end tag that is simply ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  /** Any other end tag is handled by the in-body rules. */
  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}
2267
2268
/**
 * Tree-construction phase used while inside a `<colgroup>`.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-column
 */
class InColumnGroupPhase extends Phase {
  InColumnGroupPhase(parser) : super(parser);

  /** Dispatches a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "col") return startTagCol(token);
    return startTagOther(token);
  }

  /** Dispatches an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "colgroup") return endTagColgroup(token);
    if (name == "col") return endTagCol(token);
    return endTagOther(token);
  }

  /** Whether a `</colgroup>` would be ignored (current node is `html`). */
  bool ignoreEndTagColgroup() {
    var current = tree.openElements.last();
    return current.tagName == "html";
  }

  /**
   * At end of input: returns false in the innerHTML case, otherwise closes
   * the column group via an implied `</colgroup>` and returns true.
   */
  bool processEOF() {
    if (ignoreEndTagColgroup()) {
      assert(parser.innerHTMLMode);
      return false;
    }
    endTagColgroup(new EndTagToken("colgroup", data: {}));
    return true;
  }

  /**
   * Processes an implied `</colgroup>` and hands [token] back unless that
   * end tag was ignored.
   */
  Token _impliedEndColgroup(Token token) {
    // Sample the ignore check before the stack is modified.
    var ignoreEndTag = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup", data: {}));
    return ignoreEndTag ? null : token;
  }

  /** Character data implies `</colgroup>`. */
  Token processCharacters(CharactersToken token) {
    return _impliedEndColgroup(token);
  }

  /** Handles `<col>`: inserts the element and immediately pops it. */
  void startTagCol(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  /** Any other start tag implies `</colgroup>`. */
  Token startTagOther(StartTagToken token) {
    return _impliedEndColgroup(token);
  }

  /**
   * Handles `</colgroup>`: pops the current node and returns to the in-table
   * phase, or reports an error when the tag must be ignored.
   */
  void endTagColgroup(EndTagToken token) {
    if (!ignoreEndTagColgroup()) {
      tree.openElements.removeLast();
      parser.phase = parser._inTablePhase;
      return;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError();
  }

  /** `</col>` is always a parse error. */
  void endTagCol(EndTagToken token) {
    parser.parseError("no-end-tag", {"name": "col"});
  }

  /** Any other end tag implies `</colgroup>`. */
  Token endTagOther(EndTagToken token) {
    return _impliedEndColgroup(token);
  }
}
2342
2343
/**
 * Tree-construction phase used while inside `<tbody>`, `<thead>` or
 * `<tfoot>`.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-table0
 */
class InTableBodyPhase extends Phase {
  InTableBodyPhase(parser) : super(parser);

  /** Dispatches a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "tr":
        return startTagTr(token);
      case "td":
      case "th":
        return startTagTableCell(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "tfoot":
      case "thead":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /** Dispatches an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tbody":
      case "tfoot":
      case "thead":
        return endTagTableRowGroup(token);
      case "table":
        return endTagTable(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "td":
      case "th":
      case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /**
   * Pops open elements until the current node is one of `tbody`, `tfoot`,
   * `thead` or `html`.
   */
  void clearStackToTableBodyContext() {
    final stopTags = const ["tbody", "tfoot","thead", "html"];
    while (stopTags.indexOf(tree.openElements.last().tagName) < 0) {
      //XXX parser.parseError("unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last().name})
      tree.openElements.removeLast();
    }
    // Ending up on <html> is only expected in the innerHTML case.
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    }
  }

  // the rest

  /** Runs the in-table EOF handling. Always returns false. */
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  /** Whitespace is handled by the in-table rules. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  /** Character data is handled by the in-table rules. */
  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /** Handles `<tr>`: inserts it and moves to the in-row phase. */
  void startTagTr(StartTagToken token) {
    clearStackToTableBodyContext();
    tree.insertElement(token);
    parser.phase = parser._inRowPhase;
  }

  /**
   * Handles `<td>`/`<th>` directly inside a row group: reports the error,
   * acts as if `<tr>` had been seen and hands the cell token back.
   */
  Token startTagTableCell(StartTagToken token) {
    parser.parseError("unexpected-cell-in-table-body",
        {"name": token.name});
    var impliedRow = new StartTagToken("tr", data: {});
    startTagTr(impliedRow);
    return token;
  }

  /** Table-structure start tags are treated exactly like `</table>`. */
  Token startTagTableOther(token) {
    return endTagTable(token);
  }

  /** Any other start tag is handled by the in-table rules. */
  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /**
   * Handles `</tbody>`, `</tfoot>` and `</thead>`: closes the row group and
   * returns to the in-table phase when the element is in table scope,
   * otherwise reports an error.
   */
  void endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError("unexpected-end-tag-in-table-body",
          {"name": token.name});
      return;
    }
    clearStackToTableBodyContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTablePhase;
  }

  /**
   * Closes the open row group (whichever of tbody/thead/tfoot is in table
   * scope) and hands [token] back; returns null when none is in scope.
   */
  Token endTagTable(TagToken token) {
    // XXX AT Any ideas on how to share this with endTagTable?
    var rowGroupInScope = tree.elementInScope("tbody", variant: "table") ||
        tree.elementInScope("thead", variant: "table") ||
        tree.elementInScope("tfoot", variant: "table");
    if (!rowGroupInScope) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return null;
    }
    clearStackToTableBodyContext();
    var currentName = tree.openElements.last().tagName;
    endTagTableRowGroup(new EndTagToken(currentName, data: {}));
    return token;
  }

  /** Reports an end tag that is simply ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-table-body",
        {"name": token.name});
  }

  /** Any other end tag is handled by the in-table rules. */
  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}
2455
2456
/**
 * Tree-construction phase used while inside a `<tr>`.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-row
 */
class InRowPhase extends Phase {
  InRowPhase(parser) : super(parser);

  /** Dispatches a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "td":
      case "th":
        return startTagTableCell(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  /** Dispatches an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tr":
        return endTagTr(token);
      case "table":
        return endTagTable(token);
      case "tbody":
      case "tfoot":
      case "thead":
        return endTagTableRowGroup(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
      case "td":
      case "th":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods (XXX unify this with other table helper methods)

  /**
   * Pops open elements until the current node is `tr` or `html`, reporting a
   * parse error for every element removed.
   */
  void clearStackToTableRowContext() {
    while (true) {
      var currentName = tree.openElements.last().tagName;
      if (currentName == "tr" || currentName == "html") break;
      parser.parseError("unexpected-implied-end-tag-in-table-row",
          {"name": currentName});
      tree.openElements.removeLast();
    }
  }

  /** Whether a `</tr>` would be ignored (no `tr` in table scope). */
  bool ignoreEndTagTr() {
    var rowInScope = tree.elementInScope("tr", variant: "table");
    return !rowInScope;
  }

  // the rest

  /** Runs the in-table EOF handling. Always returns false. */
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  /** Whitespace is handled by the in-table rules. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  /** Character data is handled by the in-table rules. */
  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /** Handles `<td>`/`<th>`: inserts the cell and moves to the in-cell phase. */
  void startTagTableCell(StartTagToken token) {
    clearStackToTableRowContext();
    tree.insertElement(token);
    parser.phase = parser._inCellPhase;
    tree.activeFormattingElements.add(Marker);
  }

  /**
   * Processes an implied `</tr>` and hands [token] back unless that end tag
   * was ignored.
   */
  Token _impliedEndTr(Token token) {
    // Sample the ignore check before the stack is modified.
    var ignoreEndTag = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr", data: {}));
    // XXX how are we sure it's always ignored in the innerHTML case?
    return ignoreEndTag ? null : token;
  }

  /** Table-structure start tags imply `</tr>`. */
  Token startTagTableOther(StartTagToken token) {
    return _impliedEndTr(token);
  }

  /** Any other start tag is handled by the in-table rules. */
  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /**
   * Handles `</tr>`: closes the row and returns to the in-table-body phase,
   * or reports an error when the tag must be ignored.
   */
  void endTagTr(EndTagToken token) {
    if (ignoreEndTagTr()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return;
    }
    clearStackToTableRowContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTableBodyPhase;
  }

  /**
   * Handles `</table>` by implying `</tr>`; the tag is handed back for
   * reprocessing if the `tr` end tag was not ignored.
   */
  Token endTagTable(EndTagToken token) {
    return _impliedEndTr(token);
  }

  /**
   * Handles `</tbody>`, `</tfoot>` and `</thead>`: when the element is in
   * table scope, implies `</tr>` and hands the token back; otherwise reports
   * an error and returns null.
   */
  Token endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError();
      return null;
    }
    endTagTr(new EndTagToken("tr", data: {}));
    return token;
  }

  /** Reports an end tag that is simply ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-table-row",
        {"name": token.name});
  }

  /** Any other end tag is handled by the in-table rules. */
  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}
2570
/**
 * Tree-construction phase used while inside a `<td>` or `<th>`.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-cell
 */
class InCellPhase extends Phase {
  InCellPhase(parser) : super(parser);

  /** Dispatches a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (const ["caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
        "thead", "tr"].indexOf(name) >= 0) {
      return startTagTableOther(token);
    }
    return startTagOther(token);
  }

  /** Dispatches an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    var name = token.name;
    if (name == "td" || name == "th") return endTagTableCell(token);
    if (const ["body", "caption", "col", "colgroup", "html"]
        .indexOf(name) >= 0) {
      return endTagIgnore(token);
    }
    if (const ["table", "tbody", "tfoot", "thead", "tr"]
        .indexOf(name) >= 0) {
      return endTagImply(token);
    }
    return endTagOther(token);
  }

  // helper

  /**
   * Closes the open cell: `td` if one is in table scope, otherwise `th` if
   * one is in table scope, otherwise nothing.
   */
  void closeCell() {
    var cellName = null;
    if (tree.elementInScope("td", variant: "table")) {
      cellName = "td";
    } else if (tree.elementInScope("th", variant: "table")) {
      cellName = "th";
    }
    if (cellName != null) {
      endTagTableCell(new EndTagToken(cellName, data: {}));
    }
  }

  // the rest

  /** Runs the in-body EOF handling. Always returns false. */
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  /** Character data is handled by the in-body rules. */
  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  /**
   * Table-structure start tags close the current cell and are handed back;
   * with no cell in table scope this is a parse error and yields null.
   */
  Token startTagTableOther(StartTagToken token) {
    var cellInScope = tree.elementInScope("td", variant: "table") ||
        tree.elementInScope("th", variant: "table");
    if (!cellInScope) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return null;
    }
    closeCell();
    return token;
  }

  /** Any other start tag is handled by the in-body rules. */
  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Handles `</td>` and `</th>`: pops the stack through the named cell,
   * clears the active formatting elements and returns to the in-row phase.
   * Reports an error when the cell is not in table scope.
   */
  void endTagTableCell(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError("unexpected-end-tag", {"name": token.name});
      return;
    }

    tree.generateImpliedEndTags(token.name);
    if (tree.openElements.last().tagName == token.name) {
      tree.openElements.removeLast();
    } else {
      parser.parseError("unexpected-cell-end-tag", {"name": token.name});
      popOpenElementsUntil(token.name);
    }
    tree.clearActiveFormattingElements();
    parser.phase = parser._inRowPhase;
  }

  /** Reports an end tag that is simply ignored in this phase. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  /**
   * Handles end tags of enclosing table structure: closes the cell and hands
   * the token back when the named element is in table scope; otherwise
   * reports an error and yields null.
   */
  Token endTagImply(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      // sometimes innerHTML case
      parser.parseError();
      return null;
    }
    closeCell();
    return token;
  }

  /** Any other end tag is handled by the in-body rules. */
  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}
2666
/**
 * Tree-construction phase used while inside a `<select>`.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-select
 */
class InSelectPhase extends Phase {
  InSelectPhase(parser) : super(parser);

  /** Dispatches a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "option":
        return startTagOption(token);
      case "optgroup":
        return startTagOptgroup(token);
      case "select":
        return startTagSelect(token);
      case "input":
      case "keygen":
      case "textarea":
        return startTagInput(token);
      case "script":
        return startTagScript(token);
      default:
        return startTagOther(token);
    }
  }

  /** Dispatches an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "option":
        return endTagOption(token);
      case "optgroup":
        return endTagOptgroup(token);
      case "select":
        return endTagSelect(token);
      default:
        return endTagOther(token);
    }
  }

  /**
   * Reports "eof-in-select" unless the current node is `html` (the innerHTML
   * case). Always returns false.
   */
  bool processEOF() {
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError("eof-in-select");
    }
    return false;
  }

  /** Inserts the text, except for a lone U+0000 which is dropped. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      tree.insertText(token.data);
    }
    return null;
  }

  /** Handles `<option>`, closing a currently open option first. */
  void startTagOption(StartTagToken token) {
    // We need to imply </option> if <option> is the current node.
    if (tree.openElements.last().tagName == "option") {
      tree.openElements.removeLast();
    }
    tree.insertElement(token);
  }

  /**
   * Handles `<optgroup>`, closing a currently open option and then a
   * currently open optgroup first.
   */
  void startTagOptgroup(StartTagToken token) {
    // Order matters: the option (if any) sits on top of the optgroup.
    for (var implied in const ["option", "optgroup"]) {
      if (tree.openElements.last().tagName == implied) {
        tree.openElements.removeLast();
      }
    }
    tree.insertElement(token);
  }

  /** A nested `<select>` is an error and acts like `</select>`. */
  void startTagSelect(StartTagToken token) {
    parser.parseError("unexpected-select-in-select");
    var impliedEnd = new EndTagToken("select", data: {});
    endTagSelect(impliedEnd);
  }

  /**
   * Handles `<input>`, `<keygen>` and `<textarea>`: always an error. When a
   * select is in select scope it is closed and the token handed back.
   */
  Token startTagInput(StartTagToken token) {
    parser.parseError("unexpected-input-in-select");
    if (!tree.elementInScope("select", variant: "select")) {
      assert(parser.innerHTMLMode);
      return null;
    }
    endTagSelect(new EndTagToken("select", data: {}));
    return token;
  }

  /** `<script>` is handled by the in-head rules. */
  Token startTagScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /** Any other start tag is reported and dropped. */
  Token startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-in-select",
        {"name": token.name});
    return null;
  }

  /** Handles `</option>`: only valid when an option is the current node. */
  void endTagOption(EndTagToken token) {
    if (tree.openElements.last().tagName != "option") {
      parser.parseError("unexpected-end-tag-in-select",
          {"name": "option"});
      return;
    }
    tree.openElements.removeLast();
  }

  /** Handles `</optgroup>`. */
  void endTagOptgroup(EndTagToken token) {
    // </optgroup> implicitly closes <option>
    if (tree.openElements.last().tagName == "option" &&
        tree.openElements[tree.openElements.length - 2].tagName ==
            "optgroup") {
      tree.openElements.removeLast();
    }
    // It also closes </optgroup>, but nothing else
    if (tree.openElements.last().tagName != "optgroup") {
      parser.parseError("unexpected-end-tag-in-select",
          {"name": "optgroup"});
      return;
    }
    tree.openElements.removeLast();
  }

  /**
   * Handles `</select>`: pops the stack through the select element and
   * resets the insertion mode, or reports an error when no select is in
   * select scope.
   */
  void endTagSelect(EndTagToken token) {
    if (!tree.elementInScope("select", variant: "select")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return;
    }
    popOpenElementsUntil("select");
    parser.resetInsertionMode();
  }

  /** Any other end tag is reported and dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-select",
        {"name": token.name});
  }
}
2792
2793
/**
 * Tree-construction phase used for a `<select>` nested inside a table.
 * Table-structure tags get special handling; everything else is delegated
 * to the in-select phase.
 */
class InSelectInTablePhase extends Phase {
  InSelectInTablePhase(parser) : super(parser);

  /** Whether [name] is one of the table-structure tags special-cased here. */
  bool _isTableTag(name) {
    return const ["caption", "table", "tbody", "tfoot", "thead", "tr", "td",
        "th"].indexOf(name) >= 0;
  }

  /** Dispatches a start tag to the handler for its tag name. */
  processStartTag(StartTagToken token) {
    if (_isTableTag(token.name)) return startTagTable(token);
    return startTagOther(token);
  }

  /** Dispatches an end tag to the handler for its tag name. */
  processEndTag(EndTagToken token) {
    if (_isTableTag(token.name)) return endTagTable(token);
    return endTagOther(token);
  }

  /** Runs the in-select EOF handling. Always returns false. */
  bool processEOF() {
    parser._inSelectPhase.processEOF();
    return false;
  }

  /** Character data is handled by the in-select rules. */
  Token processCharacters(CharactersToken token) {
    return parser._inSelectPhase.processCharacters(token);
  }

  /**
   * A table-structure start tag is an error: the select is closed through an
   * implied `</select>` and the token is handed back.
   */
  Token startTagTable(StartTagToken token) {
    parser.parseError("unexpected-table-element-start-tag-in-select-in-table",
        {"name": token.name});
    var impliedEnd = new EndTagToken("select", data: {});
    endTagOther(impliedEnd);
    return token;
  }

  /** Any other start tag is handled by the in-select rules. */
  Token startTagOther(StartTagToken token) {
    return parser._inSelectPhase.processStartTag(token);
  }

  /**
   * A table-structure end tag is an error. When the named element is in
   * table scope, the select is closed through an implied `</select>` and the
   * token is handed back; otherwise null is returned.
   */
  Token endTagTable(EndTagToken token) {
    parser.parseError("unexpected-table-element-end-tag-in-select-in-table",
        {"name": token.name});
    if (!tree.elementInScope(token.name, variant: "table")) {
      return null;
    }
    endTagOther(new EndTagToken("select", data: {}));
    return token;
  }

  /** Any other end tag is handled by the in-select rules. */
  Token endTagOther(EndTagToken token) {
    return parser._inSelectPhase.processEndTag(token);
  }
}
2848
2849
/**
 * Tree-construction phase used while the current node is in a non-default
 * namespace (the code below special-cases the MathML and SVG namespaces).
 */
class InForeignContentPhase extends Phase {
  /**
   * HTML tag names that force the parser out of foreign content.
   */
  // TODO(jmesserly): this is sorted so we could binary search.
  const breakoutElements = const [
    'b', 'big', 'blockquote', 'body', 'br','center', 'code', 'dd', 'div', 'dl',
    'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i',
    'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby',
    's', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tt', 'u',
    'ul', 'var'
  ];

  InForeignContentPhase(parser) : super(parser);

  /**
   * Rewrites a lower-cased SVG tag name on [token] to its mixed-case form,
   * leaving names without an entry in the table untouched.
   */
  void adjustSVGTagNames(token) {
    final replacements = const {
      "altglyph":"altGlyph",
      "altglyphdef":"altGlyphDef",
      "altglyphitem":"altGlyphItem",
      "animatecolor":"animateColor",
      "animatemotion":"animateMotion",
      "animatetransform":"animateTransform",
      "clippath":"clipPath",
      "feblend":"feBlend",
      "fecolormatrix":"feColorMatrix",
      "fecomponenttransfer":"feComponentTransfer",
      "fecomposite":"feComposite",
      "feconvolvematrix":"feConvolveMatrix",
      "fediffuselighting":"feDiffuseLighting",
      "fedisplacementmap":"feDisplacementMap",
      "fedistantlight":"feDistantLight",
      "feflood":"feFlood",
      "fefunca":"feFuncA",
      "fefuncb":"feFuncB",
      "fefuncg":"feFuncG",
      "fefuncr":"feFuncR",
      "fegaussianblur":"feGaussianBlur",
      "feimage":"feImage",
      "femerge":"feMerge",
      "femergenode":"feMergeNode",
      "femorphology":"feMorphology",
      "feoffset":"feOffset",
      "fepointlight":"fePointLight",
      "fespecularlighting":"feSpecularLighting",
      "fespotlight":"feSpotLight",
      "fetile":"feTile",
      "feturbulence":"feTurbulence",
      "foreignobject":"foreignObject",
      "glyphref":"glyphRef",
      "lineargradient":"linearGradient",
      "radialgradient":"radialGradient",
      "textpath":"textPath"
    };

    var properName = replacements[token.name];
    if (properName == null) return;
    token.name = properName;
  }

  /**
   * Replaces a lone U+0000 with U+FFFD; any other non-whitespace text clears
   * [:parser.framesetOK:]. The token is then handled by the base phase.
   */
  Token processCharacters(CharactersToken token) {
    if (token.data == "\u0000") {
      token.data = "\uFFFD";
    } else if (parser.framesetOK && !allWhitespace(token.data)) {
      parser.framesetOK = false;
    }
    super.processCharacters(token);
    return null;
  }

  /**
   * Handles a start tag. A breakout element, or `<font>` carrying a color,
   * face or size attribute, is an error: foreign elements are popped up to
   * an integration point or default-namespace element and the token is
   * handed back. Any other tag is inserted in the current node's namespace.
   */
  Token processStartTag(StartTagToken token) {
    var currentNode = tree.openElements.last();
    var breaksOut = breakoutElements.indexOf(token.name) >= 0 ||
        (token.name == "font" &&
            (token.data.containsKey("color") ||
             token.data.containsKey("face") ||
             token.data.containsKey("size")));

    if (breaksOut) {
      parser.parseError("unexpected-html-element-in-foreign-content",
          {'name': token.name});
      while (tree.openElements.last().namespace !=
             tree.defaultNamespace &&
             !parser.isHTMLIntegrationPoint(tree.openElements.last()) &&
             !parser.isMathMLTextIntegrationPoint(tree.openElements.last())) {
        tree.openElements.removeLast();
      }
      return token;
    }

    // Namespace-specific fix-ups before the element is inserted.
    if (currentNode.namespace == Namespaces.mathml) {
      parser.adjustMathMLAttributes(token);
    } else if (currentNode.namespace == Namespaces.svg) {
      adjustSVGTagNames(token);
      parser.adjustSVGAttributes(token);
    }
    parser.adjustForeignAttributes(token);
    token.namespace = currentNode.namespace;
    tree.insertElement(token);
    if (token.selfClosing) {
      tree.openElements.removeLast();
      token.selfClosingAcknowledged = true;
    }
    return null;
  }

  /**
   * Handles an end tag by walking down the stack of open elements. A node
   * whose lower-cased tag name matches is popped together with everything
   * above it. On reaching a default-namespace node first, the token is
   * passed to the current phase and its result returned.
   */
  Token processEndTag(EndTagToken token) {
    var nodeIndex = tree.openElements.length - 1;
    var node = tree.openElements.last();
    if (node.tagName != token.name) {
      parser.parseError("unexpected-end-tag", {"name": token.name});
    }

    while (true) {
      if (asciiUpper2Lower(node.tagName) == token.name) {
        //XXX this isn't in the spec but it seems necessary
        if (parser.phase == parser._inTableTextPhase) {
          InTableTextPhase inTableText = parser.phase;
          inTableText.flushCharacters();
          parser.phase = inTableText.originalPhase;
        }
        // Pop up to and including the matched node.
        while (tree.openElements.removeLast() != node) {
          assert(tree.openElements.length > 0);
        }
        return null;
      }

      nodeIndex -= 1;
      node = tree.openElements[nodeIndex];
      if (node.namespace == tree.defaultNamespace) {
        return parser.phase.processEndTag(token);
      }
    }
  }
}
2987
2988
/**
 * Phase entered after the body has ended. Stray content other than comments
 * and html tags switches the parser back to the in-body phase.
 */
class AfterBodyPhase extends Phase {
  AfterBodyPhase(parser) : super(parser);

  /** Dispatches a start tag: `html` is special, the rest is "other". */
  processStartTag(StartTagToken token) {
    if (token.name != "html") return startTagOther(token);
    return startTagHtml(token);
  }

  /** Dispatches an end tag: `html` is special, the rest is "other". */
  processEndTag(EndTagToken token) {
    if (token.name != "html") return endTagOther(token);
    return endTagHtml(token);
  }

  /** Stops parsing: always returns false. */
  bool processEOF() {
    return false;
  }

  /** Attaches the comment to the first open element. */
  Token processComment(CommentToken token) {
    // This is needed because data is to be appended to the <html> element
    // here and not to whatever is currently open.
    var root = tree.openElements[0];
    tree.insertComment(token, root);
    return null;
  }

  /**
   * Character data here is an error; the parser switches to the in-body
   * phase and the token is handed back.
   */
  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-after-body");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /** `<html>` is handled by the in-body rules. */
  Token startTagHtml(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Any other start tag is an error; the parser switches to the in-body
   * phase and the token is handed back.
   */
  Token startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-after-body",
        {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /**
   * Handles `</html>`: an error in innerHTML mode, otherwise moves on to the
   * after-after-body phase.
   */
  void endTagHtml(name) {
    if (!parser.innerHTMLMode) {
      parser.phase = parser._afterAfterBodyPhase;
      return;
    }
    parser.parseError("unexpected-end-tag-after-body-innerhtml");
  }

  /**
   * Any other end tag is an error; the parser switches to the in-body phase
   * and the token is handed back.
   */
  Token endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-after-body",
        {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
3043
/**
 * Token handlers for the parser's in-frameset phase.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
 */
class InFramesetPhase extends Phase {
  InFramesetPhase(parser) : super(parser);

  /** Routes a start tag to the handler registered for its name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "frameset") return startTagFrameset(token);
    if (tagName == "frame") return startTagFrame(token);
    if (tagName == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  /** Routes an end tag: only `frameset` has a dedicated handler. */
  processEndTag(EndTagToken token) {
    if (token.name == "frameset") return endTagFrameset(token);
    return endTagOther(token);
  }

  /**
   * Reports "eof-in-frameset" unless the current node is `html`, in which
   * case innerHTML mode is asserted. Always returns false.
   */
  bool processEOF() {
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError("eof-in-frameset");
    }
    return false;
  }

  /** Character data is only reported as a parse error in this phase. */
  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-in-frameset");
  }

  /** Inserts the nested `frameset` element and leaves it open. */
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
  }

  /** Inserts the `frame` element and immediately pops it again. */
  void startTagFrame(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  /** Delegates `noframes` to the in-body phase. */
  Token startTagNoframes(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /** Any other start tag is a parse error. */
  Token startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-in-frameset",
        {"name": token.name});
  }

  /**
   * Closes the current `frameset`, or reports a parse error when the current
   * node is `html` (the innerHTML case).
   */
  void endTagFrameset(EndTagToken token) {
    if (tree.openElements.last().tagName != "html") {
      tree.openElements.removeLast();
    } else {
      // innerHTML case
      parser.parseError("unexpected-frameset-in-frameset-innerhtml");
    }

    if (parser.innerHTMLMode) return;

    // Not in innerHTML mode: if the current node is not a "frameset"
    // element (anymore), switch to the after-frameset phase.
    if (tree.openElements.last().tagName != "frameset") {
      parser.phase = parser._afterFramesetPhase;
    }
  }

  /** Any other end tag is a parse error. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-frameset",
        {"name": token.name});
  }
}
3115
3116
/**
 * Token handlers for the parser's after-frameset phase.
 *
 * See http://www.whatwg.org/specs/web-apps/current-work/#after3
 */
class AfterFramesetPhase extends Phase {
  AfterFramesetPhase(parser) : super(parser);

  /** Routes a start tag to the handler registered for its name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  /** Routes an end tag: only `html` has a dedicated handler. */
  processEndTag(EndTagToken token) {
    if (token.name == "html") return endTagHtml(token);
    return endTagOther(token);
  }

  // Stop parsing.
  bool processEOF() {
    return false;
  }

  /** Character data is only reported as a parse error in this phase. */
  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-after-frameset");
  }

  /** Delegates `noframes` to the in-head phase. */
  Token startTagNoframes(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /** Any other start tag is a parse error. */
  void startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-after-frameset",
        {"name": token.name});
  }

  /** The `html` end tag moves the parser to its after-after-frameset phase. */
  void endTagHtml(EndTagToken token) {
    parser.phase = parser._afterAfterFramesetPhase;
  }

  /** Any other end tag is a parse error. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-after-frameset",
        {"name": token.name});
  }
}
3161
3162
/**
 * Token handlers for the parser's after-after-body phase.
 *
 * Characters, non-`html` start tags and all end tags report an
 * "expected-eof-but-got-..." parse error, switch the parser to its in-body
 * phase and return the token to the caller.
 */
class AfterAfterBodyPhase extends Phase {
  AfterAfterBodyPhase(parser) : super(parser);

  /** Routes a start tag: only `html` has a dedicated handler. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case 'html': return startTagHtml(token);
      default: return startTagOther(token);
    }
  }

  // Stop parsing.
  bool processEOF() {
    return false;
  }

  /** Comments are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
  }

  /** Whitespace is handled by the in-body phase. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    var inBody = parser._inBodyPhase;
    return inBody.processSpaceCharacters(token);
  }

  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-eof-but-got-char");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /** Delegates an `html` start tag to the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    var inBody = parser._inBodyPhase;
    return inBody.processStartTag(token);
  }

  Token startTagOther(StartTagToken token) {
    var errorData = {"name": token.name};
    parser.parseError("expected-eof-but-got-start-tag", errorData);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  Token processEndTag(EndTagToken token) {
    var errorData = {"name": token.name};
    parser.parseError("expected-eof-but-got-end-tag", errorData);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
3203
/**
 * Token handlers for the parser's after-after-frameset phase.
 *
 * Unexpected characters, start tags and end tags are reported as
 * "expected-eof-but-got-..." parse errors; the phase is not changed here.
 */
class AfterAfterFramesetPhase extends Phase {
  AfterAfterFramesetPhase(parser) : super(parser);

  /** Routes a start tag to the handler registered for its name. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "noframes") return startTagNoFrames(token);
    return startTagOther(token);
  }

  // Stop parsing.
  bool processEOF() {
    return false;
  }

  /** Comments are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
  }

  /** Whitespace is handled by the in-body phase. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    var inBody = parser._inBodyPhase;
    return inBody.processSpaceCharacters(token);
  }

  /** Character data is only reported as a parse error in this phase. */
  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-eof-but-got-char");
  }

  /** Delegates an `html` start tag to the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    var inBody = parser._inBodyPhase;
    return inBody.processStartTag(token);
  }

  /** Delegates `noframes` to the in-head phase. */
  Token startTagNoFrames(StartTagToken token) {
    var inHead = parser._inHeadPhase;
    return inHead.processStartTag(token);
  }

  /** Any other start tag is a parse error. */
  void startTagOther(StartTagToken token) {
    var errorData = {"name": token.name};
    parser.parseError("expected-eof-but-got-start-tag", errorData);
  }

  /** Every end tag is a parse error. */
  Token processEndTag(EndTagToken token) {
    var errorData = {"name": token.name};
    parser.parseError("expected-eof-but-got-end-tag", errorData);
  }
}
3247
3248
/**
 * Error in parsed document.
 *
 * [errorCode] is the key looked up in [errorMessages], [data] is passed to
 * [formatStr] together with that entry to build [message], and [span]
 * supplies the [line] and [column] of the error.
 */
class ParseError implements Exception {
  final String errorCode;
  final Span span;
  final Map data;

  ParseError(this.errorCode, this.span, this.data);

  // Getters are declared without a parameter list, consistent with [message].
  int get line => span.line;

  int get column => span.column;

  String get message => formatStr(errorMessages[errorCode], data);

  String toString() => "ParseError at line $line column $column: $message";
}
OLDNEW
« no previous file with comments | « README.md ('k') | lib/char_encodings.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698