| OLD | NEW |
| (Empty) |
| 1 #library('html5parser'); | |
| 2 | |
| 3 #import('dart:math'); | |
| 4 #import('package:logging/logging.dart'); | |
| 5 #import('treebuilders/base.dart'); // for Marker | |
| 6 #import('treebuilders/simpletree.dart'); | |
| 7 #import('lib/constants.dart'); | |
| 8 #import('lib/encoding_parser.dart'); | |
| 9 #import('lib/token.dart'); | |
| 10 #import('lib/utils.dart'); | |
| 11 #import('tokenizer.dart'); | |
| 12 | |
| 13 // TODO(jmesserly): these APIs, as well as the HTMLParser constructor and | |
| 14 // HTMLParser.parse and parseFragment were changed a bit to avoid passing a | |
| 15 // first class type that is used for construction. It might be okay, but I'd | |
| 16 // like to find a good dependency-injection pattern for Dart rather than | |
| 17 // copy the Python API. | |
| 18 // TODO(jmesserly): Also some of the HTMLParser APIs are messed up to avoid | |
| 19 // editor shadowing warnings :\. Look for trailing underscores. | |
/**
 * Parses the html5 document [doc] and returns the resulting [Document] tree.
 *
 * [doc] may be a [String], a [RandomAccessFile] or a [List<int>] of bytes.
 *
 * [treebuilder] is handed to the [HTMLParser] constructor; when omitted the
 * parser supplies its own.
 *
 * The optional [encoding] must be a string that indicates the encoding. If
 * specified, that encoding will be used, regardless of any BOM or later
 * declaration (such as in a meta element).
 */
Document parse(doc, [TreeBuilder treebuilder, String encoding]) {
  // The token source is built first, then handed to a freshly created parser.
  final HTMLTokenizer tokenSource = new HTMLTokenizer(doc, encoding: encoding);
  final HTMLParser htmlParser = new HTMLParser(treebuilder);
  return htmlParser.parse(tokenSource);
}
| 32 | |
/**
 * Parses the html5 fragment [doc] and returns the resulting
 * [DocumentFragment].
 *
 * [doc] may be a [String], a [RandomAccessFile] or a [List<int>] of bytes.
 * Pass a [container] to change the type of the containing element; it
 * defaults to "div".
 *
 * [treebuilder] is handed to the [HTMLParser] constructor; when omitted the
 * parser supplies its own.
 *
 * The optional [encoding] must be a string that indicates the encoding. If
 * specified, that encoding will be used, regardless of any BOM or later
 * declaration (such as in a meta element).
 */
DocumentFragment parseFragment(doc, [String container = "div",
    TreeBuilder treebuilder, String encoding]) {
  final HTMLTokenizer tokenSource = new HTMLTokenizer(doc, encoding: encoding);
  final HTMLParser htmlParser = new HTMLParser(treebuilder);
  return htmlParser.parseFragment(tokenSource, container_: container);
}
| 48 | |
| 49 | |
/**
 * HTML parser. Generates a tree structure from a stream of (possibly malformed)
 * HTML.
 *
 * The parser pulls tokens from an [HTMLTokenizer] and dispatches each one to
 * the currently active [Phase], which drives the [TreeBuilder]. Parse errors
 * are collected in [errors]; in [strict] mode the first one is thrown.
 */
class HTMLParser {
  /** Raise an exception on the first error encountered. */
  bool strict;

  /** The tree builder that the phases insert nodes into. */
  final TreeBuilder tree;

  /** Parse errors recorded by [parseError] since the last [reset]. */
  List<ParseError> errors;

  // TODO(jmesserly): would be faster not to use Map lookup.
  Map<String, Phase> phases;

  /** Whether the current parse is a fragment (innerHTML) parse. */
  bool innerHTMLMode;

  /** Container element name passed to the current fragment parse. */
  String container;

  /** Set by [BeforeHtmlPhase] when the first start tag seen is "html". */
  bool firstStartTag = false;

  // TODO(jmesserly): use enum?
  /** "quirks" / "limited quirks" / "no quirks" */
  String compatMode = "no quirks";

  /** innerHTML container when parsing document fragment. */
  String innerHTML;

  /** The phase that handles the next token. */
  Phase phase;

  Phase lastPhase;

  /** The phase that was active when [parseRCDataRawtext] switched to text. */
  Phase originalPhase;

  Phase beforeRCDataPhase;

  bool framesetOK;

  /** The token source for the parse in progress; assigned by [_parse]. */
  HTMLTokenizer tokenizer;

  // These fields hold the different phase singletons. At any given time one
  // of them will be active.
  InitialPhase _initialPhase;
  BeforeHtmlPhase _beforeHtmlPhase;
  BeforeHeadPhase _beforeHeadPhase;
  InHeadPhase _inHeadPhase;
  AfterHeadPhase _afterHeadPhase;
  InBodyPhase _inBodyPhase;
  TextPhase _textPhase;
  InTablePhase _inTablePhase;
  InTableTextPhase _inTableTextPhase;
  InCaptionPhase _inCaptionPhase;
  InColumnGroupPhase _inColumnGroupPhase;
  InTableBodyPhase _inTableBodyPhase;
  InRowPhase _inRowPhase;
  InCellPhase _inCellPhase;
  InSelectPhase _inSelectPhase;
  InSelectInTablePhase _inSelectInTablePhase;
  InForeignContentPhase _inForeignContentPhase;
  AfterBodyPhase _afterBodyPhase;
  InFramesetPhase _inFramesetPhase;
  AfterFramesetPhase _afterFramesetPhase;
  AfterAfterBodyPhase _afterAfterBodyPhase;
  AfterAfterFramesetPhase _afterAfterFramesetPhase;

  /**
   * Create a new HTMLParser and configure the [tree] builder and [strict] mode.
   *
   * When [tree] is null a `new TreeBuilder(true)` is used instead.
   */
  HTMLParser([TreeBuilder tree, this.strict = false])
      : tree = tree != null ? tree : new TreeBuilder(true),
        errors = <ParseError>[] {

    _initialPhase = new InitialPhase(this);
    _beforeHtmlPhase = new BeforeHtmlPhase(this);
    _beforeHeadPhase = new BeforeHeadPhase(this);
    _inHeadPhase = new InHeadPhase(this);
    // XXX "inHeadNoscript": new InHeadNoScriptPhase(this);
    _afterHeadPhase = new AfterHeadPhase(this);
    _inBodyPhase = new InBodyPhase(this);
    _textPhase = new TextPhase(this);
    _inTablePhase = new InTablePhase(this);
    _inTableTextPhase = new InTableTextPhase(this);
    _inCaptionPhase = new InCaptionPhase(this);
    _inColumnGroupPhase = new InColumnGroupPhase(this);
    _inTableBodyPhase = new InTableBodyPhase(this);
    _inRowPhase = new InRowPhase(this);
    _inCellPhase = new InCellPhase(this);
    _inSelectPhase = new InSelectPhase(this);
    _inSelectInTablePhase = new InSelectInTablePhase(this);
    _inForeignContentPhase = new InForeignContentPhase(this);
    _afterBodyPhase = new AfterBodyPhase(this);
    _inFramesetPhase = new InFramesetPhase(this);
    _afterFramesetPhase = new AfterFramesetPhase(this);
    _afterAfterBodyPhase = new AfterAfterBodyPhase(this);
    _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this);
    // XXX after after frameset
  }

  /**
   * Parse a HTML document into a well-formed tree
   *
   * [tokenizer_] - an object that provides a stream of tokens to the
   * treebuilder. This may be replaced for e.g. a sanitizer which converts some
   * tags to text. Otherwise, construct an instance of HTMLTokenizer with the
   * appropriate options.
   *
   * Returns the document obtained from [tree] once parsing has finished.
   */
  Document parse(HTMLTokenizer tokenizer_) {
    _parse(tokenizer_, innerHTML_: false);
    return tree.getDocument();
  }

  /**
   * Parse a HTML fragment into a well-formed tree fragment.
   *
   * [container_] - name of the element we're setting the innerHTML property
   * if set to null, default to 'div'.
   *
   * [tokenizer_] - an object that provides a stream of tokens to the
   * treebuilder. This may be replaced for e.g. a sanitizer which converts some
   * tags to text. Otherwise, construct an instance of HTMLTokenizer with the
   * appropriate options.
   *
   * Returns the fragment obtained from [tree] once parsing has finished.
   */
  DocumentFragment parseFragment(HTMLTokenizer tokenizer_,
      [String container_ = "div"]) {
    _parse(tokenizer_, innerHTML_: true, container_: container_);
    return tree.getFragment();
  }

  /**
   * Shared driver for [parse] and [parseFragment].
   *
   * Stores the arguments on the parser, links the tokenizer back to this
   * parser, resets all state and runs [mainLoop]. Whenever [mainLoop] throws
   * a [ReparseException] the state is reset and the loop is started again.
   */
  void _parse(HTMLTokenizer tokenizer_, [bool innerHTML_ = false,
      String container_ = "div"]) {

    innerHTMLMode = innerHTML_;
    container = container_;
    tokenizer = tokenizer_;
    // TODO(jmesserly): this feels a little strange, but it's needed for CDATA.
    // Maybe we should change the API to having the parser create the tokenizer.
    tokenizer.parser = this;

    reset();

    while (true) {
      try {
        mainLoop();
        break;
      } on ReparseException catch (e) {
        // The exception itself carries nothing we need; start over.
        reset();
      }
    }
  }

  /**
   * Restores the parser (and [tree]) to the state expected at the start of a
   * parse.
   *
   * In fragment mode the tokenizer state is chosen from the lower-cased
   * [container] name, an html root element is inserted and the insertion mode
   * is derived via [resetInsertionMode]. Otherwise parsing starts in the
   * initial phase.
   */
  void reset() {
    tree.reset();
    firstStartTag = false;
    errors = <ParseError>[];
    // "quirks" / "limited quirks" / "no quirks"
    compatMode = "no quirks";

    if (innerHTMLMode) {
      innerHTML = container.toLowerCase();

      // NOTE(review): the pairing of cdataElements with rcdataState and of
      // rcdataElements with rawtextState looks swapped by name, but it is kept
      // exactly as written; confirm against the definitions of those lists.
      if (cdataElements.indexOf(innerHTML) >= 0) {
        tokenizer.state = tokenizer.rcdataState;
      } else if (rcdataElements.indexOf(innerHTML) >= 0) {
        tokenizer.state = tokenizer.rawtextState;
      } else if (innerHTML == 'plaintext') {
        tokenizer.state = tokenizer.plaintextState;
      } else {
        // state already is data state
        // tokenizer.state = tokenizer.dataState;
      }
      phase = _beforeHtmlPhase;
      _beforeHtmlPhase.insertHtmlElement();
      resetInsertionMode();
    } else {
      innerHTML = null;
      phase = _initialPhase;
    }

    lastPhase = null;
    beforeRCDataPhase = null;
    framesetOK = true;
  }

  /**
   * Whether [element] is an HTML integration point.
   *
   * A MathML "annotation-xml" element qualifies when its "encoding"
   * attribute, after [asciiUpper2Lower], is "text/html" or
   * "application/xhtml+xml". Any other element qualifies when its
   * (namespace, tagName) pair is listed in [htmlIntegrationPointElements].
   */
  bool isHTMLIntegrationPoint(Node element) {
    if (element.tagName == "annotation-xml" &&
        element.namespace == Namespaces.mathml) {
      var enc = element.attributes["encoding"];
      if (enc != null) enc = asciiUpper2Lower(enc);
      return enc == "text/html" || enc == "application/xhtml+xml";
    } else {
      return htmlIntegrationPointElements.indexOf(
          new Pair(element.namespace, element.tagName)) >= 0;
    }
  }

  /**
   * Whether the (namespace, tagName) pair of [element] is listed in
   * [mathmlTextIntegrationPointElements].
   */
  bool isMathMLTextIntegrationPoint(Node element) {
    return mathmlTextIntegrationPointElements.indexOf(
        new Pair(element.namespace, element.tagName)) >= 0;
  }

  /**
   * Whether [token], whose kind is [type], must be handled by the foreign
   * content phase instead of the current [phase].
   *
   * The decision is based on the last open element: it is never foreign
   * content when there are no open elements or that element is in the tree's
   * default namespace, and the integration-point checks below carve out
   * further exceptions.
   */
  bool inForeignContent(Token token, int type) {
    if (tree.openElements.length == 0) return false;

    var node = tree.openElements.last();
    if (node.namespace == tree.defaultNamespace) return false;

    if (isMathMLTextIntegrationPoint(node)) {
      if (type == TokenKind.startTag &&
          (token as StartTagToken).name != "mglyph" &&
          (token as StartTagToken).name != "malignmark")  {
        return false;
      }
      if (type == TokenKind.characters || type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    if (node.tagName == "annotation-xml" && type == TokenKind.startTag &&
        (token as StartTagToken).name == "svg") {
      return false;
    }

    if (isHTMLIntegrationPoint(node)) {
      if (type == TokenKind.startTag ||
          type == TokenKind.characters ||
          type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    return true;
  }

  /**
   * Consumes every token from [tokenizer] and then processes end-of-file.
   *
   * A phase may hand a token back for reprocessing by returning it; the inner
   * loop keeps dispatching until a phase returns null. After the token stream
   * is exhausted, `processEOF` is called on the active phase for as long as it
   * returns true.
   */
  void mainLoop() {
    while (tokenizer.hasNext()) {
      var token = normalizeToken(tokenizer.next());
      var newToken = token;
      int type;
      while (newToken != null) {
        type = newToken.kind;

        // Note: avoid "is" test here, see http://dartbug.com/4795
        if (type == TokenKind.parseError) {
          ParseErrorToken error = newToken;
          parseError(error.data, error.messageParams);
          newToken = null;
        } else {
          Phase phase_ = phase;
          if (inForeignContent(token, type)) {
            phase_ = _inForeignContentPhase;
          }

          switch (type) {
            case TokenKind.characters:
              newToken = phase_.processCharacters(newToken);
              break;
            case TokenKind.spaceCharacters:
              newToken = phase_.processSpaceCharacters(newToken);
              break;
            case TokenKind.startTag:
              newToken = phase_.processStartTag(newToken);
              break;
            case TokenKind.endTag:
              newToken = phase_.processEndTag(newToken);
              break;
            case TokenKind.comment:
              newToken = phase_.processComment(newToken);
              break;
            case TokenKind.doctype:
              newToken = phase_.processDoctype(newToken);
              break;
          }
        }
      }

      // A self-closing start tag that no phase acknowledged is an error.
      if (token is StartTagToken) {
        if (token.selfClosing && !token.selfClosingAcknowledged) {
          parseError("non-void-element-with-trailing-solidus",
              {"name": token.name});
        }
      }
    }

    // When the loop finishes it's EOF
    var reprocess = true;
    var reprocessPhases = [];
    while (reprocess) {
      reprocessPhases.add(phase);
      reprocess = phase.processEOF();
      if (reprocess) {
        // A phase asking for reprocessing must have moved to a phase that has
        // not handled EOF yet, otherwise this loop would never terminate.
        assert(reprocessPhases.indexOf(phase) == -1);
      }
    }
  }

  /**
   * Records a [ParseError] for [errorcode] at the tokenizer stream's current
   * position, with [datavars] as its message parameters.
   *
   * Throws the error after recording it when [strict] is set.
   */
  void parseError([String errorcode = "XXX-undefined-error",
      Map datavars = const {}]) {
    // XXX The idea is to make errorcode mandatory.
    var position = tokenizer.stream.position();
    var err = new ParseError(errorcode, position, datavars);
    errors.add(err);
    if (strict) throw err;
  }

  /** HTML5 specific normalizations to the token stream. */
  Token normalizeToken(Token token) {
    if (token is StartTagToken) {
      token.data = makeDict(token.data);
    }
    return token;
  }

  /**
   * Renames the "definitionurl" attribute of [token], if present, to
   * "definitionURL".
   */
  void adjustMathMLAttributes(StartTagToken token) {
    var orig = token.data.remove("definitionurl");
    if (orig != null) {
      token.data["definitionURL"] = orig;
    }
  }

  /**
   * Renames the lower-cased SVG attribute names in [token]'s data to their
   * mixed-case spelling, e.g. "viewbox" becomes "viewBox".
   */
  void adjustSVGAttributes(Token token) {
    final replacements = const {
      "attributename":"attributeName",
      "attributetype":"attributeType",
      "basefrequency":"baseFrequency",
      "baseprofile":"baseProfile",
      "calcmode":"calcMode",
      "clippathunits":"clipPathUnits",
      "contentscripttype":"contentScriptType",
      "contentstyletype":"contentStyleType",
      "diffuseconstant":"diffuseConstant",
      "edgemode":"edgeMode",
      "externalresourcesrequired":"externalResourcesRequired",
      "filterres":"filterRes",
      "filterunits":"filterUnits",
      "glyphref":"glyphRef",
      "gradienttransform":"gradientTransform",
      "gradientunits":"gradientUnits",
      "kernelmatrix":"kernelMatrix",
      "kernelunitlength":"kernelUnitLength",
      "keypoints":"keyPoints",
      "keysplines":"keySplines",
      "keytimes":"keyTimes",
      "lengthadjust":"lengthAdjust",
      "limitingconeangle":"limitingConeAngle",
      "markerheight":"markerHeight",
      "markerunits":"markerUnits",
      "markerwidth":"markerWidth",
      "maskcontentunits":"maskContentUnits",
      "maskunits":"maskUnits",
      "numoctaves":"numOctaves",
      "pathlength":"pathLength",
      "patterncontentunits":"patternContentUnits",
      "patterntransform":"patternTransform",
      "patternunits":"patternUnits",
      "pointsatx":"pointsAtX",
      "pointsaty":"pointsAtY",
      "pointsatz":"pointsAtZ",
      "preservealpha":"preserveAlpha",
      "preserveaspectratio":"preserveAspectRatio",
      "primitiveunits":"primitiveUnits",
      "refx":"refX",
      "refy":"refY",
      "repeatcount":"repeatCount",
      "repeatdur":"repeatDur",
      "requiredextensions":"requiredExtensions",
      "requiredfeatures":"requiredFeatures",
      "specularconstant":"specularConstant",
      "specularexponent":"specularExponent",
      "spreadmethod":"spreadMethod",
      "startoffset":"startOffset",
      "stddeviation":"stdDeviation",
      "stitchtiles":"stitchTiles",
      "surfacescale":"surfaceScale",
      "systemlanguage":"systemLanguage",
      "tablevalues":"tableValues",
      "targetx":"targetX",
      "targety":"targetY",
      "textlength":"textLength",
      "viewbox":"viewBox",
      "viewtarget":"viewTarget",
      "xchannelselector":"xChannelSelector",
      "ychannelselector":"yChannelSelector",
      "zoomandpan":"zoomAndPan"
    };
    // Iterate over a snapshot of the keys: the loop body removes and inserts
    // entries, which must not happen while walking the map's own key view.
    for (var originalName in new List.from(token.data.getKeys())) {
      var svgName = replacements[originalName];
      if (svgName != null) {
        token.data[svgName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Replaces the string keys of namespaced attributes in [token]'s data
   * ("xlink:*", "xml:*", "xmlns", "xmlns:xlink") with [AttributeName] keys.
   */
  void adjustForeignAttributes(Token token) {
    // TODO(jmesserly): I don't like mixing non-string objects with strings in
    // the Node.attributes Map. Is there another solution?
    final replacements = const {
      "xlink:actuate": const AttributeName("xlink", "actuate",
            Namespaces.xlink),
      "xlink:arcrole": const AttributeName("xlink", "arcrole",
            Namespaces.xlink),
      "xlink:href": const AttributeName("xlink", "href", Namespaces.xlink),
      "xlink:role": const AttributeName("xlink", "role", Namespaces.xlink),
      "xlink:show": const AttributeName("xlink", "show", Namespaces.xlink),
      "xlink:title": const AttributeName("xlink", "title", Namespaces.xlink),
      "xlink:type": const AttributeName("xlink", "type", Namespaces.xlink),
      "xml:base": const AttributeName("xml", "base", Namespaces.xml),
      "xml:lang": const AttributeName("xml", "lang", Namespaces.xml),
      "xml:space": const AttributeName("xml", "space", Namespaces.xml),
      "xmlns": const AttributeName(null, "xmlns", Namespaces.xmlns),
      "xmlns:xlink": const AttributeName("xmlns", "xlink", Namespaces.xmlns)
    };

    // Iterate over a snapshot of the keys: the loop body removes and inserts
    // entries, which must not happen while walking the map's own key view.
    for (var originalName in new List.from(token.data.getKeys())) {
      var foreignName = replacements[originalName];
      if (foreignName != null) {
        token.data[foreignName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Chooses [phase] from the stack of open elements, innermost first.
   *
   * The bottom-most element is treated as if it were named [innerHTML].
   * Elements outside the tree's default namespace are skipped unless they are
   * that bottom-most element. Falls back to the in-body phase when no element
   * selects a phase.
   */
  void resetInsertionMode() {
    // The name of this method is mostly historical. (It's also used in the
    // specification.)
    for (Node node in reversed(tree.openElements)) {
      var nodeName = node.tagName;
      bool last = node == tree.openElements[0];
      if (last) {
        assert(innerHTMLMode);
        nodeName = innerHTML;
      }
      // Check for conditions that should only happen in the innerHTML
      // case
      switch (nodeName) {
        case "select": case "colgroup": case "head": case "html":
          assert(innerHTMLMode);
          break;
      }
      if (!last && node.namespace != tree.defaultNamespace) {
        continue;
      }
      switch (nodeName) {
        case "select": phase = _inSelectPhase; return;
        case "td": phase = _inCellPhase; return;
        case "th": phase = _inCellPhase; return;
        case "tr": phase = _inRowPhase; return;
        case "tbody": phase = _inTableBodyPhase; return;
        case "thead": phase = _inTableBodyPhase; return;
        case "tfoot": phase = _inTableBodyPhase; return;
        case "caption": phase = _inCaptionPhase; return;
        case "colgroup": phase = _inColumnGroupPhase; return;
        case "table": phase = _inTablePhase; return;
        case "head": phase = _inBodyPhase; return;
        case "body": phase = _inBodyPhase; return;
        case "frameset": phase = _inFramesetPhase; return;
        case "html": phase = _beforeHeadPhase; return;
      }
    }
    phase = _inBodyPhase;
  }

  /**
   * Generic RCDATA/RAWTEXT Parsing algorithm
   * [contentType] - RCDATA or RAWTEXT
   *
   * Inserts the element for [token], switches the tokenizer to the matching
   * state, remembers the current phase in [originalPhase] and makes the text
   * phase active.
   */
  void parseRCDataRawtext(Token token, String contentType) {
    assert(contentType == "RAWTEXT" || contentType == "RCDATA");

    tree.insertElement(token);

    if (contentType == "RAWTEXT") {
      tokenizer.state = tokenizer.rawtextState;
    } else {
      tokenizer.state = tokenizer.rcdataState;
    }

    originalPhase = phase;
    phase = _textPhase;
  }
}
| 529 | |
| 530 | |
/**
 * Base class for helper object that implements each phase of processing.
 *
 * Each `process*` method receives one token and returns either null, when the
 * token has been consumed, or a token that the parser's main loop should
 * dispatch again.
 */
class Phase {
  // Order should be (they can be omitted):
  // * EOF
  // * Comment
  // * Doctype
  // * SpaceCharacters
  // * Characters
  // * StartTag
  //   - startTag* methods
  // * EndTag
  //   - endTag* methods

  /** The parser this phase belongs to. */
  final HTMLParser parser;

  /** The parser's tree builder, cached at construction time. */
  final TreeBuilder tree;

  Phase(HTMLParser parser) : parser = parser, tree = parser.tree;

  /** Handles end-of-file; subclasses must override. */
  bool processEOF() {
    throw const NotImplementedException();
  }

  /** Inserts the comment under the last open element. */
  Token processComment(CommentToken token) {
    // For most phases the following is correct. Where it's not it will be
    // overridden.
    tree.insertComment(token, tree.openElements.last());
    return null;
  }

  /** Reports an "unexpected-doctype" parse error. */
  Token processDoctype(DoctypeToken token) {
    parser.parseError("unexpected-doctype");
    return null;
  }

  /** Inserts the token's text into the tree. */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data);
    return null;
  }

  /** Inserts the token's whitespace into the tree. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    tree.insertText(token.data);
    return null;
  }

  /** Handles a start tag; subclasses must override. */
  Token processStartTag(StartTagToken token) {
    throw const NotImplementedException();
  }

  /**
   * Merges the attributes of an "html" start tag into the first open element,
   * keeping any attribute value that element already has.
   */
  Token startTagHtml(StartTagToken token) {
    if (parser.firstStartTag == false && token.name == "html") {
      parser.parseError("non-html-root");
    }
    // XXX Need a check here to see if the first start tag token emitted is
    // this token... If it's not, invoke parser.parseError().
    token.data.forEach((name, attributeValue) {
      tree.openElements[0].attributes.putIfAbsent(name, () => attributeValue);
    });
    parser.firstStartTag = false;
    return null;
  }

  /** Handles an end tag; subclasses must override. */
  Token processEndTag(EndTagToken token) {
    throw const NotImplementedException();
  }

  /**
   * Helper method for popping openElements: removes elements from the end of
   * the stack up to and including the first one whose tag name is [name].
   */
  void popOpenElementsUntil(String name) {
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.tagName != name);
  }
}
| 600 | |
/**
 * The phase active at the very start of a document parse.
 *
 * It handles an optional doctype, from which the parser's compat mode is
 * derived, and then hands over to the before-html phase. Anything other than
 * whitespace, a comment or a doctype is reported as a parse error, switches
 * the parser to "quirks" mode and is passed on for reprocessing.
 */
class InitialPhase extends Phase {
  InitialPhase(parser) : super(parser);

  /** Whitespace before the doctype is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
  }

  /** Comments at this point are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
  }

  /**
   * Inserts the doctype, sets the parser's compat mode from its name and
   * public/system identifiers, and moves on to the before-html phase.
   *
   * Reports "unknown-doctype" unless the name is "html", there is no public
   * id, and the system id is absent or "about:legacy-compat".
   */
  Token processDoctype(DoctypeToken token) {
    var name = token.name;
    String publicId = token.publicId;
    var systemId = token.systemId;
    var correct = token.correct;

    if ((name != "html" || publicId != null ||
        systemId != null && systemId != "about:legacy-compat")) {
      parser.parseError("unknown-doctype");
    }

    if (publicId == null) {
      publicId = "";
    }

    tree.insertDoctype(token);

    // The prefix lists below are all lower case, so compare case-insensitively.
    if (publicId != "") {
      publicId = asciiUpper2Lower(publicId);
    }

    if (!correct || token.name != "html"
        || startsWithAny(publicId, const [
          "+//silmaril//dtd html pro v0r11 19970101//",
          "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
          "-//as//dtd html 3.0 aswedit + extensions//",
          "-//ietf//dtd html 2.0 level 1//",
          "-//ietf//dtd html 2.0 level 2//",
          "-//ietf//dtd html 2.0 strict level 1//",
          "-//ietf//dtd html 2.0 strict level 2//",
          "-//ietf//dtd html 2.0 strict//",
          "-//ietf//dtd html 2.0//",
          "-//ietf//dtd html 2.1e//",
          "-//ietf//dtd html 3.0//",
          "-//ietf//dtd html 3.2 final//",
          "-//ietf//dtd html 3.2//",
          "-//ietf//dtd html 3//",
          "-//ietf//dtd html level 0//",
          "-//ietf//dtd html level 1//",
          "-//ietf//dtd html level 2//",
          "-//ietf//dtd html level 3//",
          "-//ietf//dtd html strict level 0//",
          "-//ietf//dtd html strict level 1//",
          "-//ietf//dtd html strict level 2//",
          "-//ietf//dtd html strict level 3//",
          "-//ietf//dtd html strict//",
          "-//ietf//dtd html//",
          "-//metrius//dtd metrius presentational//",
          "-//microsoft//dtd internet explorer 2.0 html strict//",
          "-//microsoft//dtd internet explorer 2.0 html//",
          "-//microsoft//dtd internet explorer 2.0 tables//",
          "-//microsoft//dtd internet explorer 3.0 html strict//",
          "-//microsoft//dtd internet explorer 3.0 html//",
          "-//microsoft//dtd internet explorer 3.0 tables//",
          "-//netscape comm. corp.//dtd html//",
          "-//netscape comm. corp.//dtd strict html//",
          "-//o'reilly and associates//dtd html 2.0//",
          "-//o'reilly and associates//dtd html extended 1.0//",
          "-//o'reilly and associates//dtd html extended relaxed 1.0//",
          "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
          "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
          "-//spyglass//dtd html 2.0 extended//",
          "-//sq//dtd html 2.0 hotmetal + extensions//",
          "-//sun microsystems corp.//dtd hotjava html//",
          "-//sun microsystems corp.//dtd hotjava strict html//",
          "-//w3c//dtd html 3 1995-03-24//",
          "-//w3c//dtd html 3.2 draft//",
          "-//w3c//dtd html 3.2 final//",
          "-//w3c//dtd html 3.2//",
          "-//w3c//dtd html 3.2s draft//",
          "-//w3c//dtd html 4.0 frameset//",
          "-//w3c//dtd html 4.0 transitional//",
          "-//w3c//dtd html experimental 19960712//",
          "-//w3c//dtd html experimental 970421//",
          "-//w3c//dtd w3 html//",
          "-//w3o//dtd w3 html 3.0//",
          "-//webtechs//dtd mozilla html 2.0//",
          "-//webtechs//dtd mozilla html//"])
        || const ["-//w3o//dtd w3 html strict 3.0//en//",
           "-/w3c/dtd html 4.0 transitional/en",
           "html"].indexOf(publicId) >= 0
        || startsWithAny(publicId, const [
           "-//w3c//dtd html 4.01 frameset//",
           "-//w3c//dtd html 4.01 transitional//"]) && systemId == null
        || systemId != null && systemId.toLowerCase() ==
           "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {

      parser.compatMode = "quirks";
    } else if (startsWithAny(publicId, const [
          "-//w3c//dtd xhtml 1.0 frameset//",
          "-//w3c//dtd xhtml 1.0 transitional//"])
        || startsWithAny(publicId, const [
          "-//w3c//dtd html 4.01 frameset//",
          "-//w3c//dtd html 4.01 transitional//"]) &&
          systemId != null) {
      parser.compatMode = "limited quirks";
    }
    parser.phase = parser._beforeHtmlPhase;
  }

  /**
   * Shared fallback for input that is not a doctype: the document is parsed
   * in "quirks" mode and the before-html phase takes over.
   */
  void anythingElse() {
    parser.compatMode = "quirks";
    parser.phase = parser._beforeHtmlPhase;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-doctype-but-got-chars");
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processStartTag(StartTagToken token) {
    parser.parseError("expected-doctype-but-got-start-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and returns [token] for reprocessing. */
  Token processEndTag(EndTagToken token) {
    parser.parseError("expected-doctype-but-got-end-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /**
   * Reports the missing doctype and returns true so that end-of-file is
   * processed again by the phase that is now active.
   */
  bool processEOF() {
    parser.parseError("expected-doctype-but-got-eof");
    anythingElse();
    return true;
  }
}
| 742 | |
| 743 | |
/**
 * The phase that runs until the root html element exists.
 *
 * Every token other than whitespace, a comment or an unexpected end tag
 * causes an html root element to be inserted, after which the token is handed
 * back to the parser for reprocessing in the before-head phase.
 */
class BeforeHtmlPhase extends Phase {
  BeforeHtmlPhase(parser) : super(parser);

  // helper methods

  /**
   * Inserts an attribute-less html root element and makes the before-head
   * phase active.
   */
  void insertHtmlElement() {
    final StartTagToken rootTag = new StartTagToken("html", data: {});
    tree.insertRoot(rootTag);
    parser.phase = parser._beforeHeadPhase;
  }

  // other

  /** Creates the root element and asks for end-of-file to be reprocessed. */
  bool processEOF() {
    insertHtmlElement();
    return true;
  }

  /** Comments at this point are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /** Whitespace before the root element is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /** Creates the root element and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    insertHtmlElement();
    return token;
  }

  /**
   * Creates the root element and returns [token] for reprocessing. An "html"
   * start tag is additionally remembered as the document's first start tag.
   */
  Token processStartTag(StartTagToken token) {
    if (token.name == "html") {
      parser.firstStartTag = true;
    }
    insertHtmlElement();
    return token;
  }

  /**
   * The end tags "head", "body", "html" and "br" imply the root element and
   * are reprocessed; any other end tag is reported as an error and dropped.
   */
  Token processEndTag(EndTagToken token) {
    final tagName = token.name;
    final bool impliesRoot = tagName == "head" || tagName == "body" ||
        tagName == "html" || tagName == "br";
    if (!impliesRoot) {
      parser.parseError("unexpected-end-tag-before-html",
          {"name": token.name});
      return null;
    }
    insertHtmlElement();
    return token;
  }
}
| 791 | |
| 792 | |
/**
 * The phase that runs between the root html element and the head element.
 *
 * Most tokens imply a head element: an attribute-less "head" start tag is
 * processed first and the triggering token is then handed back to the parser
 * for reprocessing in the in-head phase.
 */
class BeforeHeadPhase extends Phase {
  BeforeHeadPhase(parser) : super(parser);

  /** Dispatches a start tag on its name. */
  processStartTag(StartTagToken token) {
    final tagName = token.name;
    if (tagName == 'html') return startTagHtml(token);
    if (tagName == 'head') return startTagHead(token);
    return startTagOther(token);
  }

  /** Dispatches an end tag on its name. */
  processEndTag(EndTagToken token) {
    final tagName = token.name;
    if (tagName == "head" || tagName == "body" ||
        tagName == "html" || tagName == "br") {
      return endTagImplyHead(token);
    }
    return endTagOther(token);
  }

  /** Implies a head element and asks for end-of-file to be reprocessed. */
  bool processEOF() {
    _insertImpliedHead();
    return true;
  }

  /** Whitespace before the head element is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /** Implies a head element and returns [token] for reprocessing. */
  Token processCharacters(CharactersToken token) {
    _insertImpliedHead();
    return token;
  }

  /** An "html" start tag is handled by the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Inserts the head element for [token], records it as the tree's head
   * pointer and makes the in-head phase active.
   */
  void startTagHead(StartTagToken token) {
    tree.insertElement(token);
    tree.headPointer = tree.openElements.last();
    parser.phase = parser._inHeadPhase;
  }

  /** Implies a head element and returns [token] for reprocessing. */
  Token startTagOther(StartTagToken token) {
    _insertImpliedHead();
    return token;
  }

  /** Implies a head element and returns [token] for reprocessing. */
  Token endTagImplyHead(EndTagToken token) {
    _insertImpliedHead();
    return token;
  }

  /** Reports any other end tag as an error; the token is dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError("end-tag-after-implied-root",
        {"name": token.name});
  }

  /** Processes a synthesized, attribute-less "head" start tag. */
  void _insertImpliedHead() {
    startTagHead(new StartTagToken("head", data: {}));
  }
}
| 850 | |
/**
 * Parser phase that is active while the `head` element is open.
 *
 * Head-level start tags are handled here. Tokens this phase does not
 * recognise pop the `head` via [anythingElse] and are returned to the caller.
 */
class InHeadPhase extends Phase {
  InHeadPhase(parser) : super(parser);

  /** Dispatches a start tag on its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "title":
        return startTagTitle(token);
      case "noscript":
      case "noframes":
      case "style":
        return startTagNoScriptNoFramesStyle(token);
      case "script":
        return startTagScript(token);
      case "base":
      case "basefont":
      case "bgsound":
      case "command":
      case "link":
        return startTagBaseLinkCommand(token);
      case "meta":
        return startTagMeta(token);
      case "head":
        return startTagHead(token);
      default:
        return startTagOther(token);
    }
  }

  /** Dispatches an end tag on its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "head":
        return endTagHead(token);
      case "br":
      case "html":
      case "body":
        return endTagHtmlBodyBr(token);
      default:
        return endTagOther(token);
    }
  }

  /** Closes the `head` when input ends here, then returns true. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Closes the `head` and returns [token] unchanged. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** Forwards an `<html>` start tag to the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /** Reports a second `<head>` start tag as a parse error. */
  void startTagHead(StartTagToken token) {
    parser.parseError("two-heads-are-not-better-than-one");
  }

  /**
   * Inserts the element for [token] and pops it again straight away, marking
   * the token's self-closing flag as acknowledged.
   */
  void startTagBaseLinkCommand(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }

  /**
   * Inserts a `meta` element and, unless the stream's encoding is already
   * certain, asks the stream to change encoding.
   *
   * A `charset` attribute wins; otherwise the `content` attribute is run
   * through [ContentAttrParser] and its result is passed on as is.
   */
  void startTagMeta(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;

    var stream = parser.tokenizer.stream;
    if (stream.charEncodingCertain) return;

    var charset = token.data["charset"];
    if (charset != null) {
      stream.changeEncoding(charset);
      return;
    }

    var content = token.data["content"];
    if (content != null) {
      var codec = new ContentAttrParser(new EncodingBytes(content)).parse();
      stream.changeEncoding(codec);
    }
  }

  /** Hands `<title>` to [parser.parseRCDataRawtext] as "RCDATA". */
  void startTagTitle(StartTagToken token) {
    parser.parseRCDataRawtext(token, "RCDATA");
  }

  /**
   * Hands the token to [parser.parseRCDataRawtext] as "RAWTEXT".
   *
   * The scripting-disabled treatment of `noscript` is not implemented.
   */
  void startTagNoScriptNoFramesStyle(StartTagToken token) {
    parser.parseRCDataRawtext(token, "RAWTEXT");
  }

  /**
   * Inserts a `script` element, puts the tokenizer in its script-data state
   * and switches to the text phase, remembering the current phase in
   * [parser.originalPhase].
   */
  void startTagScript(StartTagToken token) {
    tree.insertElement(token);
    parser.tokenizer.state = parser.tokenizer.scriptDataState;
    parser.originalPhase = parser.phase;
    parser.phase = parser._textPhase;
  }

  /** Closes the `head` and returns [token] unchanged. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /**
   * Pops the current node, which is asserted to be the `head` element, and
   * moves the parser to the after-head phase.
   */
  void endTagHead(EndTagToken token) {
    var popped = parser.tree.openElements.removeLast();
    assert(popped.tagName == "head");
    parser.phase = parser._afterHeadPhase;
  }

  /** Closes the `head` and returns [token] unchanged. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Reports any other end tag as a parse error; the token is dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  /** Behaves as if a `</head>` end tag had been seen. */
  void anythingElse() {
    endTagHead(new EndTagToken("head", data: {}));
  }
}
| 961 | |
| 962 | |
| 963 // XXX If we implement a parser for which scripting is disabled we need to | |
| 964 // implement this phase. | |
| 965 // | |
| 966 // class InHeadNoScriptPhase extends Phase { | |
| 967 | |
/**
 * Parser phase that is active after `</head>` and before the `body` or
 * `frameset` element has been inserted.
 *
 * Tokens this phase does not recognise insert an implied `body` via
 * [anythingElse] and are returned to the caller.
 */
class AfterHeadPhase extends Phase {
  AfterHeadPhase(parser) : super(parser);

  /** Dispatches a start tag on its tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "body":
        return startTagBody(token);
      case "frameset":
        return startTagFrameset(token);
      case "base":
      case "basefont":
      case "bgsound":
      case "link":
      case "meta":
      case "noframes":
      case "script":
      case "style":
      case "title":
        return startTagFromHead(token);
      case "head":
        return startTagHead(token);
      default:
        return startTagOther(token);
    }
  }

  /** Dispatches an end tag on its tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "body":
      case "html":
      case "br":
        return endTagHtmlBodyBr(token);
      default:
        return endTagOther(token);
    }
  }

  /** Inserts an implied `body` when input ends here, then returns true. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Inserts an implied `body` and returns [token] unchanged. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** Forwards an `<html>` start tag to the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Inserts the explicit `body` element, clears [parser.framesetOK] and
   * switches to the in-body phase.
   */
  void startTagBody(StartTagToken token) {
    parser.framesetOK = false;
    tree.insertElement(token);
    parser.phase = parser._inBodyPhase;
  }

  /** Inserts the `frameset` element and switches to the in-frameset phase. */
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
    parser.phase = parser._inFramesetPhase;
  }

  /**
   * Handles a head-only tag that turns up after the `head` was closed.
   *
   * After reporting a parse error, the saved head element is pushed back
   * onto the open-element stack, the token is run through the in-head phase,
   * and the topmost `head` entry is removed from the stack again.
   */
  void startTagFromHead(StartTagToken token) {
    parser.parseError("unexpected-start-tag-out-of-my-head",
        {"name": token.name});
    tree.openElements.add(tree.headPointer);
    parser._inHeadPhase.processStartTag(token);
    // The in-head handler may have left other elements above the head, so
    // search for it instead of simply popping.
    for (Node open in reversed(tree.openElements)) {
      if (open.tagName == "head") {
        removeFromList(tree.openElements, open);
        break;
      }
    }
  }

  /** Reports a stray `<head>` start tag as a parse error. */
  void startTagHead(StartTagToken token) {
    parser.parseError("unexpected-start-tag", {"name": token.name});
  }

  /** Inserts an implied `body` and returns [token] unchanged. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /** Inserts an implied `body` and returns [token] unchanged. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Reports any other end tag as a parse error; the token is dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  /**
   * Inserts an implied, attribute-less `body`, switches to the in-body phase
   * and leaves [parser.framesetOK] set to true.
   */
  void anythingElse() {
    var impliedBody = new StartTagToken("body", data: {});
    tree.insertElement(impliedBody);
    parser.phase = parser._inBodyPhase;
    parser.framesetOK = true;
  }
}
| 1054 | |
/**
 * Signature of a token handler: it receives a [Token] and returns a [Token],
 * or null. [InBodyPhase] stores its current whitespace handler in a field of
 * this type.
 */
typedef Token TokenProccessor(Token input);
| 1056 | |
| 1057 class InBodyPhase extends Phase { | |
| 1058 TokenProccessor processSpaceCharactersFunc; | |
| 1059 | |
// Implements the "in body" insertion mode:
// http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
// By far the most involved of the phases.
InBodyPhase(parser) : super(parser) {
  // Whitespace handling is kept behind a field so that <pre>-like elements
  // can temporarily swap in a handler that drops a leading newline.
  processSpaceCharactersFunc = processSpaceCharactersNonPre;
}
| 1066 | |
/**
 * Dispatches a start tag to the handler for its tag name.
 *
 * Names without a dedicated case go to [startTagOther]. Each name is listed
 * in exactly one case group: "noframes" appears only with the tags forwarded
 * to [startTagProcessInHead], which is the group that always matched it.
 */
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);
    case "base": case "basefont": case "bgsound": case "command":
    case "link": case "meta": case "noframes": case "script":
    case "style": case "title":
      return startTagProcessInHead(token);
    case "body":
      return startTagBody(token);
    case "frameset":
      return startTagFrameset(token);
    case "address": case "article": case "aside": case "blockquote":
    case "center": case "details": case "dir": case "div":
    case "dl": case "fieldset": case "figcaption": case "figure":
    case "footer": case "header": case "hgroup": case "menu": case "nav":
    case "ol": case "p": case "section": case "summary": case "ul":
      return startTagCloseP(token);
    // headingElements
    case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
      return startTagHeading(token);
    case "pre": case "listing":
      return startTagPreListing(token);
    case "form":
      return startTagForm(token);
    case "li": case "dd": case "dt":
      return startTagListItem(token);
    case "plaintext":
      return startTagPlaintext(token);
    case "a":
      return startTagA(token);
    case "b": case "big": case "code": case "em": case "font": case "i":
    case "s": case "small": case "strike": case "strong": case "tt":
    case "u":
      return startTagFormatting(token);
    case "nobr":
      return startTagNobr(token);
    case "button":
      return startTagButton(token);
    case "applet": case "marquee": case "object":
      return startTagAppletMarqueeObject(token);
    case "xmp":
      return startTagXmp(token);
    case "table":
      return startTagTable(token);
    case "area": case "br": case "embed": case "img": case "keygen":
    case "wbr":
      return startTagVoidFormatting(token);
    case "param": case "source": case "track":
      return startTagParamSource(token);
    case "input":
      return startTagInput(token);
    case "hr":
      return startTagHr(token);
    case "image":
      return startTagImage(token);
    case "isindex":
      return startTagIsIndex(token);
    case "textarea":
      return startTagTextarea(token);
    case "iframe":
      return startTagIFrame(token);
    // "noframes" is handled by the in-head group above.
    case "noembed": case "noscript":
      return startTagRawtext(token);
    case "select":
      return startTagSelect(token);
    case "rp": case "rt":
      return startTagRpRt(token);
    case "option": case "optgroup":
      return startTagOpt(token);
    case "math":
      return startTagMath(token);
    case "svg":
      return startTagSvg(token);
    case "caption": case "col": case "colgroup": case "frame": case "head":
    case "tbody": case "td": case "tfoot": case "th": case "thead":
    case "tr":
      return startTagMisplaced(token);
    default:
      return startTagOther(token);
  }
}
| 1144 | |
/**
 * Dispatches an end tag to the handler for its tag name; names without a
 * dedicated case go to [endTagOther].
 */
processEndTag(EndTagToken token) {
  switch (token.name) {
    case "body":
      return endTagBody(token);
    case "html":
      return endTagHtml(token);
    case "address":
    case "article":
    case "aside":
    case "blockquote":
    case "center":
    case "details":
    case "dir":
    case "div":
    case "dl":
    case "fieldset":
    case "figcaption":
    case "figure":
    case "footer":
    case "header":
    case "hgroup":
    case "listing":
    case "menu":
    case "nav":
    case "ol":
    case "pre":
    case "section":
    case "summary":
    case "ul":
      return endTagBlock(token);
    case "form":
      return endTagForm(token);
    case "p":
      return endTagP(token);
    case "dd":
    case "dt":
    case "li":
      return endTagListItem(token);
    // headingElements
    case "h1":
    case "h2":
    case "h3":
    case "h4":
    case "h5":
    case "h6":
      return endTagHeading(token);
    case "a":
    case "b":
    case "big":
    case "code":
    case "em":
    case "font":
    case "i":
    case "nobr":
    case "s":
    case "small":
    case "strike":
    case "strong":
    case "tt":
    case "u":
      return endTagFormatting(token);
    case "applet":
    case "marquee":
    case "object":
      return endTagAppletMarqueeObject(token);
    case "br":
      return endTagBr(token);
    default:
      return endTagOther(token);
  }
}
| 1171 | |
/**
 * Whether [node1] and [node2] have the same tag name, the same namespace,
 * the same number of attributes, and equal values for every attribute key
 * of [node1].
 */
bool isMatchingFormattingElement(Node node1, Node node2) {
  if (node1.tagName != node2.tagName) return false;
  if (node1.namespace != node2.namespace) return false;
  if (node1.attributes.length != node2.attributes.length) return false;

  // Only node1's keys are walked; the length check above already rules out
  // node2 carrying a different number of attributes.
  for (var name in node1.attributes.getKeys()) {
    if (node1.attributes[name] != node2.attributes[name]) return false;
  }
  return true;
}
| 1186 | |
/**
 * Inserts an element for [token] and appends it to the tree's list of active
 * formatting elements.
 *
 * When three entries equivalent to the new element (see
 * [isMatchingFormattingElement]) are already found before reaching a
 * [Marker], one of them is removed first so the list never holds more than
 * three duplicates.
 */
void addFormattingElement(token) {
  tree.insertElement(token);
  var inserted = tree.openElements.last();

  // Scan in reversed() order and stop at the first Marker.
  var duplicates = [];
  for (Node candidate in reversed(tree.activeFormattingElements)) {
    if (candidate === Marker) break;
    if (isMatchingFormattingElement(candidate, inserted)) {
      duplicates.add(candidate);
    }
  }

  assert(duplicates.length <= 3);
  if (duplicates.length == 3) {
    // The last duplicate collected is the one furthest along the reversed
    // scan; that is the entry dropped.
    removeFromList(tree.activeFormattingElements, duplicates.last());
  }
  tree.activeFormattingElements.add(inserted);
}
| 1207 | |
/**
 * Handles end of input while in the body.
 *
 * Scans the open elements in reversed() order and reports a single parse
 * error for the first one whose tag may not be left open at EOF. Always
 * returns false, which stops parsing.
 */
bool processEOF() {
  final mayStayOpen = const [
      "dd", "dt", "li", "p", "tbody", "td", "tfoot", "th", "thead", "tr",
      "body", "html"];
  for (Node open in reversed(tree.openElements)) {
    if (mayStayOpen.indexOf(open.tagName) == -1) {
      parser.parseError("expected-closing-tag-but-got-eof");
      break;
    }
  }
  // Stop parsing.
  return false;
}
| 1223 | |
/**
 * Whitespace handler installed right after a `pre`, `listing` or `textarea`
 * start tag: a leading newline is dropped when the current element is one of
 * those and still has no content.
 *
 * The handler is one-shot; it restores [processSpaceCharactersNonPre] before
 * doing anything else. Returns null.
 */
Token processSpaceCharactersDropNewline(token) {
  var text = token.data;
  processSpaceCharactersFunc = processSpaceCharactersNonPre;

  if (text.startsWith("\n")) {
    var current = tree.openElements.last();
    var dropsLeadingNewline =
        const ["pre", "listing", "textarea"].indexOf(current.tagName) >= 0;
    if (dropsLeadingNewline && !current.hasContent()) {
      text = text.substring(1);
    }
  }

  // Nothing is inserted when the token was just the dropped newline.
  if (text.length > 0) {
    tree.reconstructActiveFormattingElements();
    tree.insertText(text);
  }
  return null;
}
| 1241 | |
/**
 * Inserts the token's text into the tree and clears [parser.framesetOK] as
 * soon as the text is not all whitespace. Returns null.
 */
Token processCharacters(CharactersToken token) {
  // The tokenizer is expected to deliver a NUL character as a token of its
  // own; such a token is dropped here.
  if (token.data == "\u0000") return null;

  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data);

  if (parser.framesetOK && !allWhitespace(token.data)) {
    parser.framesetOK = false;
  }
  return null;
}
| 1253 | |
/**
 * Default whitespace handler: reconstructs the active formatting elements
 * and inserts the token's text unchanged. Returns null.
 */
Token processSpaceCharactersNonPre(token) {
  tree.reconstructActiveFormattingElements();
  tree.insertText(token.data);
  return null;
}
| 1258 | |
/**
 * Delegates to whichever whitespace handler is currently stored in
 * [processSpaceCharactersFunc].
 */
Token processSpaceCharacters(token) {
  return processSpaceCharactersFunc(token);
}
| 1260 | |
/** Forwards [token] to the in-head phase and returns its result. */
Token startTagProcessInHead(StartTagToken token) =>
    parser._inHeadPhase.processStartTag(token);
| 1264 | |
/**
 * Handles a `<body>` start tag seen while already in the body.
 *
 * Always a parse error. When the second open element is the `body`, the
 * token's attributes are copied onto it without overwriting existing ones,
 * and [parser.framesetOK] is cleared. Otherwise the parser is asserted to be
 * in innerHTML mode and nothing else happens.
 */
void startTagBody(StartTagToken token) {
  parser.parseError("unexpected-start-tag", {"name": "body"});

  if (tree.openElements.length == 1 ||
      tree.openElements[1].tagName != "body") {
    assert(parser.innerHTMLMode);
    return;
  }

  parser.framesetOK = false;
  var bodyAttributes = tree.openElements[1].attributes;
  token.data.forEach((name, value) {
    bodyAttributes.putIfAbsent(name, () => value);
  });
}
| 1277 | |
/**
 * Handles a `<frameset>` start tag seen while in the body.
 *
 * Always a parse error. If the second open element is the `body` and
 * [parser.framesetOK] is still set, the body is detached from its parent,
 * the stack is popped back to `html`, the frameset is inserted and the
 * parser moves to the in-frameset phase. Otherwise the token is ignored.
 */
void startTagFrameset(StartTagToken token) {
  parser.parseError("unexpected-start-tag", {"name": "frameset"});

  if (tree.openElements.length == 1 ||
      tree.openElements[1].tagName != "body") {
    assert(parser.innerHTMLMode);
    return;
  }
  if (!parser.framesetOK) return;

  var body = tree.openElements[1];
  if (body.parent != null) {
    body.parent.$dom_removeChild(body);
  }
  while (tree.openElements.last().tagName != "html") {
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
  parser.phase = parser._inFramesetPhase;
}
| 1294 | |
/**
 * Inserts the element for [token], first closing a `p` element if one is in
 * scope (using the "button" scope variant).
 */
void startTagCloseP(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
}
| 1301 | |
/**
 * Inserts a `pre` or `listing` element, closing an open `p` first, clears
 * [parser.framesetOK] and arms the newline-dropping whitespace handler.
 */
void startTagPreListing(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  // The next whitespace token gets its leading newline dropped.
  processSpaceCharactersFunc = processSpaceCharactersDropNewline;
}
| 1310 | |
/**
 * Inserts a `form` element and records it as the tree's form pointer.
 *
 * If a form pointer is already set the tag is reported as a parse error and
 * ignored.
 */
void startTagForm(StartTagToken token) {
  if (tree.formPointer != null) {
    parser.parseError("unexpected-start-tag", {"name": "form"});
    return;
  }

  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  tree.formPointer = tree.openElements.last();
}
| 1322 | |
/**
 * Handles `<li>`, `<dd>` and `<dt>` start tags.
 *
 * Walks the open elements in reversed() order: the first element whose tag
 * would be implicitly closed by this one (`li` for `li`; `dt`/`dd` for
 * `dt`/`dd`) gets a synthetic end tag, while a special element other than
 * `address`, `div` or `p` stops the walk. An open `p` is then closed and the
 * new element inserted.
 */
void startTagListItem(StartTagToken token) {
  parser.framesetOK = false;

  final closedBy = const {
      "li": const ["li"],
      "dt": const ["dt", "dd"],
      "dd": const ["dt", "dd"]};
  var closers = closedBy[token.name];

  for (Node open in reversed(tree.openElements)) {
    if (closers.indexOf(open.tagName) >= 0) {
      // Goes through the parser's current phase, which need not be this one.
      parser.phase.processEndTag(new EndTagToken(open.tagName, data: {}));
      break;
    }
    var isSpecial = specialElements.indexOf(open.nameTuple) >= 0;
    if (isSpecial &&
        const ["address", "div", "p"].indexOf(open.tagName) == -1) {
      break;
    }
  }

  if (tree.elementInScope("p", variant: "button")) {
    parser.phase.processEndTag(new EndTagToken("p", data: {}));
  }

  tree.insertElement(token);
}
| 1347 | |
/**
 * Inserts a `plaintext` element, closing an open `p` first, and puts the
 * tokenizer in its plaintext state.
 */
void startTagPlaintext(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  parser.tokenizer.state = parser.tokenizer.plaintextState;
}
| 1355 | |
/**
 * Inserts a heading element, closing an open `p` first.
 *
 * If the current node is itself a heading, that is a parse error and the
 * current node is popped before the new heading is inserted.
 */
void startTagHeading(StartTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p", data: {}));
  }

  var current = tree.openElements.last();
  if (headingElements.indexOf(current.tagName) >= 0) {
    parser.parseError("unexpected-start-tag", {"name": token.name});
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
}
| 1366 | |
/**
 * Handles an `<a>` start tag.
 *
 * If an `a` is already among the active formatting elements, that is a parse
 * error: a synthetic `</a>` is processed and the old element is removed from
 * both the open-element stack and the active formatting list. The new `a`
 * is then added as a formatting element.
 */
void startTagA(StartTagToken token) {
  var activeAnchor = tree.elementInActiveFormattingElements("a");
  if (activeAnchor != null) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "a", "endName": "a"});
    endTagFormatting(new EndTagToken("a", data: {}));
    // Clear out whatever the end-tag handling above left behind.
    removeFromList(tree.openElements, activeAnchor);
    removeFromList(tree.activeFormattingElements, activeAnchor);
  }
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
| 1379 | |
/**
 * Reconstructs the active formatting elements, then inserts [token] as a
 * new formatting element via [addFormattingElement].
 */
void startTagFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}
| 1384 | |
/**
 * Handles a `<nobr>` start tag.
 *
 * A `nobr` already in scope is a parse error and is closed with a synthetic
 * end tag before the new element is added as a formatting element.
 */
void startTagNobr(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  var alreadyOpen = tree.elementInScope("nobr");
  if (alreadyOpen) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "nobr", "endName": "nobr"});
    processEndTag(new EndTagToken("nobr", data: {}));
    // XXX Need tests that trigger the following
    tree.reconstructActiveFormattingElements();
  }
  addFormattingElement(token);
}
| 1396 | |
/**
 * Handles a `<button>` start tag.
 *
 * With a `button` already in scope this is a parse error: a synthetic
 * `</button>` is processed and [token] is returned to the caller. Otherwise
 * the element is inserted, [parser.framesetOK] is cleared and null is
 * returned.
 */
Token startTagButton(StartTagToken token) {
  if (tree.elementInScope("button")) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "button", "endName": "button"});
    processEndTag(new EndTagToken("button", data: {}));
    return token;
  }

  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;
  return null;
}
| 1409 | |
/**
 * Inserts an `applet`, `marquee` or `object` element, appends a [Marker] to
 * the active formatting elements and clears [parser.framesetOK].
 */
void startTagAppletMarqueeObject(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  // The marker bounds later scans of the active formatting list.
  tree.activeFormattingElements.add(Marker);
  parser.framesetOK = false;
}
| 1416 | |
/**
 * Handles an `<xmp>` start tag: closes an open `p`, reconstructs the active
 * formatting elements, clears [parser.framesetOK] and hands the token to
 * [parser.parseRCDataRawtext] as "RAWTEXT".
 */
void startTagXmp(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.reconstructActiveFormattingElements();
  parser.framesetOK = false;
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
| 1425 | |
/**
 * Inserts a `table` element and switches to the in-table phase.
 *
 * Outside quirks mode an open `p` is closed first; in quirks mode it is left
 * open.
 */
void startTagTable(StartTagToken token) {
  var closesParagraph = parser.compatMode != "quirks";
  if (closesParagraph && tree.elementInScope("p", variant: "button")) {
    processEndTag(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  parser.phase = parser._inTablePhase;
}
| 1436 | |
/**
 * Inserts a void element (`area`, `br`, `embed`, `img`, `keygen`, `wbr`; also
 * used by [startTagInput]): the element is inserted and popped again straight
 * away, the self-closing flag is acknowledged and [parser.framesetOK] is
 * cleared.
 */
void startTagVoidFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  // Void elements never stay on the open-element stack.
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
| 1444 | |
/**
 * Inserts an `input` element like any other void element, except that an
 * input whose `type` is "hidden" (ASCII case-insensitive) leaves
 * [parser.framesetOK] as it was.
 */
void startTagInput(StartTagToken token) {
  var framesetWasOK = parser.framesetOK;
  startTagVoidFormatting(token);

  var isHidden = asciiUpper2Lower(token.data["type"]) == "hidden";
  if (isHidden) {
    // Undo the framesetOK reset done by startTagVoidFormatting.
    parser.framesetOK = framesetWasOK;
  }
}
| 1453 | |
/**
 * Inserts a `param`, `source` or `track` element and pops it again straight
 * away, acknowledging the token's self-closing flag.
 */
void startTagParamSource(StartTagToken token) {
  tree.insertElement(token);
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1459 | |
/**
 * Inserts an `hr` element: closes an open `p`, inserts and immediately pops
 * the element, acknowledges the self-closing flag and clears
 * [parser.framesetOK].
 */
void startTagHr(StartTagToken token) {
  var paragraphOpen = tree.elementInScope("p", variant: "button");
  if (paragraphOpen) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}
| 1469 | |
/**
 * Treats the non-standard `<image>` tag as `<img>`: reports a parse error
 * and reprocesses an `img` start tag carrying the same attributes and
 * self-closing flag.
 */
void startTagImage(StartTagToken token) {
  parser.parseError("unexpected-start-tag-treated-as",
      {"originalName": "image", "newName": "img"});
  var img = new StartTagToken("img",
      data: token.data, selfClosing: token.selfClosing);
  processStartTag(img);
}
| 1477 | |
/**
 * Expands the deprecated `<isindex>` tag into
 * `<form><hr><label>prompt<input name=isindex></label><hr></form>`.
 *
 * Always a parse error. Nothing is generated when a form pointer is already
 * set. The `action` attribute moves to the form, `prompt` becomes the label
 * text, and every other attribute is copied to the input.
 */
void startTagIsIndex(StartTagToken token) {
  parser.parseError("deprecated-tag", {"name": "isindex"});
  if (tree.formPointer != null) return;

  var formAttributes = {};
  var action = token.data["action"];
  if (action != null) {
    formAttributes["action"] = action;
  }
  processStartTag(new StartTagToken("form", data: formAttributes));
  processStartTag(new StartTagToken("hr", data: {}));
  processStartTag(new StartTagToken("label", data: {}));

  // XXX Localization ...
  var customPrompt = token.data["prompt"];
  var prompt = customPrompt != null
      ? customPrompt
      : "This is a searchable index. Enter search keywords: ";
  processCharacters(new CharactersToken(prompt));

  // Work on a copy so the original token's attributes stay untouched.
  var inputAttributes = new Map.from(token.data);
  inputAttributes.remove('action');
  inputAttributes.remove('prompt');
  inputAttributes["name"] = "isindex";
  processStartTag(new StartTagToken("input",
      data: inputAttributes, selfClosing: token.selfClosing));

  processEndTag(new EndTagToken("label", data: {}));
  processStartTag(new StartTagToken("hr", data: {}));
  processEndTag(new EndTagToken("form", data: {}));
}
| 1507 | |
/**
 * Inserts a `textarea` element, puts the tokenizer in its RCDATA state, arms
 * the newline-dropping whitespace handler and clears [parser.framesetOK].
 */
void startTagTextarea(StartTagToken token) {
  tree.insertElement(token);
  parser.tokenizer.state = parser.tokenizer.rcdataState;
  // The next whitespace token gets its leading newline dropped.
  processSpaceCharactersFunc = processSpaceCharactersDropNewline;
  parser.framesetOK = false;
}
| 1514 | |
/**
 * Clears [parser.framesetOK] and then treats the `iframe` like the other
 * raw-text elements (see [startTagRawtext]).
 */
void startTagIFrame(StartTagToken token) {
  parser.framesetOK = false;
  startTagRawtext(token);
}
| 1519 | |
/**
 * Hands the token to [parser.parseRCDataRawtext] as "RAWTEXT".
 *
 * Used for `iframe`, `noembed`, `noframes` and, with scripting enabled,
 * `noscript`.
 */
void startTagRawtext(StartTagToken token) {
  parser.parseRCDataRawtext(token, "RAWTEXT");
}
| 1524 | |
/**
 * Inserts an `option` or `optgroup` element; if the current node is an
 * `option`, it is closed first with a synthetic end tag sent through the
 * parser's current phase.
 */
void startTagOpt(StartTagToken token) {
  var current = tree.openElements.last();
  if (current.tagName == "option") {
    parser.phase.processEndTag(new EndTagToken("option", data: {}));
  }
  tree.reconstructActiveFormattingElements();
  parser.tree.insertElement(token);
}
| 1532 | |
/**
 * Inserts a `select` element, clears [parser.framesetOK] and switches to the
 * matching select phase: in-select-in-table when the parser's current phase
 * is one of the table-related phases, plain in-select otherwise.
 */
void startTagSelect(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;

  // This handler can run on behalf of another phase, so look at the parser's
  // current phase rather than assuming it is this one.
  var insideTable = parser._inTablePhase == parser.phase ||
      parser._inCaptionPhase == parser.phase ||
      parser._inColumnGroupPhase == parser.phase ||
      parser._inTableBodyPhase == parser.phase ||
      parser._inRowPhase == parser.phase ||
      parser._inCellPhase == parser.phase;

  parser.phase =
      insideTable ? parser._inSelectInTablePhase : parser._inSelectPhase;
}
| 1549 | |
/**
 * Inserts an `rp` or `rt` element.
 *
 * With a `ruby` in scope, implied end tags are generated first, and a parse
 * error is reported if the current node is then not the `ruby` element.
 */
void startTagRpRt(StartTagToken token) {
  var insideRuby = tree.elementInScope("ruby");
  if (insideRuby) {
    tree.generateImpliedEndTags();
    var current = tree.openElements.last();
    if (current.tagName != "ruby") {
      parser.parseError();
    }
  }
  tree.insertElement(token);
}
| 1559 | |
/**
 * Inserts a `math` element in the MathML namespace after adjusting the
 * token's MathML and foreign attributes; a self-closing tag is popped again
 * immediately.
 */
void startTagMath(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser.adjustMathMLAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.mathml;
  tree.insertElement(token);

  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute.
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1573 | |
/**
 * Inserts an `svg` element in the SVG namespace after adjusting the token's
 * SVG and foreign attributes; a self-closing tag is popped again
 * immediately.
 */
void startTagSvg(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  parser.adjustSVGAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.svg;
  tree.insertElement(token);

  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute.
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}
| 1587 | |
/**
 * Reports and ignores a start tag that only makes sense in a different
 * insertion mode.
 *
 * [processStartTag] routes "caption", "col", "colgroup", "frame", "head",
 * "tbody", "td", "tfoot", "th", "thead" and "tr" here.
 */
void startTagMisplaced(StartTagToken token) {
  var details = {"name": token.name};
  parser.parseError("unexpected-start-tag-ignored", details);
}
| 1599 | |
/**
 * Fallback for start tags without a dedicated handler: reconstructs the
 * active formatting elements and inserts the element. Returns null.
 */
Token startTagOther(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  return null;
}
| 1604 | |
/**
 * Handles a `</p>` end tag.
 *
 * With a `p` in scope ("button" variant), implied end tags other than `p`
 * are generated, a parse error is reported if the current node is not the
 * `p`, and the stack is popped through the `p`. With none in scope, an empty
 * `p` is opened, a parse error is reported, and the end tag is processed
 * again so the new element is closed.
 */
void endTagP(EndTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    tree.generateImpliedEndTags("p");
    var current = tree.openElements.last();
    if (current.tagName != "p") {
      parser.parseError("unexpected-end-tag", {"name": "p"});
    }
    popOpenElementsUntil("p");
    return;
  }

  // Nothing to close: synthesize <p></p>.
  startTagCloseP(new StartTagToken("p", data: {}));
  parser.parseError("unexpected-end-tag", {"name": "p"});
  endTagP(new EndTagToken("p", data: {}));
}
| 1618 | |
/**
 * Handles a `</body>` end tag.
 *
 * Without a `body` in scope the tag is a parse error and is ignored.
 * Otherwise, if the current node is not the `body`, the open elements from
 * index 2 onward are scanned and one parse error is reported for the first
 * element that may not be left open. The parser then moves to the after-body
 * phase.
 */
void endTagBody(EndTagToken token) {
  if (!tree.elementInScope("body")) {
    parser.parseError();
    return;
  }

  if (tree.openElements.last().tagName != "body") {
    final mayStayOpen = const [
        "dd", "dt", "li", "optgroup", "option", "p", "rp", "rt", "tbody",
        "td", "tfoot", "th", "thead", "tr", "body", "html"];
    for (Node open in slice(tree.openElements, 2)) {
      if (mayStayOpen.indexOf(open.tagName) == -1) {
        // Not sure this is the correct name for the parse error
        parser.parseError("expected-one-end-tag-but-got-another",
            {"expectedName": "body", "gotName": open.tagName});
        break;
      }
    }
  }
  parser.phase = parser._afterBodyPhase;
}
| 1639 | |
/**
 * Handles a `</html>` end tag by acting as if `</body>` had been seen and
 * then returning [token] to the caller.
 *
 * Returns null, ignoring the token, when no `body` is in scope. The scope
 * test repeats the one [endTagBody] uses to ignore its own token.
 */
Token endTagHtml(EndTagToken token) {
  if (!tree.elementInScope("body")) return null;

  endTagBody(new EndTagToken("body", data: {}));
  return token;
}
| 1647 | |
/**
 * Handles the end tag of a block-level element.
 *
 * When the element is in scope, implied end tags are generated and the stack
 * is popped through it. A parse error is reported whenever the current node
 * does not carry the token's name, whether or not the element is in scope.
 */
void endTagBlock(EndTagToken token) {
  var name = token.name;

  // Put us back in the right whitespace handling mode.
  if (name == "pre") {
    processSpaceCharactersFunc = processSpaceCharactersNonPre;
  }

  var inScope = tree.elementInScope(name);
  if (inScope) {
    tree.generateImpliedEndTags();
  }

  var current = tree.openElements.last();
  if (current.tagName != name) {
    parser.parseError("end-tag-too-early", {"name": name});
  }

  if (inScope) {
    popOpenElementsUntil(name);
  }
}
| 1664 | |
/**
 * Handles a `</form>` end tag.
 *
 * The form pointer is cleared in every case. If it was unset, or the form is
 * not in scope, the tag is a parse error. Otherwise implied end tags are
 * generated, a parse error is reported if the form is not the current node,
 * and the form is removed from the open-element stack.
 */
void endTagForm(EndTagToken token) {
  var form = tree.formPointer;
  tree.formPointer = null;

  if (form === null || !tree.elementInScope(form)) {
    parser.parseError("unexpected-end-tag", {"name": "form"});
    return;
  }

  tree.generateImpliedEndTags();
  if (tree.openElements.last() != form) {
    parser.parseError("end-tag-too-early-ignored", {"name": "form"});
  }
  // Removed by identity; elements above it stay on the stack.
  removeFromList(tree.openElements, form);
}
| 1678 | |
/**
 * Handles the end tag of a list-item style element named by [token].
 *
 * `li` is looked up with the "list" scope variant, every other name with the
 * default scope. An element that is not in scope is reported as an
 * unexpected end tag and otherwise ignored.
 */
void endTagListItem(EndTagToken token) {
  var tagName = token.name;
  var scopeVariant = (tagName == "li") ? "list" : null;

  if (!tree.elementInScope(tagName, variant: scopeVariant)) {
    parser.parseError("unexpected-end-tag", {"name": tagName});
    return;
  }

  // Close everything implied, except the element we are about to close.
  tree.generateImpliedEndTags(exclude: tagName);
  if (tree.openElements.last().tagName != tagName) {
    parser.parseError("end-tag-too-early", {"name": tagName});
  }
  popOpenElementsUntil(tagName);
}
| 1696 | |
/**
 * Handles the end tag of a heading element.
 *
 * If any name listed in [headingElements] is in scope, implied end tags are
 * generated and, after the mismatch check, open elements are popped up to
 * and including the nearest element whose tag name is a heading name.
 */
void endTagHeading(EndTagToken token) {
  // Whether some element named in headingElements is currently in scope.
  bool headingInScope() {
    for (var headingName in headingElements) {
      if (tree.elementInScope(headingName)) return true;
    }
    return false;
  }

  if (headingInScope()) tree.generateImpliedEndTags();

  if (tree.openElements.last().tagName != token.name) {
    parser.parseError("end-tag-too-early", {"name": token.name});
  }

  if (headingInScope()) {
    // Pop until the element just removed is itself a heading.
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (headingElements.indexOf(popped.tagName) == -1);
  }
}
| 1718 | |
/**
 * The much-feared adoption agency algorithm.
 *
 * Handles the end tag of a formatting element named by [token]. Each pass of
 * the outer loop (at most 8) looks up the matching entry in the list of
 * active formatting elements, finds the "furthest block" below it in the
 * stack of open elements, and re-parents the nodes in between so that the
 * formatting element is closed while the block keeps a clone of it. The
 * method returns as soon as a pass reports an error that ends processing or
 * finds no furthest block.
 */
endTagFormatting(EndTagToken token) {
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency
  // TODO(jmesserly): the comments here don't match the numbered steps in the
  // updated spec. This needs a pass over it to verify that it still matches.
  // In particular the html5lib Python code skipped "step 4", I'm not sure why.
  // XXX Better parseError messages appreciated.
  int outerLoopCounter = 0;
  // The outer loop is bounded: it runs at most 8 times.
  while (outerLoopCounter < 8) {
    outerLoopCounter += 1;

    // Step 1 paragraph 1
    // No matching active formatting element, or it is on the stack of open
    // elements but not in scope: report the error and stop.
    var formattingElement = tree.elementInActiveFormattingElements(
        token.name);
    if (formattingElement == null ||
        (tree.openElements.indexOf(formattingElement) >= 0 &&
         !tree.elementInScope(formattingElement.tagName))) {
      parser.parseError("adoption-agency-1.1", {"name": token.name});
      return;
    // Step 1 paragraph 2
    // The element is no longer on the stack of open elements: drop it from
    // the active formatting list and stop.
    } else if (tree.openElements.indexOf(formattingElement) == -1) {
      parser.parseError("adoption-agency-1.2", {"name": token.name});
      removeFromList(tree.activeFormattingElements, formattingElement);
      return;
    }

    // Step 1 paragraph 3
    // Not the current node: an error, but processing continues.
    if (formattingElement != tree.openElements.last()) {
      parser.parseError("adoption-agency-1.3", {"name": token.name});
    }

    // Step 2
    // Start of the adoption agency algorithm proper.
    // The furthest block is the first element at or after the formatting
    // element in the stack whose nameTuple is listed in specialElements.
    var afeIndex = tree.openElements.indexOf(formattingElement);
    Node furthestBlock = null;
    for (Node element in slice(tree.openElements, afeIndex)) {
      if (specialElements.indexOf(element.nameTuple) >= 0) {
        furthestBlock = element;
        break;
      }
    }
    // Step 3
    // No furthest block: pop the stack up to and including the formatting
    // element, remove it from the active formatting list, and finish.
    if (furthestBlock === null) {
      var element = tree.openElements.removeLast();
      while (element != formattingElement) {
        element = tree.openElements.removeLast();
      }
      removeFromList(tree.activeFormattingElements, element);
      return;
    }

    // The element immediately before the formatting element in the stack.
    var commonAncestor = tree.openElements[afeIndex - 1];

    // Step 5
    // The bookmark is supposed to help us identify where to reinsert
    // nodes in step 12. We have to ensure that we reinsert nodes after
    // the node before the active formatting element. Note the bookmark
    // can move in step 7.4
    var bookmark = tree.activeFormattingElements.indexOf(formattingElement);

    // Step 6
    Node lastNode = furthestBlock;
    var node = furthestBlock;
    int innerLoopCounter = 0;

    var index = tree.openElements.indexOf(node);
    // The inner loop is bounded as well: at most 3 iterations.
    while (innerLoopCounter < 3) {
      innerLoopCounter += 1;

      // Node is the element before node in the open elements.
      index -= 1;
      node = tree.openElements[index];
      // Elements that are not active formatting elements are simply dropped
      // from the stack of open elements.
      if (tree.activeFormattingElements.indexOf(node) == -1) {
        removeFromList(tree.openElements, node);
        continue;
      }
      // Step 6.3
      // Reached the formatting element itself: the inner loop is done.
      if (node == formattingElement) {
        break;
      }
      // Step 6.4
      // While lastNode is still the furthest block, move the bookmark to
      // just after node in the active formatting list.
      if (lastNode == furthestBlock) {
        bookmark = (tree.activeFormattingElements.indexOf(node) + 1);
      }
      // Step 6.5
      //cite = node.parent
      var clone = node.clone();
      // Replace node with its clone in both lists.
      tree.activeFormattingElements[
          tree.activeFormattingElements.indexOf(node)] = clone;
      tree.openElements[tree.openElements.indexOf(node)] = clone;
      node = clone;

      // Step 6.6
      // Remove lastNode from its parent, if any, and append it to the clone.
      if (lastNode.parent != null) {
        lastNode.parent.$dom_removeChild(lastNode);
      }
      node.$dom_appendChild(lastNode);
      // Step 7.7
      lastNode = node;
      // End of inner loop
    }

    // Step 7
    // Foster parent lastNode if commonAncestor is a
    // table, tbody, tfoot, thead, or tr we need to foster parent the
    // lastNode
    if (lastNode.parent != null) {
      lastNode.parent.$dom_removeChild(lastNode);
    }

    if (const ["table", "tbody", "tfoot", "thead", "tr"].indexOf(
        commonAncestor.tagName) >= 0) {
      // nodePos[0] is used as the parent and nodePos[1] as the reference
      // node that lastNode is inserted before.
      var nodePos = tree.getTableMisnestedNodePosition();
      nodePos[0].insertBefore(lastNode, nodePos[1]);
    } else {
      commonAncestor.$dom_appendChild(lastNode);
    }

    // Step 8
    var clone = formattingElement.clone();

    // Step 9
    // Hand the furthest block's children over to the clone.
    furthestBlock.reparentChildren(clone);

    // Step 10
    furthestBlock.$dom_appendChild(clone);

    // Step 11
    // Swap the formatting element for its clone in the active formatting
    // list; the bookmark is clamped to the list length after the removal.
    removeFromList(tree.activeFormattingElements, formattingElement);
    tree.activeFormattingElements.insertRange(
        min(bookmark, tree.activeFormattingElements.length), 1, clone);

    // Step 12
    // In the stack of open elements the clone goes right after the furthest
    // block.
    removeFromList(tree.openElements, formattingElement);
    tree.openElements.insertRange(
        tree.openElements.indexOf(furthestBlock) + 1, 1, clone);
  }
}
| 1859 | |
/**
 * Handles the end tag of the element named by [token] (dispatched here for
 * applet/marquee/object style elements, judging by the method name).
 *
 * When the element is in scope, implied end tags are generated, the stack
 * is popped back to it and the active formatting elements are cleared. A
 * parse error is reported whenever the current node is not that element.
 */
void endTagAppletMarqueeObject(EndTagToken token) {
  var tagName = token.name;

  if (tree.elementInScope(tagName)) tree.generateImpliedEndTags();

  if (tree.openElements.last().tagName != tagName) {
    parser.parseError("end-tag-too-early", {"name": tagName});
  }

  // The scope test is repeated, exactly as before, after the error check.
  if (!tree.elementInScope(tagName)) return;

  popOpenElementsUntil(tagName);
  tree.clearActiveFormattingElements();
}
| 1872 | |
/**
 * Handles a stray `</br>`: reports the error, then inserts a `br` element as
 * if a start tag had been seen and immediately pops it off the open elements.
 */
void endTagBr(EndTagToken token) {
  var errorData = {"originalName": "br", "newName": "br element"};
  parser.parseError("unexpected-end-tag-treated-as", errorData);

  tree.reconstructActiveFormattingElements();

  var impliedBr = new StartTagToken("br", data: {});
  tree.insertElement(impliedBr);
  tree.openElements.removeLast();
}
| 1880 | |
/**
 * Handles any end tag without a dedicated handler.
 *
 * Walks the open elements from the most recent backwards. The first element
 * whose tag name matches [token] is closed, together with everything above
 * it; if an element listed in [specialElements] is met first, the end tag
 * is reported as unexpected and ignored.
 */
void endTagOther(EndTagToken token) {
  var tagName = token.name;

  for (Node node in reversed(tree.openElements)) {
    if (node.tagName != tagName) {
      if (specialElements.indexOf(node.nameTuple) >= 0) {
        parser.parseError("unexpected-end-tag", {"name": tagName});
        return;
      }
      continue;
    }

    tree.generateImpliedEndTags(exclude: tagName);
    if (tree.openElements.last().tagName != tagName) {
      parser.parseError("unexpected-end-tag", {"name": tagName});
    }

    // Pop up to and including the matched node.
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped != node);
    return;
  }
}
| 1898 } | |
| 1899 | |
| 1900 | |
/**
 * Phase active while the text content of an element is being read.
 *
 * Character data is inserted as text; any end tag (or EOF) pops the current
 * element and switches the parser back to [parser.originalPhase]. Start tags
 * are not expected here at all.
 */
class TextPhase extends Phase {
  TextPhase(parser) : super(parser);

  // "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token.name
  processStartTag(StartTagToken token) {
    assert(false);
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case 'script':
        return endTagScript(token);
      default:
        return endTagOther(token);
    }
  }

  /** Appends the token's character data to the tree; always yields null. */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data);
    return null;
  }

  /**
   * EOF inside text content is a parse error: the current element is popped,
   * the original phase is restored, and `true` is returned.
   */
  bool processEOF() {
    var unclosedName = tree.openElements.last().tagName;
    parser.parseError("expected-named-closing-tag-but-got-eof",
        {'name': unclosedName});
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
    return true;
  }

  /** Pops the current node, which must be a `script`, and leaves the phase. */
  void endTagScript(EndTagToken token) {
    var scriptNode = tree.openElements.removeLast();
    assert(scriptNode.tagName == "script");
    parser.phase = parser.originalPhase;
    // Everything else the spec describes here only matters when
    // document.write works.
  }

  /** Pops the current node and leaves the phase. */
  void endTagOther(EndTagToken token) {
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
  }
}
| 1937 | |
/**
 * Phase for tokens seen directly inside a `table` element.
 *
 * Table-structure start tags open the matching section and switch the parser
 * to the corresponding phase. Character data is routed through the
 * in-table-text phase. Anything else is handed to the in-body phase with
 * [tree.insertFromTable] set for the duration of the call.
 */
class InTablePhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work///in-table
  InTablePhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html": return startTagHtml(token);
      case "caption": return startTagCaption(token);
      case "colgroup": return startTagColgroup(token);
      case "col": return startTagCol(token);
      case "tbody": case "tfoot": case "thead": return startTagRowGroup(token);
      case "td": case "th": case "tr": return startTagImplyTbody(token);
      case "table": return startTagTable(token);
      case "style": case "script": return startTagStyleScript(token);
      case "input": return startTagInput(token);
      case "form": return startTagForm(token);
      default: return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "table": return endTagTable(token);
      case "body": case "caption": case "col": case "colgroup": case "html":
      case "tbody": case "td": case "tfoot": case "th": case "thead": case "tr":
        return endTagIgnore(token);
      default: return endTagOther(token);
    }
  }

  // helper methods

  /**
   * "Clear the stack back to a table context": pops open elements until the
   * current node is a `table` or `html` element.
   */
  void clearStackToTableContext() {
    while (tree.openElements.last().tagName != "table" &&
           tree.openElements.last().tagName != "html") {
      //parser.parseError("unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last().name})
      tree.openElements.removeLast();
    }
    // When the current node is <html> it's an innerHTML case
  }

  // processing methods

  /**
   * Reports "eof-in-table" unless the current node is `html` (expected only
   * in innerHTML mode). Always returns `false`, which stops parsing.
   */
  bool processEOF() {
    if (tree.openElements.last().tagName != "html") {
      parser.parseError("eof-in-table");
    } else {
      assert(parser.innerHTMLMode);
    }
    //Stop parsing
    return false;
  }

  /**
   * Switches to the in-table-text phase, remembering the current phase on it,
   * and forwards the whitespace token there. Nothing is returned.
   */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    var originalPhase = parser.phase;
    parser.phase = parser._inTableTextPhase;
    parser._inTableTextPhase.originalPhase = originalPhase;
    parser.phase.processSpaceCharacters(token);
  }

  /**
   * Switches to the in-table-text phase, remembering the current phase on it,
   * and forwards the character token there. Nothing is returned.
   */
  Token processCharacters(CharactersToken token) {
    var originalPhase = parser.phase;
    parser.phase = parser._inTableTextPhase;
    parser._inTableTextPhase.originalPhase = originalPhase;
    parser.phase.processCharacters(token);
  }

  /**
   * Inserts character data via the in-body phase with
   * [tree.insertFromTable] set while the call runs.
   */
  void insertText(CharactersToken token) {
    // If we get here there must be at least one non-whitespace character
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processCharacters(token);
    tree.insertFromTable = false;
  }

  /** Opens a `caption`, pushing a [Marker] onto the active formatting list. */
  void startTagCaption(StartTagToken token) {
    clearStackToTableContext();
    tree.activeFormattingElements.add(Marker);
    tree.insertElement(token);
    parser.phase = parser._inCaptionPhase;
  }

  /** Opens a `colgroup` and switches to the column-group phase. */
  void startTagColgroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inColumnGroupPhase;
  }

  /** Opens an implied `colgroup` first, then returns [token] to the caller. */
  Token startTagCol(StartTagToken token) {
    startTagColgroup(new StartTagToken("colgroup", data: {}));
    return token;
  }

  /** Opens a `tbody`/`tfoot`/`thead` and switches to the table-body phase. */
  void startTagRowGroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inTableBodyPhase;
  }

  /** Opens an implied `tbody` first, then returns [token] to the caller. */
  Token startTagImplyTbody(StartTagToken token) {
    startTagRowGroup(new StartTagToken("tbody", data: {}));
    return token;
  }

  /**
   * A nested `table` start tag is an error that implies `</table>`. [token]
   * is returned to the caller unless the parser is in innerHTML mode.
   */
  Token startTagTable(StartTagToken token) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "table", "endName": "table"});
    parser.phase.processEndTag(new EndTagToken("table", data: {}));
    if (!parser.innerHTMLMode) {
      return token;
    }
  }

  /** `style` and `script` are delegated to the in-head phase. */
  Token startTagStyleScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /**
   * An `input` whose `type` attribute is "hidden" (ASCII case-insensitive)
   * is inserted and immediately popped, with a parse error. Any other
   * `input`, including one with no `type` attribute, goes to
   * [startTagOther].
   */
  void startTagInput(StartTagToken token) {
    // A missing attribute reads as null from the attribute map; test for it
    // first so the case-folding helper is never handed null.
    var type = token.data["type"];
    if (type != null && asciiUpper2Lower(type) == "hidden") {
      parser.parseError("unexpected-hidden-input-in-table");
      tree.insertElement(token);
      // XXX associate with form
      tree.openElements.removeLast();
    } else {
      startTagOther(token);
    }
  }

  /**
   * A `form` inside a table is always an error. If no form pointer is set,
   * the element is inserted, recorded as the form pointer and popped again.
   */
  void startTagForm(StartTagToken token) {
    parser.parseError("unexpected-form-in-table");
    if (tree.formPointer === null) {
      tree.insertElement(token);
      tree.formPointer = tree.openElements.last();
      tree.openElements.removeLast();
    }
  }

  /** Any other start tag: error, then in-body handling in table mode. */
  void startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processStartTag(token);
    tree.insertFromTable = false;
  }

  /**
   * Closes the table when one is in table scope: pops up to and including
   * the `table` element and resets the insertion mode. Otherwise (expected
   * only in innerHTML mode) just reports a parse error.
   */
  void endTagTable(EndTagToken token) {
    if (tree.elementInScope("table", variant: "table")) {
      tree.generateImpliedEndTags();
      if (tree.openElements.last().tagName != "table") {
        parser.parseError("end-tag-too-early-named", {"gotName": "table",
            "expectedName": tree.openElements.last().tagName});
      }
      while (tree.openElements.last().tagName != "table") {
        tree.openElements.removeLast();
      }
      tree.openElements.removeLast();
      parser.resetInsertionMode();
    } else {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
    }
  }

  /** End tags that are ignored in this phase, apart from a parse error. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  /** Any other end tag: error, then in-body handling in table mode. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processEndTag(token);
    tree.insertFromTable = false;
  }
}
| 2116 | |
/**
 * Phase that buffers character tokens seen inside a table.
 *
 * Tokens accumulate in [characterTokens] until a non-character token (or
 * EOF) arrives. The buffer is then flushed, [originalPhase] is restored, and
 * the triggering token is handed back to the caller.
 */
class InTableTextPhase extends Phase {
  Phase originalPhase;
  List<StringToken> characterTokens;

  InTableTextPhase(parser)
      : characterTokens = <StringToken>[],
        super(parser);

  /**
   * Emits the buffered text and empties the buffer. Text with any
   * non-whitespace goes through the in-table phase's `insertText`; non-empty
   * all-whitespace text is inserted directly; empty text is dropped.
   */
  void flushCharacters() {
    var text = joinStr(characterTokens.map((t) => t.data));
    if (allWhitespace(text)) {
      if (text.length > 0) tree.insertText(text);
    } else {
      parser._inTablePhase.insertText(new CharactersToken(text));
    }
    characterTokens = <StringToken>[];
  }

  // Shared exit path: flush what was buffered and return to the phase that
  // was active before this one.
  void _flushAndRestorePhase() {
    flushCharacters();
    parser.phase = originalPhase;
  }

  Token processComment(CommentToken token) {
    _flushAndRestorePhase();
    return token;
  }

  bool processEOF() {
    _flushAndRestorePhase();
    return true;
  }

  /** Buffers [token] unless its data is exactly a single NUL character. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      characterTokens.add(token);
    }
    return null;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) {
    //pretty sure we should never reach here
    characterTokens.add(token);
    // XXX assert(false);
    return null;
  }

  Token processStartTag(StartTagToken token) {
    _flushAndRestorePhase();
    return token;
  }

  Token processEndTag(EndTagToken token) {
    _flushAndRestorePhase();
    return token;
  }
}
| 2172 | |
| 2173 | |
/**
 * Phase for tokens seen inside a table `caption`.
 *
 * Most content is delegated to the in-body phase. Table-structure start tags
 * and `</table>` imply `</caption>`, after which the token is handed back to
 * the caller unless that implied end tag was ignored.
 */
class InCaptionPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work///in-caption
  InCaptionPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption": case "col": case "colgroup": case "tbody": case "td":
      case "tfoot": case "th": case "thead": case "tr":
        return startTagTableElement(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "caption":
        return endTagCaption(token);
      case "table":
        return endTagTable(token);
      case "body": case "col": case "colgroup": case "html": case "tbody":
      case "td": case "tfoot": case "th": case "thead": case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  /** Whether a `</caption>` would be ignored: no caption in table scope. */
  bool ignoreEndTagCaption() {
    return !tree.elementInScope("caption", variant: "table");
  }

  // Reports a parse error, processes an implied </caption> through the
  // current phase, and returns [token] unless that end tag was ignored.
  //XXX Have to duplicate logic here to find out if the tag is ignored
  Token _impliedCaptionEnd(token) {
    parser.parseError();
    var captionEndIgnored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption", data: {}));
    return captionEndIgnored ? null : token;
  }

  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  Token startTagTableElement(StartTagToken token) {
    return _impliedCaptionEnd(token);
  }

  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Closes the caption when one is in table scope: pops up to and including
   * it, clears the active formatting elements, and returns to the in-table
   * phase. Otherwise (expected only in innerHTML mode) reports a parse error.
   */
  void endTagCaption(EndTagToken token) {
    if (ignoreEndTagCaption()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return;
    }

    // AT this code is quite similar to endTagTable in "InTable"
    tree.generateImpliedEndTags();
    var currentName = tree.openElements.last().tagName;
    if (currentName != "caption") {
      parser.parseError("expected-one-end-tag-but-got-another",
          {"gotName": "caption", "expectedName": currentName});
    }
    while (tree.openElements.last().tagName != "caption") {
      tree.openElements.removeLast();
    }
    tree.openElements.removeLast();
    tree.clearActiveFormattingElements();
    parser.phase = parser._inTablePhase;
  }

  Token endTagTable(EndTagToken token) {
    return _impliedCaptionEnd(token);
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}
| 2267 | |
| 2268 | |
/**
 * Phase for tokens seen inside a `colgroup`.
 *
 * Only `col` start tags and `colgroup`/`col` end tags are handled here.
 * Every other token implies `</colgroup>` and is then handed back to the
 * caller, unless that implied end tag was ignored.
 */
class InColumnGroupPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work///in-column
  InColumnGroupPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "col":
        return startTagCol(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "colgroup":
        return endTagColgroup(token);
      case "col":
        return endTagCol(token);
      default:
        return endTagOther(token);
    }
  }

  /** Whether `</colgroup>` would be ignored: the current node is `html`. */
  bool ignoreEndTagColgroup() {
    return tree.openElements.last().tagName == "html";
  }

  // Processes an implied </colgroup> and returns [token] unless that end
  // tag was ignored, in which case null is returned.
  Token _impliedColgroupEnd(token) {
    var colgroupEndIgnored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup", data: {}));
    return colgroupEndIgnored ? null : token;
  }

  bool processEOF() {
    if (ignoreEndTagColgroup()) {
      assert(parser.innerHTMLMode);
      return false;
    }
    endTagColgroup(new EndTagToken("colgroup", data: {}));
    return true;
  }

  Token processCharacters(CharactersToken token) {
    return _impliedColgroupEnd(token);
  }

  /** Inserts the `col` element and immediately pops it again. */
  void startTagCol(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  Token startTagOther(StartTagToken token) {
    return _impliedColgroupEnd(token);
  }

  /**
   * Pops the current node and returns to the in-table phase, unless the end
   * tag is to be ignored (expected only in innerHTML mode), which is
   * reported as a parse error.
   */
  void endTagColgroup(EndTagToken token) {
    if (!ignoreEndTagColgroup()) {
      tree.openElements.removeLast();
      parser.phase = parser._inTablePhase;
      return;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError();
  }

  void endTagCol(EndTagToken token) {
    parser.parseError("no-end-tag", {"name": "col"});
  }

  Token endTagOther(EndTagToken token) {
    return _impliedColgroupEnd(token);
  }
}
| 2342 | |
| 2343 | |
/**
 * Phase for tokens seen inside a `tbody`, `tfoot` or `thead`.
 *
 * Rows and cells open a `tr` (implied for cells). Table-structure tags that
 * cannot nest here close the current row group and are handed back to the
 * caller. Everything else is delegated to the in-table phase.
 */
class InTableBodyPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work///in-table0
  InTableBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "tr":
        return startTagTr(token);
      case "td": case "th":
        return startTagTableCell(token);
      case "caption": case "col": case "colgroup": case "tbody": case "tfoot":
      case "thead":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tbody": case "tfoot": case "thead":
        return endTagTableRowGroup(token);
      case "table":
        return endTagTable(token);
      case "body": case "caption": case "col": case "colgroup": case "html":
      case "td": case "th": case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /**
   * Pops open elements until the current node is a `tbody`, `tfoot`,
   * `thead` or `html` element; ending on `html` is expected only in
   * innerHTML mode.
   */
  void clearStackToTableBodyContext() {
    final stopTags = const ["tbody", "tfoot","thead", "html"];
    while (stopTags.indexOf(tree.openElements.last().tagName) == -1) {
      //XXX parser.parseError("unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last().name})
      tree.openElements.removeLast();
    }
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    }
  }

  // the rest

  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /** Opens a `tr` and switches to the in-row phase. */
  void startTagTr(StartTagToken token) {
    clearStackToTableBodyContext();
    tree.insertElement(token);
    parser.phase = parser._inRowPhase;
  }

  /** A cell without a row is an error that implies a `tr` start tag. */
  Token startTagTableCell(StartTagToken token) {
    parser.parseError("unexpected-cell-in-table-body",
        {"name": token.name});
    startTagTr(new StartTagToken("tr", data: {}));
    return token;
  }

  /** Handled exactly like `</table>`: see [endTagTable]. */
  Token startTagTableOther(token) {
    return endTagTable(token);
  }

  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /**
   * Closes the row group named by [token] when it is in table scope and
   * returns to the in-table phase; otherwise reports a parse error.
   */
  void endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError("unexpected-end-tag-in-table-body",
          {"name": token.name});
      return;
    }
    clearStackToTableBodyContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTablePhase;
  }

  /**
   * Closes the current row group, if any is in table scope, and returns
   * [token] to the caller. With no row group in scope (expected only in
   * innerHTML mode) a parse error is reported and null is returned.
   */
  Token endTagTable(TagToken token) {
    // XXX AT Any ideas on how to share this with endTagTable?
    var rowGroupInScope = tree.elementInScope("tbody", variant: "table") ||
        tree.elementInScope("thead", variant: "table") ||
        tree.elementInScope("tfoot", variant: "table");

    if (!rowGroupInScope) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return null;
    }

    clearStackToTableBodyContext();
    var currentName = tree.openElements.last().tagName;
    endTagTableRowGroup(new EndTagToken(currentName, data: {}));
    return token;
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-table-body",
        {"name": token.name});
  }

  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}
| 2455 | |
| 2456 | |
| 2457 class InRowPhase extends Phase { | |
| 2458 // http://www.whatwg.org/specs/web-apps/current-work///in-row | |
| 2459 InRowPhase(parser) : super(parser); | |
| 2460 | |
| 2461 processStartTag(StartTagToken token) { | |
| 2462 switch (token.name) { | |
| 2463 case "html": return startTagHtml(token); | |
| 2464 case "td": case "th": return startTagTableCell(token); | |
| 2465 case "caption": case "col": case "colgroup": case "tbody": case "tfoot": | |
| 2466 case "thead": case "tr": | |
| 2467 return startTagTableOther(token); | |
| 2468 default: return startTagOther(token); | |
| 2469 } | |
| 2470 } | |
| 2471 | |
| 2472 processEndTag(EndTagToken token) { | |
| 2473 switch (token.name) { | |
| 2474 case "tr": return endTagTr(token); | |
| 2475 case "table": return endTagTable(token); | |
| 2476 case "tbody": case "tfoot": case "thead": | |
| 2477 return endTagTableRowGroup(token); | |
| 2478 case "body": case "caption": case "col": case "colgroup": case "html": | |
| 2479 case "td": case "th": | |
| 2480 return endTagIgnore(token); | |
| 2481 default: return endTagOther(token); | |
| 2482 } | |
| 2483 } | |
| 2484 | |
| 2485 // helper methods (XXX unify this with other table helper methods) | |
| 2486 void clearStackToTableRowContext() { | |
| 2487 while (tree.openElements.last().tagName != "tr" && | |
| 2488 tree.openElements.last().tagName != "html") { | |
| 2489 parser.parseError("unexpected-implied-end-tag-in-table-row", | |
| 2490 {"name": tree.openElements.last().tagName}); | |
| 2491 tree.openElements.removeLast(); | |
| 2492 } | |
| 2493 } | |
| 2494 | |
| 2495 bool ignoreEndTagTr() { | |
| 2496 return !tree.elementInScope("tr", variant: "table"); | |
| 2497 } | |
| 2498 | |
| 2499 // the rest | |
| 2500 bool processEOF() { | |
| 2501 parser._inTablePhase.processEOF(); | |
| 2502 return false; | |
| 2503 } | |
| 2504 | |
| 2505 Token processSpaceCharacters(SpaceCharactersToken token) { | |
| 2506 return parser._inTablePhase.processSpaceCharacters(token); | |
| 2507 } | |
| 2508 | |
| 2509 Token processCharacters(CharactersToken token) { | |
| 2510 return parser._inTablePhase.processCharacters(token); | |
| 2511 } | |
| 2512 | |
| 2513 void startTagTableCell(StartTagToken token) { | |
| 2514 clearStackToTableRowContext(); | |
| 2515 tree.insertElement(token); | |
| 2516 parser.phase = parser._inCellPhase; | |
| 2517 tree.activeFormattingElements.add(Marker); | |
| 2518 } | |
| 2519 | |
| 2520 Token startTagTableOther(StartTagToken token) { | |
| 2521 bool ignoreEndTag = ignoreEndTagTr(); | |
| 2522 endTagTr(new EndTagToken("tr", data: {})); | |
| 2523 // XXX how are we sure it's always ignored in the innerHTML case? | |
| 2524 return ignoreEndTag ? null : token; | |
| 2525 } | |
| 2526 | |
/** Any other start tag is handled exactly as in the in-table phase. */
Token startTagOther(StartTagToken token) =>
    parser._inTablePhase.processStartTag(token);
| 2530 | |
/**
 * Closes the current row and switches to the in-table-body phase. When no
 * "tr" is in table scope the tag is reported as a parse error instead.
 */
void endTagTr(EndTagToken token) {
  if (ignoreEndTagTr()) {
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError();
    return;
  }
  clearStackToTableRowContext();
  tree.openElements.removeLast();
  parser.phase = parser._inTableBodyPhase;
}
| 2542 | |
/**
 * Acts as if a "tr" end tag had been seen, then returns [token] so the
 * current tag is reprocessed, unless the implied "tr" end tag was ignored,
 * in which case null is returned.
 */
Token endTagTable(EndTagToken token) {
  // Must be sampled before endTagTr runs, because endTagTr pops the row.
  var rowWasInScope = !ignoreEndTagTr();
  endTagTr(new EndTagToken("tr", data: {}));
  // XXX how are we sure it's always ignored in the innerHTML case?
  if (rowWasInScope) return token;
  return null;
}
| 2550 | |
/**
 * Handles an end tag routed here for a row group. If an element with the
 * token's name is in table scope, the row is closed via an implied "tr" end
 * tag and [token] is returned; otherwise a parse error is reported and null
 * is returned.
 */
Token endTagTableRowGroup(EndTagToken token) {
  if (!tree.elementInScope(token.name, variant: "table")) {
    parser.parseError();
    return null;
  }
  endTagTr(new EndTagToken("tr", data: {}));
  return token;
}
| 2560 | |
/** Reports the end tag as a parse error and otherwise ignores it. */
void endTagIgnore(EndTagToken token) {
  var details = {"name": token.name};
  parser.parseError("unexpected-end-tag-in-table-row", details);
}
| 2565 | |
/** Any other end tag is handled exactly as in the in-table phase. */
Token endTagOther(EndTagToken token) =>
    parser._inTablePhase.processEndTag(token);
| 2569 } | |
| 2570 | |
/**
 * Phase for the "in cell" insertion mode. Ordinary content is delegated to
 * the in-body phase; table-structure tags close the open cell first.
 */
class InCellPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-cell
  InCellPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption":
      case "col":
      case "colgroup":
      case "tbody":
      case "td":
      case "tfoot":
      case "th":
      case "thead":
      case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "td":
      case "th":
        return endTagTableCell(token);
      case "body":
      case "caption":
      case "col":
      case "colgroup":
      case "html":
        return endTagIgnore(token);
      case "table":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
        return endTagImply(token);
      default:
        return endTagOther(token);
    }
  }

  /**
   * Closes the open cell by synthesizing an end tag for the first of "td" or
   * "th" found in table scope. Does nothing when neither is in scope.
   */
  void closeCell() {
    for (var cellName in const ["td", "th"]) {
      if (tree.elementInScope(cellName, variant: "table")) {
        endTagTableCell(new EndTagToken(cellName, data: {}));
        return;
      }
    }
  }

  /** Forwards end-of-file to the in-body phase and always returns false. */
  bool processEOF() {
    var bodyPhase = parser._inBodyPhase;
    bodyPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) =>
      parser._inBodyPhase.processCharacters(token);

  /**
   * A table-structure start tag closes the current cell and returns [token].
   * With no cell in table scope it is reported as a parse error and null is
   * returned.
   */
  Token startTagTableOther(StartTagToken token) {
    var cellInScope = tree.elementInScope("td", variant: "table") ||
        tree.elementInScope("th", variant: "table");
    if (!cellInScope) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return null;
    }
    closeCell();
    return token;
  }

  Token startTagOther(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /**
   * Closes the cell named by [token]: generates implied end tags, pops the
   * cell (reporting an error if it was not the current node), clears the
   * active formatting elements and switches to the in-row phase.
   */
  void endTagTableCell(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError("unexpected-end-tag", {"name": token.name});
      return;
    }

    tree.generateImpliedEndTags(token.name);
    if (tree.openElements.last().tagName == token.name) {
      tree.openElements.removeLast();
    } else {
      parser.parseError("unexpected-cell-end-tag", {"name": token.name});
      popOpenElementsUntil(token.name);
    }
    tree.clearActiveFormattingElements();
    parser.phase = parser._inRowPhase;
  }

  void endTagIgnore(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-end-tag", details);
  }

  /**
   * An end tag for an enclosing table structure closes the cell and returns
   * [token]. When no such element is in table scope it is a parse error and
   * null is returned.
   */
  Token endTagImply(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      // sometimes innerHTML case
      parser.parseError();
      return null;
    }
    closeCell();
    return token;
  }

  Token endTagOther(EndTagToken token) =>
      parser._inBodyPhase.processEndTag(token);
}
| 2666 | |
/** Phase for the "in select" insertion mode. */
class InSelectPhase extends Phase {
  InSelectPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "option":
        return startTagOption(token);
      case "optgroup":
        return startTagOptgroup(token);
      case "select":
        return startTagSelect(token);
      case "input":
      case "keygen":
      case "textarea":
        return startTagInput(token);
      case "script":
        return startTagScript(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "option":
        return endTagOption(token);
      case "optgroup":
        return endTagOptgroup(token);
      case "select":
        return endTagSelect(token);
      default:
        return endTagOther(token);
    }
  }

  /**
   * Pops the current node if its tag name is [tagName]. Returns whether an
   * element was popped.
   */
  bool _popCurrentNodeIf(String tagName) {
    if (tree.openElements.last().tagName != tagName) return false;
    tree.openElements.removeLast();
    return true;
  }

  // http://www.whatwg.org/specs/web-apps/current-work/#in-select
  bool processEOF() {
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError("eof-in-select");
    }
    return false;
  }

  /** Inserts the token's text, except for a lone NUL which is dropped. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") {
      tree.insertText(token.data);
    }
    return null;
  }

  void startTagOption(StartTagToken token) {
    // We need to imply </option> if <option> is the current node.
    _popCurrentNodeIf("option");
    tree.insertElement(token);
  }

  void startTagOptgroup(StartTagToken token) {
    // Imply </option> and then </optgroup> when they are the current node.
    _popCurrentNodeIf("option");
    _popCurrentNodeIf("optgroup");
    tree.insertElement(token);
  }

  void startTagSelect(StartTagToken token) {
    parser.parseError("unexpected-select-in-select");
    var impliedEnd = new EndTagToken("select", data: {});
    endTagSelect(impliedEnd);
  }

  /**
   * Always a parse error. If a "select" is in select scope it is closed and
   * [token] is returned; otherwise null is returned.
   */
  Token startTagInput(StartTagToken token) {
    parser.parseError("unexpected-input-in-select");
    if (!tree.elementInScope("select", variant: "select")) {
      assert(parser.innerHTMLMode);
      return null;
    }
    endTagSelect(new EndTagToken("select", data: {}));
    return token;
  }

  Token startTagScript(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-start-tag-in-select", details);
    return null;
  }

  void endTagOption(EndTagToken token) {
    if (!_popCurrentNodeIf("option")) {
      parser.parseError("unexpected-end-tag-in-select",
          {"name": "option"});
    }
  }

  void endTagOptgroup(EndTagToken token) {
    var stack = tree.openElements;
    // </optgroup> implicitly closes <option>
    if (stack.last().tagName == "option" &&
        stack[stack.length - 2].tagName == "optgroup") {
      stack.removeLast();
    }
    // It also closes </optgroup>, but nothing else.
    if (!_popCurrentNodeIf("optgroup")) {
      parser.parseError("unexpected-end-tag-in-select",
          {"name": "optgroup"});
    }
  }

  /**
   * Pops elements up to the "select" and resets the insertion mode. With no
   * "select" in select scope the tag is reported as a parse error instead.
   */
  void endTagSelect(EndTagToken token) {
    if (!tree.elementInScope("select", variant: "select")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return;
    }
    popOpenElementsUntil("select");
    parser.resetInsertionMode();
  }

  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-end-tag-in-select", details);
  }
}
| 2792 | |
| 2793 | |
/**
 * Phase for the "in select in table" insertion mode. Table-related tags are
 * handled here; everything else is delegated to the in-select phase.
 */
class InSelectInTablePhase extends Phase {
  InSelectInTablePhase(parser) : super(parser);

  /** Whether [name] is one of the table tag names handled by this phase. */
  bool _isTableElementName(String name) {
    switch (name) {
      case "caption":
      case "table":
      case "tbody":
      case "tfoot":
      case "thead":
      case "tr":
      case "td":
      case "th":
        return true;
      default:
        return false;
    }
  }

  processStartTag(StartTagToken token) {
    if (_isTableElementName(token.name)) return startTagTable(token);
    return startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    if (_isTableElementName(token.name)) return endTagTable(token);
    return endTagOther(token);
  }

  /** Forwards end-of-file to the in-select phase and always returns false. */
  bool processEOF() {
    var selectPhase = parser._inSelectPhase;
    selectPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) =>
      parser._inSelectPhase.processCharacters(token);

  /**
   * A table start tag is a parse error. An implied "select" end tag is
   * processed and [token] is returned.
   */
  Token startTagTable(StartTagToken token) {
    parser.parseError("unexpected-table-element-start-tag-in-select-in-table",
        {"name": token.name});
    var impliedEnd = new EndTagToken("select", data: {});
    endTagOther(impliedEnd);
    return token;
  }

  Token startTagOther(StartTagToken token) =>
      parser._inSelectPhase.processStartTag(token);

  /**
   * A table end tag is a parse error. If the named element is in table scope,
   * an implied "select" end tag is processed and [token] is returned;
   * otherwise null is returned.
   */
  Token endTagTable(EndTagToken token) {
    parser.parseError("unexpected-table-element-end-tag-in-select-in-table",
        {"name": token.name});
    if (!tree.elementInScope(token.name, variant: "table")) return null;
    endTagOther(new EndTagToken("select", data: {}));
    return token;
  }

  Token endTagOther(EndTagToken token) =>
      parser._inSelectPhase.processEndTag(token);
}
| 2848 | |
| 2849 | |
/**
 * Phase used while the current node is in a foreign (MathML or SVG)
 * namespace.
 */
class InForeignContentPhase extends Phase {
  // TODO(jmesserly): this is sorted so we could binary search.
  /**
   * HTML start tags that are a parse error in foreign content; they pop the
   * stack (see [processStartTag]) and are then returned for reprocessing.
   */
  const breakoutElements = const [
    'b', 'big', 'blockquote', 'body', 'br','center', 'code', 'dd', 'div', 'dl',
    'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i',
    'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby',
    's', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tt', 'u',
    'ul', 'var'
  ];

  InForeignContentPhase(parser) : super(parser);

  /**
   * Rewrites the lowercase tag name on [token] to its mixed-case SVG
   * spelling when it is one of the known SVG element names; otherwise the
   * token is left untouched.
   */
  void adjustSVGTagNames(token) {
    final replacements = const {
      "altglyph":"altGlyph",
      "altglyphdef":"altGlyphDef",
      "altglyphitem":"altGlyphItem",
      "animatecolor":"animateColor",
      "animatemotion":"animateMotion",
      "animatetransform":"animateTransform",
      "clippath":"clipPath",
      "feblend":"feBlend",
      "fecolormatrix":"feColorMatrix",
      "fecomponenttransfer":"feComponentTransfer",
      "fecomposite":"feComposite",
      "feconvolvematrix":"feConvolveMatrix",
      "fediffuselighting":"feDiffuseLighting",
      "fedisplacementmap":"feDisplacementMap",
      "fedistantlight":"feDistantLight",
      "feflood":"feFlood",
      "fefunca":"feFuncA",
      "fefuncb":"feFuncB",
      "fefuncg":"feFuncG",
      "fefuncr":"feFuncR",
      "fegaussianblur":"feGaussianBlur",
      "feimage":"feImage",
      "femerge":"feMerge",
      "femergenode":"feMergeNode",
      "femorphology":"feMorphology",
      "feoffset":"feOffset",
      "fepointlight":"fePointLight",
      "fespecularlighting":"feSpecularLighting",
      "fespotlight":"feSpotLight",
      "fetile":"feTile",
      "feturbulence":"feTurbulence",
      "foreignobject":"foreignObject",
      "glyphref":"glyphRef",
      "lineargradient":"linearGradient",
      "radialgradient":"radialGradient",
      "textpath":"textPath"
    };

    var replace = replacements[token.name];
    if (replace != null) {
      token.name = replace;
    }
  }

  /**
   * Replaces a lone NUL with U+FFFD, clears the parser's framesetOK flag on
   * non-whitespace text, then defers to the base [Phase] handling.
   */
  Token processCharacters(CharactersToken token) {
    if (token.data == "\u0000") {
      token.data = "\uFFFD";
    } else if (parser.framesetOK && !allWhitespace(token.data)) {
      parser.framesetOK = false;
    }
    super.processCharacters(token);
  }

  /**
   * Handles a start tag in foreign content.
   *
   * Breakout elements (and `<font>` with a color, face or size attribute)
   * are a parse error: foreign elements are popped until the current node is
   * in the default namespace or is an integration point, and [token] is
   * returned for reprocessing. Any other tag is inserted in the namespace of
   * the current node after the MathML/SVG/foreign adjustments.
   */
  Token processStartTag(StartTagToken token) {
    var currentNode = tree.openElements.last();
    if (breakoutElements.indexOf(token.name) >= 0 ||
        (token.name == "font" &&
         (token.data.containsKey("color") ||
          token.data.containsKey("face") ||
          token.data.containsKey("size")))) {

      parser.parseError("unexpected-html-element-in-foreign-content",
          {'name': token.name});
      while (tree.openElements.last().namespace !=
           tree.defaultNamespace &&
           !parser.isHTMLIntegrationPoint(tree.openElements.last()) &&
           !parser.isMathMLTextIntegrationPoint(tree.openElements.last())) {
        tree.openElements.removeLast();
      }
      return token;

    } else {
      if (currentNode.namespace == Namespaces.mathml) {
        parser.adjustMathMLAttributes(token);
      } else if (currentNode.namespace == Namespaces.svg) {
        adjustSVGTagNames(token);
        parser.adjustSVGAttributes(token);
      }
      parser.adjustForeignAttributes(token);
      token.namespace = currentNode.namespace;
      tree.insertElement(token);
      if (token.selfClosing) {
        tree.openElements.removeLast();
        token.selfClosingAcknowledged = true;
      }
    }
  }

  /**
   * Handles an end tag in foreign content.
   *
   * Walks down the stack of open elements looking for a node whose
   * lowercased tag name equals the token name; when found, everything up to
   * and including that node is popped. If a node in the default namespace is
   * reached first, the token is handed to the current phase and its result
   * is returned.
   */
  Token processEndTag(EndTagToken token) {
    var nodeIndex = tree.openElements.length - 1;
    var node = tree.openElements.last();
    // Compare case-insensitively, as the loop below does: SVG elements are
    // inserted under their adjusted mixed-case name (e.g. "foreignObject"),
    // so a case-sensitive check would report a correct end tag as an error.
    if (asciiUpper2Lower(node.tagName) != token.name) {
      parser.parseError("unexpected-end-tag", {"name": token.name});
    }

    var newToken = null;
    while (true) {
      if (asciiUpper2Lower(node.tagName) == token.name) {
        //XXX this isn't in the spec but it seems necessary
        if (parser.phase == parser._inTableTextPhase) {
          InTableTextPhase inTableText = parser.phase;
          inTableText.flushCharacters();
          parser.phase = inTableText.originalPhase;
        }
        // Pop up to and including the matched node.
        while (tree.openElements.removeLast() != node) {
          assert(tree.openElements.length > 0);
        }
        newToken = null;
        break;
      }
      nodeIndex -= 1;

      node = tree.openElements[nodeIndex];
      if (node.namespace != tree.defaultNamespace) {
        continue;
      } else {
        newToken = parser.phase.processEndTag(token);
        break;
      }
    }
    return newToken;
  }
}
| 2987 | |
| 2988 | |
/** Phase for the "after body" insertion mode. */
class AfterBodyPhase extends Phase {
  AfterBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    if (token.name != "html") return startTagOther(token);
    return startTagHtml(token);
  }

  processEndTag(EndTagToken token) {
    if (token.name != "html") return endTagOther(token);
    return endTagHtml(token);
  }

  // Stop parsing
  bool processEOF() {
    return false;
  }

  Token processComment(CommentToken token) {
    // This is needed because data is to be appended to the <html> element
    // here and not to whatever is currently open.
    var rootElement = tree.openElements[0];
    tree.insertComment(token, rootElement);
    return null;
  }

  /** Parse error; switches to the in-body phase and returns [token]. */
  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-after-body");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  /** Parse error; switches to the in-body phase and returns [token]. */
  Token startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-start-tag-after-body", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /**
   * Moves on to the after-after-body phase, except in innerHTML mode where
   * the end tag is reported as a parse error.
   */
  void endTagHtml(name) {
    if (!parser.innerHTMLMode) {
      parser.phase = parser._afterAfterBodyPhase;
      return;
    }
    parser.parseError("unexpected-end-tag-after-body-innerhtml");
  }

  /** Parse error; switches to the in-body phase and returns [token]. */
  Token endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-end-tag-after-body", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
| 3043 | |
/** Phase for the "in frameset" insertion mode. */
class InFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  InFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "frameset":
        return startTagFrameset(token);
      case "frame":
        return startTagFrame(token);
      case "noframes":
        return startTagNoframes(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    if (token.name == "frameset") return endTagFrameset(token);
    return endTagOther(token);
  }

  bool processEOF() {
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError("eof-in-frameset");
    }
    return false;
  }

  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-in-frameset");
    return null;
  }

  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
  }

  /** Inserts the frame element and pops it again right away. */
  void startTagFrame(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  Token startTagNoframes(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-start-tag-in-frameset", details);
    return null;
  }

  /**
   * Pops the current frameset, or reports a parse error when the current
   * node is the "html" element. Outside innerHTML mode, switches to the
   * after-frameset phase once the current node is no longer a "frameset".
   */
  void endTagFrameset(EndTagToken token) {
    if (tree.openElements.last().tagName != "html") {
      tree.openElements.removeLast();
    } else {
      // innerHTML case
      parser.parseError("unexpected-frameset-in-frameset-innerhtml");
    }

    if (parser.innerHTMLMode ||
        tree.openElements.last().tagName == "frameset") {
      return;
    }
    // We're not in innerHTML mode and the current node is not a
    // "frameset" element (anymore), so switch.
    parser.phase = parser._afterFramesetPhase;
  }

  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-end-tag-in-frameset", details);
  }
}
| 3115 | |
| 3116 | |
/** Phase for the "after frameset" insertion mode. */
class AfterFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#after3
  AfterFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    if (token.name == "html") return endTagHtml(token);
    return endTagOther(token);
  }

  // Stop parsing
  bool processEOF() {
    return false;
  }

  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-after-frameset");
    return null;
  }

  Token startTagNoframes(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  void startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-start-tag-after-frameset", details);
  }

  /** Moves on to the after-after-frameset phase. */
  void endTagHtml(EndTagToken token) {
    parser.phase = parser._afterAfterFramesetPhase;
  }

  void endTagOther(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("unexpected-end-tag-after-frameset", details);
  }
}
| 3161 | |
| 3162 | |
/**
 * Phase for the "after after body" insertion mode. Apart from comments,
 * whitespace and an "html" start tag, every token is a parse error that
 * switches back to the in-body phase and is returned for reprocessing.
 */
class AfterAfterBodyPhase extends Phase {
  AfterAfterBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    if (token.name != 'html') return startTagOther(token);
    return startTagHtml(token);
  }

  bool processEOF() {
    return false;
  }

  /** Comments are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-eof-but-got-char");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError("expected-eof-but-got-start-tag", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }

  Token processEndTag(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("expected-eof-but-got-end-tag", details);
    parser.phase = parser._inBodyPhase;
    return token;
  }
}
| 3203 | |
/**
 * Phase for the "after after frameset" insertion mode. Unexpected tokens are
 * reported as parse errors without changing the phase.
 */
class AfterAfterFramesetPhase extends Phase {
  AfterAfterFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == "html") return startTagHtml(token);
    if (tagName == "noframes") return startTagNoFrames(token);
    return startTagOther(token);
  }

  bool processEOF() {
    return false;
  }

  /** Comments are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-eof-but-got-char");
    return null;
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagNoFrames(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  void startTagOther(StartTagToken token) {
    var details = {"name": token.name};
    parser.parseError("expected-eof-but-got-start-tag", details);
  }

  Token processEndTag(EndTagToken token) {
    var details = {"name": token.name};
    parser.parseError("expected-eof-but-got-end-tag", details);
    return null;
  }
}
| 3247 | |
| 3248 | |
/**
 * An error found in the parsed document: an [errorCode], the source [span]
 * it refers to, and the [data] used to fill in the message.
 */
class ParseError implements Exception {
  final String errorCode;
  final Span span;
  final Map data;

  ParseError(this.errorCode, this.span, this.data);

  /** The line of [span]. */
  int get line => span.line;

  /** The column of [span]. */
  int get column => span.column;

  /** The message for [errorCode], formatted with [data]. */
  String get message {
    var template = errorMessages[errorCode];
    return formatStr(template, data);
  }

  String toString() {
    return "ParseError at line $line column $column: $message";
  }
}
| OLD | NEW |