html5parser.dart - Issue 10916294: switch html5lib to new pkg layout

Side by Side Diff: html5parser.dart

Issue 10916294: switch html5lib to new pkg layout (Closed) Base URL: https://github.com/dart-lang/html5lib.git@master

Patch Set: Created 8 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 #library('html5parser');

2

3 #import('dart:math');

4 #import('package:logging/logging.dart');

5 #import('treebuilders/base.dart'); // for Marker

6 #import('treebuilders/simpletree.dart');

7 #import('lib/constants.dart');

8 #import('lib/encoding_parser.dart');

9 #import('lib/token.dart');

10 #import('lib/utils.dart');

11 #import('tokenizer.dart');

12

13 // TODO(jmesserly): these APIs, as well as the HTMLParser constructor and

14 // HTMLParser.parse and parseFragment were changed a bit to avoid passing a

15 // first class type that is used for construction. It might be okay, but I'd

16 // like to find a good dependency-injection pattern for Dart rather than

17 // copy the Python API.

18 // TODO(jmesserly): Also some of the HTMLParser APIs are messed up to avoid

19 // editor shadowing warnings :\. Look for trailing underscores.

/**
 * Parses a complete html5 [doc]ument into a tree. The [doc] may be a
 * [String], a [RandomAccessFile] or a [List<int>] of bytes.
 *
 * The optional [treebuilder] is handed to the [HTMLParser] that does the work.
 *
 * The optional [encoding] must be a string that names the encoding. When it is
 * given, that encoding is used regardless of any BOM or later declaration
 * (such as in a meta element).
 */
Document parse(doc, [TreeBuilder treebuilder, String encoding]) {
  // Build the tokenizer first, then the parser that consumes it.
  var tokenStream = new HTMLTokenizer(doc, encoding: encoding);
  var htmlParser = new HTMLParser(treebuilder);
  return htmlParser.parse(tokenStream);
}

32

/**
 * Parses an html5 [doc]ument fragment into a tree. The [doc] may be a
 * [String], a [RandomAccessFile] or a [List<int>] of bytes. Pass a
 * [container] to change the type of the containing element (it defaults to
 * "div").
 *
 * The optional [treebuilder] is handed to the [HTMLParser] that does the work.
 *
 * The optional [encoding] must be a string that names the encoding. When it is
 * given, that encoding is used regardless of any BOM or later declaration
 * (such as in a meta element).
 */
DocumentFragment parseFragment(doc, [String container = "div",
    TreeBuilder treebuilder, String encoding]) {
  var tokenStream = new HTMLTokenizer(doc, encoding: encoding);
  var htmlParser = new HTMLParser(treebuilder);
  var fragment = htmlParser.parseFragment(tokenStream, container_: container);
  return fragment;
}

48

49

/**
 * HTML parser. Generates a tree structure from a stream of (possibly malformed)
 * HTML.
 *
 * The parser pulls tokens from an [HTMLTokenizer], dispatches each one to the
 * currently active [Phase] object and lets that phase drive the [TreeBuilder].
 * Parse errors are collected in [errors]; in [strict] mode the first one is
 * thrown instead.
 */
class HTMLParser {
  /** Raise an exception on the first error encountered. */
  bool strict;

  /** The tree builder that receives every insertion made while parsing. */
  final TreeBuilder tree;

  /** Parse errors recorded since the last [reset]. */
  List<ParseError> errors;

  // TODO(jmesserly): would be faster not to use Map lookup.
  Map<String, Phase> phases;

  /** True while parsing a fragment (set by [parseFragment]). */
  bool innerHTMLMode;

  /** Name of the fragment's container element, as passed by the caller. */
  String container;

  bool firstStartTag = false;

  // TODO(jmesserly): use enum?
  /** "quirks" / "limited quirks" / "no quirks" */
  String compatMode = "no quirks";

  /** innerHTML container when parsing document fragment. */
  String innerHTML;

  /** The phase that handles the next token. */
  Phase phase;

  Phase lastPhase;

  /** Phase that was active before switching to the text phase. */
  Phase originalPhase;

  Phase beforeRCDataPhase;

  bool framesetOK;

  HTMLTokenizer tokenizer;

  // These fields hold the different phase singletons. At any given time one
  // of them will be active.
  InitialPhase _initialPhase;
  BeforeHtmlPhase _beforeHtmlPhase;
  BeforeHeadPhase _beforeHeadPhase;
  InHeadPhase _inHeadPhase;
  AfterHeadPhase _afterHeadPhase;
  InBodyPhase _inBodyPhase;
  TextPhase _textPhase;
  InTablePhase _inTablePhase;
  InTableTextPhase _inTableTextPhase;
  InCaptionPhase _inCaptionPhase;
  InColumnGroupPhase _inColumnGroupPhase;
  InTableBodyPhase _inTableBodyPhase;
  InRowPhase _inRowPhase;
  InCellPhase _inCellPhase;
  InSelectPhase _inSelectPhase;
  InSelectInTablePhase _inSelectInTablePhase;
  InForeignContentPhase _inForeignContentPhase;
  AfterBodyPhase _afterBodyPhase;
  InFramesetPhase _inFramesetPhase;
  AfterFramesetPhase _afterFramesetPhase;
  AfterAfterBodyPhase _afterAfterBodyPhase;
  AfterAfterFramesetPhase _afterAfterFramesetPhase;

  /**
   * Create a new HTMLParser and configure the [tree] builder and [strict] mode.
   *
   * When [tree] is omitted a `new TreeBuilder(true)` is used.
   */
  HTMLParser([TreeBuilder tree, this.strict = false])
      : tree = tree != null ? tree : new TreeBuilder(true),
        errors = <ParseError>[] {

    _initialPhase = new InitialPhase(this);
    _beforeHtmlPhase = new BeforeHtmlPhase(this);
    _beforeHeadPhase = new BeforeHeadPhase(this);
    _inHeadPhase = new InHeadPhase(this);
    // XXX "inHeadNoscript": new InHeadNoScriptPhase(this);
    _afterHeadPhase = new AfterHeadPhase(this);
    _inBodyPhase = new InBodyPhase(this);
    _textPhase = new TextPhase(this);
    _inTablePhase = new InTablePhase(this);
    _inTableTextPhase = new InTableTextPhase(this);
    _inCaptionPhase = new InCaptionPhase(this);
    _inColumnGroupPhase = new InColumnGroupPhase(this);
    _inTableBodyPhase = new InTableBodyPhase(this);
    _inRowPhase = new InRowPhase(this);
    _inCellPhase = new InCellPhase(this);
    _inSelectPhase = new InSelectPhase(this);
    _inSelectInTablePhase = new InSelectInTablePhase(this);
    _inForeignContentPhase = new InForeignContentPhase(this);
    _afterBodyPhase = new AfterBodyPhase(this);
    _inFramesetPhase = new InFramesetPhase(this);
    _afterFramesetPhase = new AfterFramesetPhase(this);
    _afterAfterBodyPhase = new AfterAfterBodyPhase(this);
    _afterAfterFramesetPhase = new AfterAfterFramesetPhase(this);
    // XXX after after frameset
  }

  /**
   * Parse a HTML document into a well-formed tree
   *
   * [tokenizer_] - an object that provides a stream of tokens to the
   * treebuilder. This may be replaced for e.g. a sanitizer which converts some
   * tags to text. Otherwise, construct an instance of HTMLTokenizer with the
   * appropriate options.
   *
   * Returns the document held by [tree] once parsing has finished.
   */
  Document parse(HTMLTokenizer tokenizer_) {
    _parse(tokenizer_, innerHTML_: false);
    return tree.getDocument();
  }

  /**
   * Parse a HTML fragment into a well-formed tree fragment.
   *
   * [container_] - name of the element we're setting the innerHTML property
   * if set to null, default to 'div'.
   *
   * [tokenizer_] - an object that provides a stream of tokens to the
   * treebuilder. This may be replaced for e.g. a sanitizer which converts some
   * tags to text. Otherwise, construct an instance of HTMLTokenizer with the
   * appropriate options.
   *
   * Returns the fragment held by [tree] once parsing has finished.
   */
  DocumentFragment parseFragment(HTMLTokenizer tokenizer_,
      [String container_ = "div"]) {
    _parse(tokenizer_, innerHTML_: true, container_: container_);
    return tree.getFragment();
  }

  /**
   * Shared driver behind [parse] and [parseFragment]: records the parse
   * settings, attaches this parser to [tokenizer_], resets all state and runs
   * [mainLoop] until it completes without a [ReparseException].
   */
  void _parse(HTMLTokenizer tokenizer_, [bool innerHTML_ = false,
      String container_ = "div"]) {

    innerHTMLMode = innerHTML_;
    container = container_;
    tokenizer = tokenizer_;
    // TODO(jmesserly): this feels a little strange, but it's needed for CDATA.
    // Maybe we should change the API to having the parser create the tokenizer.
    tokenizer.parser = this;

    reset();

    while (true) {
      try {
        mainLoop();
        break;
      } on ReparseException {
        // Start over from a clean state and run the main loop again.
        // NOTE(review): where ReparseException is thrown is not visible in this
        // file; presumably on an encoding change — confirm.
        reset();
      }
    }
  }

  /**
   * Puts the parser (and its [tree]) back into the starting state.
   *
   * In fragment mode the tokenizer state is chosen from the lower-cased
   * [container] name, an html root is inserted and the insertion mode is
   * derived from the open elements; otherwise parsing starts in the initial
   * phase.
   */
  void reset() {
    tree.reset();
    firstStartTag = false;
    errors = <ParseError>[];
    // "quirks" / "limited quirks" / "no quirks"
    compatMode = "no quirks";

    if (innerHTMLMode) {
      innerHTML = container.toLowerCase();

      if (cdataElements.indexOf(innerHTML) >= 0) {
        tokenizer.state = tokenizer.rcdataState;
      } else if (rcdataElements.indexOf(innerHTML) >= 0) {
        tokenizer.state = tokenizer.rawtextState;
      } else if (innerHTML == 'plaintext') {
        tokenizer.state = tokenizer.plaintextState;
      } else {
        // state already is data state
        // tokenizer.state = tokenizer.dataState;
      }
      phase = _beforeHtmlPhase;
      _beforeHtmlPhase.insertHtmlElement();
      resetInsertionMode();
    } else {
      innerHTML = null;
      phase = _initialPhase;
    }

    lastPhase = null;
    beforeRCDataPhase = null;
    framesetOK = true;
  }

  /**
   * Whether [element] is an HTML integration point: a MathML `annotation-xml`
   * whose `encoding` attribute is (case-insensitively) "text/html" or
   * "application/xhtml+xml", or any element listed in
   * [htmlIntegrationPointElements].
   */
  bool isHTMLIntegrationPoint(Node element) {
    if (element.tagName == "annotation-xml" &&
        element.namespace == Namespaces.mathml) {
      var enc = element.attributes["encoding"];
      if (enc != null) enc = asciiUpper2Lower(enc);
      return enc == "text/html" || enc == "application/xhtml+xml";
    } else {
      return htmlIntegrationPointElements.indexOf(
          new Pair(element.namespace, element.tagName)) >= 0;
    }
  }

  /**
   * Whether [element] is listed in [mathmlTextIntegrationPointElements].
   */
  bool isMathMLTextIntegrationPoint(Node element) {
    return mathmlTextIntegrationPointElements.indexOf(
        new Pair(element.namespace, element.tagName)) >= 0;
  }

  /**
   * Whether a token of kind [type] must be handled by the foreign-content
   * phase, judged from the element on top of the open-elements stack.
   *
   * Returns false when the stack is empty, when the current node is in the
   * tree's default namespace, or when one of the integration-point exceptions
   * below applies.
   */
  bool inForeignContent(Token token, int type) {
    if (tree.openElements.length == 0) return false;

    var node = tree.openElements.last();
    if (node.namespace == tree.defaultNamespace) return false;

    if (isMathMLTextIntegrationPoint(node)) {
      if (type == TokenKind.startTag &&
          (token as StartTagToken).name != "mglyph" &&
          (token as StartTagToken).name != "malignmark") {
        return false;
      }
      if (type == TokenKind.characters || type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    if (node.tagName == "annotation-xml" && type == TokenKind.startTag &&
        (token as StartTagToken).name == "svg") {
      return false;
    }

    if (isHTMLIntegrationPoint(node)) {
      if (type == TokenKind.startTag ||
          type == TokenKind.characters ||
          type == TokenKind.spaceCharacters) {
        return false;
      }
    }

    return true;
  }

  /**
   * Pumps every token from [tokenizer] through the active phase, then keeps
   * calling `processEOF` on the current phase for as long as it asks to be
   * reprocessed.
   *
   * A phase may hand back a token to be processed again (typically after
   * switching [phase]); the inner loop runs until a phase returns null.
   */
  void mainLoop() {
    while (tokenizer.hasNext()) {
      var token = normalizeToken(tokenizer.next());
      var newToken = token;
      int type;
      while (newToken != null) {
        type = newToken.kind;

        // Note: avoid "is" test here, see http://dartbug.com/4795
        if (type == TokenKind.parseError) {
          ParseErrorToken error = newToken;
          parseError(error.data, error.messageParams);
          newToken = null;
        } else {
          Phase phase_ = phase;
          // NOTE(review): this passes the original [token] together with the
          // kind of [newToken]; if a phase ever returns a different token
          // object the two could disagree — confirm this is intended.
          if (inForeignContent(token, type)) {
            phase_ = _inForeignContentPhase;
          }

          switch (type) {
            case TokenKind.characters:
              newToken = phase_.processCharacters(newToken);
              break;
            case TokenKind.spaceCharacters:
              newToken = phase_.processSpaceCharacters(newToken);
              break;
            case TokenKind.startTag:
              newToken = phase_.processStartTag(newToken);
              break;
            case TokenKind.endTag:
              newToken = phase_.processEndTag(newToken);
              break;
            case TokenKind.comment:
              newToken = phase_.processComment(newToken);
              break;
            case TokenKind.doctype:
              newToken = phase_.processDoctype(newToken);
              break;
          }
        }
      }

      // A self-closing start tag that no phase acknowledged is an error.
      if (token is StartTagToken) {
        if (token.selfClosing && !token.selfClosingAcknowledged) {
          parseError("non-void-element-with-trailing-solidus",
              {"name": token.name});
        }
      }
    }

    // When the loop finishes it's EOF
    var reprocess = true;
    var reprocessPhases = [];
    while (reprocess) {
      reprocessPhases.add(phase);
      reprocess = phase.processEOF();
      if (reprocess) {
        // A phase asking for reprocessing must have moved on to a phase that
        // has not handled EOF yet, otherwise this loop would never end.
        assert(reprocessPhases.indexOf(phase) == -1);
      }
    }
  }

  /**
   * Records a [ParseError] with [errorcode] and [datavars] at the tokenizer
   * stream's current position, and throws it when [strict] is set.
   */
  void parseError([String errorcode = "XXX-undefined-error",
      Map datavars = const {}]) {
    // XXX The idea is to make errorcode mandatory.
    var position = tokenizer.stream.position();
    var err = new ParseError(errorcode, position, datavars);
    errors.add(err);
    if (strict) throw err;
  }

  /** HTML5 specific normalizations to the token stream. */
  Token normalizeToken(Token token) {
    if (token is StartTagToken) {
      // Replace the start tag's attribute data with the result of makeDict.
      token.data = makeDict(token.data);
    }
    return token;
  }

  /**
   * Renames the lower-cased MathML attribute `definitionurl` on [token] to
   * its mixed-case form `definitionURL`, if present.
   */
  void adjustMathMLAttributes(StartTagToken token) {
    var orig = token.data.remove("definitionurl");
    if (orig != null) {
      token.data["definitionURL"] = orig;
    }
  }

  /**
   * Renames the lower-cased SVG attribute names on [token] to their
   * mixed-case forms, keeping each attribute's value.
   */
  void adjustSVGAttributes(Token token) {
    final replacements = const {
      "attributename":"attributeName",
      "attributetype":"attributeType",
      "basefrequency":"baseFrequency",
      "baseprofile":"baseProfile",
      "calcmode":"calcMode",
      "clippathunits":"clipPathUnits",
      "contentscripttype":"contentScriptType",
      "contentstyletype":"contentStyleType",
      "diffuseconstant":"diffuseConstant",
      "edgemode":"edgeMode",
      "externalresourcesrequired":"externalResourcesRequired",
      "filterres":"filterRes",
      "filterunits":"filterUnits",
      "glyphref":"glyphRef",
      "gradienttransform":"gradientTransform",
      "gradientunits":"gradientUnits",
      "kernelmatrix":"kernelMatrix",
      "kernelunitlength":"kernelUnitLength",
      "keypoints":"keyPoints",
      "keysplines":"keySplines",
      "keytimes":"keyTimes",
      "lengthadjust":"lengthAdjust",
      "limitingconeangle":"limitingConeAngle",
      "markerheight":"markerHeight",
      "markerunits":"markerUnits",
      "markerwidth":"markerWidth",
      "maskcontentunits":"maskContentUnits",
      "maskunits":"maskUnits",
      "numoctaves":"numOctaves",
      "pathlength":"pathLength",
      "patterncontentunits":"patternContentUnits",
      "patterntransform":"patternTransform",
      "patternunits":"patternUnits",
      "pointsatx":"pointsAtX",
      "pointsaty":"pointsAtY",
      "pointsatz":"pointsAtZ",
      "preservealpha":"preserveAlpha",
      "preserveaspectratio":"preserveAspectRatio",
      "primitiveunits":"primitiveUnits",
      "refx":"refX",
      "refy":"refY",
      "repeatcount":"repeatCount",
      "repeatdur":"repeatDur",
      "requiredextensions":"requiredExtensions",
      "requiredfeatures":"requiredFeatures",
      "specularconstant":"specularConstant",
      "specularexponent":"specularExponent",
      "spreadmethod":"spreadMethod",
      "startoffset":"startOffset",
      "stddeviation":"stdDeviation",
      "stitchtiles":"stitchTiles",
      "surfacescale":"surfaceScale",
      "systemlanguage":"systemLanguage",
      "tablevalues":"tableValues",
      "targetx":"targetX",
      "targety":"targetY",
      "textlength":"textLength",
      "viewbox":"viewBox",
      "viewtarget":"viewTarget",
      "xchannelselector":"xChannelSelector",
      "ychannelselector":"yChannelSelector",
      "zoomandpan":"zoomAndPan"
    };
    // Iterate over a snapshot of the keys: the loop body removes and adds
    // entries, which must not happen on a collection that is being iterated.
    for (var originalName in new List.from(token.data.getKeys())) {
      var svgName = replacements[originalName];
      if (svgName != null) {
        token.data[svgName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Replaces the string keys of namespaced attributes on [token] (xlink:*,
   * xml:*, xmlns, xmlns:xlink) with [AttributeName] keys carrying prefix,
   * local name and namespace; values are kept.
   */
  void adjustForeignAttributes(Token token) {
    // TODO(jmesserly): I don't like mixing non-string objects with strings in
    // the Node.attributes Map. Is there another solution?
    final replacements = const {
      "xlink:actuate": const AttributeName("xlink", "actuate",
            Namespaces.xlink),
      "xlink:arcrole": const AttributeName("xlink", "arcrole",
            Namespaces.xlink),
      "xlink:href": const AttributeName("xlink", "href", Namespaces.xlink),
      "xlink:role": const AttributeName("xlink", "role", Namespaces.xlink),
      "xlink:show": const AttributeName("xlink", "show", Namespaces.xlink),
      "xlink:title": const AttributeName("xlink", "title", Namespaces.xlink),
      "xlink:type": const AttributeName("xlink", "type", Namespaces.xlink),
      "xml:base": const AttributeName("xml", "base", Namespaces.xml),
      "xml:lang": const AttributeName("xml", "lang", Namespaces.xml),
      "xml:space": const AttributeName("xml", "space", Namespaces.xml),
      "xmlns": const AttributeName(null, "xmlns", Namespaces.xmlns),
      "xmlns:xlink": const AttributeName("xmlns", "xlink", Namespaces.xmlns)
    };

    // Iterate over a snapshot of the keys: the loop body removes and adds
    // entries, which must not happen on a collection that is being iterated.
    for (var originalName in new List.from(token.data.getKeys())) {
      var foreignName = replacements[originalName];
      if (foreignName != null) {
        token.data[foreignName] = token.data.remove(originalName);
      }
    }
  }

  /**
   * Picks [phase] from the stack of open elements, walking from the most
   * recently opened element down to the root. Falls back to the in-body phase
   * when no element decides.
   */
  void resetInsertionMode() {
    // The name of this method is mostly historical. (It's also used in the
    // specification.)
    for (Node node in reversed(tree.openElements)) {
      var nodeName = node.tagName;
      bool last = node == tree.openElements[0];
      if (last) {
        // The bottom of the stack stands in for the fragment's container.
        assert(innerHTMLMode);
        nodeName = innerHTML;
      }
      // Check for conditions that should only happen in the innerHTML
      // case
      switch (nodeName) {
        case "select": case "colgroup": case "head": case "html":
          assert(innerHTMLMode);
          break;
      }
      // Elements outside the default namespace never decide the mode, except
      // for the bottom-most entry.
      if (!last && node.namespace != tree.defaultNamespace) {
        continue;
      }
      switch (nodeName) {
        case "select": phase = _inSelectPhase; return;
        case "td": phase = _inCellPhase; return;
        case "th": phase = _inCellPhase; return;
        case "tr": phase = _inRowPhase; return;
        case "tbody": phase = _inTableBodyPhase; return;
        case "thead": phase = _inTableBodyPhase; return;
        case "tfoot": phase = _inTableBodyPhase; return;
        case "caption": phase = _inCaptionPhase; return;
        case "colgroup": phase = _inColumnGroupPhase; return;
        case "table": phase = _inTablePhase; return;
        case "head": phase = _inBodyPhase; return;
        case "body": phase = _inBodyPhase; return;
        case "frameset": phase = _inFramesetPhase; return;
        case "html": phase = _beforeHeadPhase; return;
      }
    }
    phase = _inBodyPhase;
  }

  /**
   * Generic RCDATA/RAWTEXT Parsing algorithm
   * [contentType] - RCDATA or RAWTEXT
   *
   * Inserts an element for [token], switches the tokenizer to the matching
   * state, remembers the current phase in [originalPhase] and moves to the
   * text phase.
   */
  void parseRCDataRawtext(Token token, String contentType) {
    assert(contentType == "RAWTEXT" || contentType == "RCDATA");

    tree.insertElement(token);

    if (contentType == "RAWTEXT") {
      tokenizer.state = tokenizer.rawtextState;
    } else {
      tokenizer.state = tokenizer.rcdataState;
    }

    originalPhase = phase;
    phase = _textPhase;
  }
}

529

530

/**
 * Base class for the helper objects that implement each phase of processing.
 *
 * Every `process*` method returns either null (the token was consumed) or a
 * token that the parser's main loop should process again.
 */
class Phase {
  // Order should be (they can be omitted):
  // * EOF
  // * Comment
  // * Doctype
  // * SpaceCharacters
  // * Characters
  // * StartTag
  //   - startTag* methods
  // * EndTag
  //   - endTag* methods

  /** The parser this phase belongs to. */
  final HTMLParser parser;

  /** Shortcut for the parser's tree builder. */
  final TreeBuilder tree;

  Phase(HTMLParser parser) : parser = parser, tree = parser.tree;

  /** Handles end of input; concrete phases must override this. */
  bool processEOF() {
    throw const NotImplementedException();
  }

  /** Appends the comment to the current (last opened) element. */
  Token processComment(CommentToken token) {
    // For most phases the following is correct. Where it's not it will be
    // overridden.
    tree.insertComment(token, tree.openElements.last());
    return null;
  }

  /** Reports a doctype that shows up where none is expected. */
  Token processDoctype(DoctypeToken token) {
    parser.parseError("unexpected-doctype");
    return null;
  }

  /** Inserts the token's character data as text. */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data);
    return null;
  }

  /** Inserts the token's whitespace as text. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    tree.insertText(token.data);
    return null;
  }

  /** Handles a start tag; concrete phases must override this. */
  Token processStartTag(StartTagToken token) {
    throw const NotImplementedException();
  }

  /**
   * Handles an `html` start tag: copies every attribute that the root element
   * does not have yet from [token] onto the root element.
   */
  Token startTagHtml(StartTagToken token) {
    if (parser.firstStartTag == false) {
      if (token.name == "html") {
        parser.parseError("non-html-root");
      }
    }
    // XXX Need a check here to see if the first start tag token emitted is
    // this token... If it's not, invoke parser.parseError().
    token.data.forEach((attrName, attrValue) {
      // Existing attributes on the root win over the ones on this tag.
      tree.openElements[0].attributes.putIfAbsent(attrName, () => attrValue);
    });
    parser.firstStartTag = false;
    return null;
  }

  /** Handles an end tag; concrete phases must override this. */
  Token processEndTag(EndTagToken token) {
    throw const NotImplementedException();
  }

  /**
   * Helper method for popping openElements: removes elements from the end of
   * the stack up to and including the first one whose tag name is [name].
   */
  void popOpenElementsUntil(String name) {
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped.tagName != name);
  }
}

600

/**
 * The phase active at the very start of a document: it waits for a doctype,
 * derives the parser's compat mode from it and then hands over to the
 * before-html phase.
 */
class InitialPhase extends Phase {
  InitialPhase(parser) : super(parser);

  /** Whitespace before the doctype is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
  }

  /** Comments before the doctype are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
  }

  /**
   * Inserts the doctype into the tree, reports it as "unknown-doctype" when it
   * is not a plain `html` doctype, and sets the parser's compat mode to
   * "quirks" or "limited quirks" for the legacy public/system identifiers
   * listed below. Always switches to the before-html phase.
   */
  Token processDoctype(DoctypeToken token) {
    var name = token.name;
    String publicId = token.publicId;
    var systemId = token.systemId;
    var correct = token.correct;

    if ((name != "html" || publicId != null ||
        systemId != null && systemId != "about:legacy-compat")) {
      parser.parseError("unknown-doctype");
    }

    if (publicId == null) {
      publicId = "";
    }

    tree.insertDoctype(token);

    // The identifier lists below are all lower case.
    if (publicId != "") {
      publicId = asciiUpper2Lower(publicId);
    }

    if (!correct || token.name != "html"
        || startsWithAny(publicId, const [
          "+//silmaril//dtd html pro v0r11 19970101//",
          "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
          "-//as//dtd html 3.0 aswedit + extensions//",
          "-//ietf//dtd html 2.0 level 1//",
          "-//ietf//dtd html 2.0 level 2//",
          "-//ietf//dtd html 2.0 strict level 1//",
          "-//ietf//dtd html 2.0 strict level 2//",
          "-//ietf//dtd html 2.0 strict//",
          "-//ietf//dtd html 2.0//",
          "-//ietf//dtd html 2.1e//",
          "-//ietf//dtd html 3.0//",
          "-//ietf//dtd html 3.2 final//",
          "-//ietf//dtd html 3.2//",
          "-//ietf//dtd html 3//",
          "-//ietf//dtd html level 0//",
          "-//ietf//dtd html level 1//",
          "-//ietf//dtd html level 2//",
          "-//ietf//dtd html level 3//",
          "-//ietf//dtd html strict level 0//",
          "-//ietf//dtd html strict level 1//",
          "-//ietf//dtd html strict level 2//",
          "-//ietf//dtd html strict level 3//",
          "-//ietf//dtd html strict//",
          "-//ietf//dtd html//",
          "-//metrius//dtd metrius presentational//",
          "-//microsoft//dtd internet explorer 2.0 html strict//",
          "-//microsoft//dtd internet explorer 2.0 html//",
          "-//microsoft//dtd internet explorer 2.0 tables//",
          "-//microsoft//dtd internet explorer 3.0 html strict//",
          "-//microsoft//dtd internet explorer 3.0 html//",
          "-//microsoft//dtd internet explorer 3.0 tables//",
          "-//netscape comm. corp.//dtd html//",
          "-//netscape comm. corp.//dtd strict html//",
          "-//o'reilly and associates//dtd html 2.0//",
          "-//o'reilly and associates//dtd html extended 1.0//",
          "-//o'reilly and associates//dtd html extended relaxed 1.0//",
          "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
          "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
          "-//spyglass//dtd html 2.0 extended//",
          "-//sq//dtd html 2.0 hotmetal + extensions//",
          "-//sun microsystems corp.//dtd hotjava html//",
          "-//sun microsystems corp.//dtd hotjava strict html//",
          "-//w3c//dtd html 3 1995-03-24//",
          "-//w3c//dtd html 3.2 draft//",
          "-//w3c//dtd html 3.2 final//",
          "-//w3c//dtd html 3.2//",
          "-//w3c//dtd html 3.2s draft//",
          "-//w3c//dtd html 4.0 frameset//",
          "-//w3c//dtd html 4.0 transitional//",
          "-//w3c//dtd html experimental 19960712//",
          "-//w3c//dtd html experimental 970421//",
          "-//w3c//dtd w3 html//",
          "-//w3o//dtd w3 html 3.0//",
          "-//webtechs//dtd mozilla html 2.0//",
          "-//webtechs//dtd mozilla html//"])
        || const ["-//w3o//dtd w3 html strict 3.0//en//",
                  "-/w3c/dtd html 4.0 transitional/en",
                  "html"].indexOf(publicId) >= 0
        || startsWithAny(publicId, const [
          "-//w3c//dtd html 4.01 frameset//",
          "-//w3c//dtd html 4.01 transitional//"]) && systemId == null
        || systemId != null && systemId.toLowerCase() ==
           "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {

      parser.compatMode = "quirks";
    } else if (startsWithAny(publicId, const [
          "-//w3c//dtd xhtml 1.0 frameset//",
          "-//w3c//dtd xhtml 1.0 transitional//"])
        || startsWithAny(publicId, const [
          "-//w3c//dtd html 4.01 frameset//",
          "-//w3c//dtd html 4.01 transitional//"]) &&
          systemId != null) {
      parser.compatMode = "limited quirks";
    }
    parser.phase = parser._beforeHtmlPhase;
  }

  /**
   * Fallback for any token other than a doctype, comment or whitespace: the
   * document has no doctype, so it is parsed in quirks mode from the
   * before-html phase.
   */
  void anythingElse() {
    parser.compatMode = "quirks";
    parser.phase = parser._beforeHtmlPhase;
  }

  /** Reports the missing doctype and has [token] reprocessed. */
  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-doctype-but-got-chars");
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and has [token] reprocessed. */
  Token processStartTag(StartTagToken token) {
    parser.parseError("expected-doctype-but-got-start-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and has [token] reprocessed. */
  Token processEndTag(EndTagToken token) {
    parser.parseError("expected-doctype-but-got-end-tag",
        {"name": token.name});
    anythingElse();
    return token;
  }

  /** Reports the missing doctype and asks for EOF to be reprocessed. */
  bool processEOF() {
    parser.parseError("expected-doctype-but-got-eof");
    anythingElse();
    return true;
  }
}

742

743

/**
 * The phase that runs until the root `html` element exists. Anything other
 * than a comment or whitespace causes the root to be created implicitly.
 */
class BeforeHtmlPhase extends Phase {
  BeforeHtmlPhase(parser) : super(parser);

  // helper methods

  /**
   * Inserts an attribute-less `html` root element and moves the parser to the
   * before-head phase.
   */
  void insertHtmlElement() {
    var rootTag = new StartTagToken("html", data: {});
    tree.insertRoot(rootTag);
    parser.phase = parser._beforeHeadPhase;
  }

  // other

  /** Creates the root and asks for EOF to be reprocessed. */
  bool processEOF() {
    insertHtmlElement();
    return true;
  }

  /** Comments before the root are attached to the document itself. */
  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  /** Whitespace before the root is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /** Creates the root and has [token] reprocessed. */
  Token processCharacters(CharactersToken token) {
    insertHtmlElement();
    return token;
  }

  /**
   * Creates the root and has [token] reprocessed, remembering on the parser
   * when the tag is an explicit `html` tag.
   */
  Token processStartTag(StartTagToken token) {
    if (token.name == "html") {
      parser.firstStartTag = true;
    }
    insertHtmlElement();
    return token;
  }

  /**
   * End tags for head, body, html and br imply the root and are reprocessed;
   * every other end tag is reported and dropped.
   */
  Token processEndTag(EndTagToken token) {
    var tagName = token.name;
    if (tagName == "head" || tagName == "body" ||
        tagName == "html" || tagName == "br") {
      insertHtmlElement();
      return token;
    }
    parser.parseError("unexpected-end-tag-before-html",
        {"name": token.name});
    return null;
  }
}

791

792

/**
 * The phase between the root element and the `head` element. Anything other
 * than whitespace, an `html` start tag or a stray end tag causes a `head`
 * element to be created (explicitly or implicitly).
 */
class BeforeHeadPhase extends Phase {
  BeforeHeadPhase(parser) : super(parser);

  /** Routes a start tag to the matching `startTag*` handler. */
  processStartTag(StartTagToken token) {
    var tagName = token.name;
    if (tagName == 'html') return startTagHtml(token);
    if (tagName == 'head') {
      startTagHead(token);
      return null;
    }
    return startTagOther(token);
  }

  /** Routes an end tag to the matching `endTag*` handler. */
  processEndTag(EndTagToken token) {
    var tagName = token.name;
    if (tagName == "head" || tagName == "body" ||
        tagName == "html" || tagName == "br") {
      return endTagImplyHead(token);
    }
    endTagOther(token);
    return null;
  }

  /** Creates an implied head and asks for EOF to be reprocessed. */
  bool processEOF() {
    _insertImpliedHead();
    return true;
  }

  /** Whitespace before the head is dropped. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    return null;
  }

  /** Creates an implied head and has [token] reprocessed. */
  Token processCharacters(CharactersToken token) {
    _insertImpliedHead();
    return token;
  }

  /** An `html` start tag is delegated to the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * Inserts the head element for [token], records it as the tree's head
   * pointer and moves the parser to the in-head phase.
   */
  void startTagHead(StartTagToken token) {
    tree.insertElement(token);
    tree.headPointer = tree.openElements.last();
    parser.phase = parser._inHeadPhase;
  }

  /** Creates an implied head and has [token] reprocessed. */
  Token startTagOther(StartTagToken token) {
    _insertImpliedHead();
    return token;
  }

  /** Creates an implied head and has [token] reprocessed. */
  Token endTagImplyHead(EndTagToken token) {
    _insertImpliedHead();
    return token;
  }

  /** Reports an end tag that is not allowed here; the token is dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError("end-tag-after-implied-root",
        {"name": token.name});
  }

  /** Acts as if an attribute-less `head` start tag had been seen. */
  void _insertImpliedHead() {
    startTagHead(new StartTagToken("head", data: {}));
  }
}

850

/**
 * The phase active while the `head` element is open. Head-level elements are
 * inserted here; anything else closes the head and is reprocessed by the
 * after-head phase.
 */
class InHeadPhase extends Phase {
  InHeadPhase(parser) : super(parser);

  /** Routes a start tag to the matching `startTag*` handler. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "title":
        startTagTitle(token);
        return null;
      case "noscript": case "noframes": case "style":
        startTagNoScriptNoFramesStyle(token);
        return null;
      case "script":
        startTagScript(token);
        return null;
      case "base": case "basefont": case "bgsound": case "command": case "link":
        startTagBaseLinkCommand(token);
        return null;
      case "meta":
        startTagMeta(token);
        return null;
      case "head":
        startTagHead(token);
        return null;
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to the matching `endTag*` handler. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "head":
        endTagHead(token);
        return null;
      case "br": case "html": case "body":
        return endTagHtmlBodyBr(token);
      default:
        endTagOther(token);
        return null;
    }
  }

  // the real thing

  /** Closes the head and asks for EOF to be reprocessed. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Closes the head and has [token] reprocessed. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** An `html` start tag is delegated to the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /** A second `head` start tag is reported and otherwise ignored. */
  void startTagHead(StartTagToken token) {
    parser.parseError("two-heads-are-not-better-than-one");
  }

  /**
   * Inserts the element for [token], pops it straight off the open-elements
   * stack again and acknowledges a self-closing flag.
   */
  void startTagBaseLinkCommand(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }

  /**
   * Inserts the meta element like any other childless head element and, while
   * the stream's encoding is not yet certain, asks the stream to change
   * encoding based on the `charset` attribute or, failing that, on whatever
   * [ContentAttrParser] extracts from the `content` attribute.
   */
  void startTagMeta(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;

    var attrs = token.data;
    if (parser.tokenizer.stream.charEncodingCertain) return;

    var charsetAttr = attrs["charset"];
    var contentAttr = attrs["content"];
    if (charsetAttr != null) {
      parser.tokenizer.stream.changeEncoding(charsetAttr);
      return;
    }
    if (contentAttr != null) {
      var bytes = new EncodingBytes(contentAttr);
      var detected = new ContentAttrParser(bytes).parse();
      parser.tokenizer.stream.changeEncoding(detected);
    }
  }

  /** `title` content is parsed as RCDATA. */
  void startTagTitle(StartTagToken token) {
    parser.parseRCDataRawtext(token, "RCDATA");
  }

  /** `noscript`, `noframes` and `style` content is parsed as RAWTEXT. */
  void startTagNoScriptNoFramesStyle(StartTagToken token) {
    // Need to decide whether to implement the scripting-disabled case
    parser.parseRCDataRawtext(token, "RAWTEXT");
  }

  /**
   * Inserts the script element, switches the tokenizer to its script-data
   * state and moves to the text phase, remembering the current phase so it
   * can be restored later.
   */
  void startTagScript(StartTagToken token) {
    tree.insertElement(token);
    var tokens = parser.tokenizer;
    tokens.state = tokens.scriptDataState;
    parser.originalPhase = parser.phase;
    parser.phase = parser._textPhase;
  }

  /** Closes the head and has [token] reprocessed. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /** Pops the head element and moves the parser to the after-head phase. */
  void endTagHead(EndTagToken token) {
    var popped = tree.openElements.removeLast();
    assert(popped.tagName == "head");
    parser.phase = parser._afterHeadPhase;
  }

  /** Closes the head and has [token] reprocessed. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Any other end tag is reported and dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  /** Acts as if a `head` end tag had been seen. */
  void anythingElse() {
    endTagHead(new EndTagToken("head", data: {}));
  }
}

961

962

963 // XXX If we implement a parser for which scripting is disabled we need to

964 // implement this phase.

965 //

966 // class InHeadNoScriptPhase extends Phase {

967

/**
 * Phase whose handlers either stay in this phase or move the parser on to
 * `_inBodyPhase` / `_inFramesetPhase`. Tokens it does not handle itself
 * cause a `body` element to be inserted (see [anythingElse]).
 */
class AfterHeadPhase extends Phase {
  AfterHeadPhase(parser) : super(parser);

  /** Routes a start tag to its handler by tag name. */
  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "body":
        return startTagBody(token);
      case "frameset":
        return startTagFrameset(token);
      case "base":
      case "basefont":
      case "bgsound":
      case "link":
      case "meta":
      case "noframes":
      case "script":
      case "style":
      case "title":
        return startTagFromHead(token);
      case "head":
        return startTagHead(token);
      default:
        return startTagOther(token);
    }
  }

  /** Routes an end tag to its handler by tag name. */
  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "body":
      case "html":
      case "br":
        return endTagHtmlBodyBr(token);
      default:
        return endTagOther(token);
    }
  }

  /** Runs [anythingElse] at end of input and returns true. */
  bool processEOF() {
    anythingElse();
    return true;
  }

  /** Runs [anythingElse] and returns the character [token] unchanged. */
  Token processCharacters(CharactersToken token) {
    anythingElse();
    return token;
  }

  /** Delegates an `html` start tag to the in-body phase. */
  Token startTagHtml(StartTagToken token) {
    var inBody = parser._inBodyPhase;
    return inBody.processStartTag(token);
  }

  /**
   * Handles an explicit `body` start tag: clears `framesetOK`, inserts the
   * element and switches to the in-body phase.
   */
  void startTagBody(StartTagToken token) {
    parser.framesetOK = false;
    tree.insertElement(token);
    parser.phase = parser._inBodyPhase;
  }

  /** Inserts the `frameset` element and switches to the in-frameset phase. */
  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
    parser.phase = parser._inFramesetPhase;
  }

  /**
   * Handles a head-only start tag seen in this phase: reports a parse error,
   * pushes `tree.headPointer` onto the open elements, lets the in-head phase
   * process [token], then removes the last open element whose tag name is
   * `head`.
   */
  void startTagFromHead(StartTagToken token) {
    var errorArgs = {"name": token.name};
    parser.parseError("unexpected-start-tag-out-of-my-head", errorArgs);

    tree.openElements.add(tree.headPointer);
    parser._inHeadPhase.processStartTag(token);

    for (Node candidate in reversed(tree.openElements)) {
      if (candidate.tagName != "head") continue;
      removeFromList(tree.openElements, candidate);
      break;
    }
  }

  /** Reports a `head` start tag as an "unexpected-start-tag" parse error. */
  void startTagHead(StartTagToken token) {
    var errorArgs = {"name": token.name};
    parser.parseError("unexpected-start-tag", errorArgs);
  }

  /** Runs [anythingElse] and returns the start [token] unchanged. */
  Token startTagOther(StartTagToken token) {
    anythingElse();
    return token;
  }

  /** Runs [anythingElse] and returns the end [token] unchanged. */
  Token endTagHtmlBodyBr(EndTagToken token) {
    anythingElse();
    return token;
  }

  /** Reports any other end tag as an "unexpected-end-tag" parse error. */
  void endTagOther(EndTagToken token) {
    var errorArgs = {"name": token.name};
    parser.parseError("unexpected-end-tag", errorArgs);
  }

  /**
   * Fallback: inserts an implied `body` element, switches to the in-body
   * phase and sets `framesetOK` to true.
   */
  void anythingElse() {
    var impliedBody = new StartTagToken("body", data: {});
    tree.insertElement(impliedBody);
    parser.phase = parser._inBodyPhase;
    parser.framesetOK = true;
  }
}

1054

1055 typedef Token TokenProccessor(Token token);

1056

1057 class InBodyPhase extends Phase {

1058 TokenProccessor processSpaceCharactersFunc;

1059

1060 // http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody

1061 // the really-really-really-very crazy mode

InBodyPhase(parser) : super(parser) {
  // Whitespace starts out in the ordinary (non-<pre>) mode. The handler is
  // kept in a field so the <pre>/<listing>/<textarea> start tags can swap
  // in the newline-dropping variant.
  processSpaceCharactersFunc = processSpaceCharactersNonPre;
}

1066

/**
 * Routes a start tag to its handler by tag name and returns whatever the
 * handler returns.
 *
 * Each tag name appears in exactly one case group. `noframes` is listed
 * only with the tags handled by [startTagProcessInHead].
 */
processStartTag(StartTagToken token) {
  switch (token.name) {
    case "html":
      return startTagHtml(token);

    case "base":
    case "basefont":
    case "bgsound":
    case "command":
    case "link":
    case "meta":
    case "noframes":
    case "script":
    case "style":
    case "title":
      return startTagProcessInHead(token);

    case "body":
      return startTagBody(token);

    case "frameset":
      return startTagFrameset(token);

    case "address":
    case "article":
    case "aside":
    case "blockquote":
    case "center":
    case "details":
    case "dir":
    case "div":
    case "dl":
    case "fieldset":
    case "figcaption":
    case "figure":
    case "footer":
    case "header":
    case "hgroup":
    case "menu":
    case "nav":
    case "ol":
    case "p":
    case "section":
    case "summary":
    case "ul":
      return startTagCloseP(token);

    // headingElements
    case "h1":
    case "h2":
    case "h3":
    case "h4":
    case "h5":
    case "h6":
      return startTagHeading(token);

    case "pre":
    case "listing":
      return startTagPreListing(token);

    case "form":
      return startTagForm(token);

    case "li":
    case "dd":
    case "dt":
      return startTagListItem(token);

    case "plaintext":
      return startTagPlaintext(token);

    case "a":
      return startTagA(token);

    case "b":
    case "big":
    case "code":
    case "em":
    case "font":
    case "i":
    case "s":
    case "small":
    case "strike":
    case "strong":
    case "tt":
    case "u":
      return startTagFormatting(token);

    case "nobr":
      return startTagNobr(token);

    case "button":
      return startTagButton(token);

    case "applet":
    case "marquee":
    case "object":
      return startTagAppletMarqueeObject(token);

    case "xmp":
      return startTagXmp(token);

    case "table":
      return startTagTable(token);

    case "area":
    case "br":
    case "embed":
    case "img":
    case "keygen":
    case "wbr":
      return startTagVoidFormatting(token);

    case "param":
    case "source":
    case "track":
      return startTagParamSource(token);

    case "input":
      return startTagInput(token);

    case "hr":
      return startTagHr(token);

    case "image":
      return startTagImage(token);

    case "isindex":
      return startTagIsIndex(token);

    case "textarea":
      return startTagTextarea(token);

    case "iframe":
      return startTagIFrame(token);

    case "noembed":
    case "noscript":
      return startTagRawtext(token);

    case "select":
      return startTagSelect(token);

    case "rp":
    case "rt":
      return startTagRpRt(token);

    case "option":
    case "optgroup":
      return startTagOpt(token);

    case "math":
      return startTagMath(token);

    case "svg":
      return startTagSvg(token);

    case "caption":
    case "col":
    case "colgroup":
    case "frame":
    case "head":
    case "tbody":
    case "td":
    case "tfoot":
    case "th":
    case "thead":
    case "tr":
      return startTagMisplaced(token);

    default:
      return startTagOther(token);
  }
}

1144

/**
 * Routes an end tag to its handler by tag name and returns whatever the
 * handler returns.
 */
processEndTag(EndTagToken token) {
  switch (token.name) {
    case "body":
      return endTagBody(token);

    case "html":
      return endTagHtml(token);

    case "address":
    case "article":
    case "aside":
    case "blockquote":
    case "center":
    case "details":
    case "dir":
    case "div":
    case "dl":
    case "fieldset":
    case "figcaption":
    case "figure":
    case "footer":
    case "header":
    case "hgroup":
    case "listing":
    case "menu":
    case "nav":
    case "ol":
    case "pre":
    case "section":
    case "summary":
    case "ul":
      return endTagBlock(token);

    case "form":
      return endTagForm(token);

    case "p":
      return endTagP(token);

    case "dd":
    case "dt":
    case "li":
      return endTagListItem(token);

    // headingElements
    case "h1":
    case "h2":
    case "h3":
    case "h4":
    case "h5":
    case "h6":
      return endTagHeading(token);

    case "a":
    case "b":
    case "big":
    case "code":
    case "em":
    case "font":
    case "i":
    case "nobr":
    case "s":
    case "small":
    case "strike":
    case "strong":
    case "tt":
    case "u":
      return endTagFormatting(token);

    case "applet":
    case "marquee":
    case "object":
      return endTagAppletMarqueeObject(token);

    case "br":
      return endTagBr(token);

    default:
      return endTagOther(token);
  }
}

1171

/**
 * Returns true when [node1] and [node2] have the same tag name, the same
 * namespace, the same number of attributes, and every attribute of [node1]
 * has an equal value in [node2].
 */
bool isMatchingFormattingElement(Node node1, Node node2) {
  if (node1.tagName != node2.tagName) return false;
  if (node1.namespace != node2.namespace) return false;
  if (node1.attributes.length != node2.attributes.length) return false;

  for (var attrName in node1.attributes.getKeys()) {
    var sameValue = node1.attributes[attrName] == node2.attributes[attrName];
    if (!sameValue) return false;
  }
  return true;
}

1186

1187 // helper

/**
 * Inserts the element for [token] and appends it to the list of active
 * formatting elements.
 *
 * Before appending, the entries after the last [Marker] are scanned from
 * the end of the list for elements matching the new one (see
 * [isMatchingFormattingElement]). If three are found, the one found last
 * in that backwards scan is removed from the list.
 */
void addFormattingElement(token) {
  tree.insertElement(token);
  var inserted = tree.openElements.last();

  var duplicates = [];
  for (Node entry in reversed(tree.activeFormattingElements)) {
    if (entry === Marker) break;
    if (isMatchingFormattingElement(entry, inserted)) {
      duplicates.add(entry);
    }
  }

  assert(duplicates.length <= 3);
  if (duplicates.length == 3) {
    removeFromList(tree.activeFormattingElements, duplicates.last());
  }
  tree.activeFormattingElements.add(inserted);
}

1207

1208 // the real deal

/**
 * Handles end of input. Walks the open elements from the innermost
 * outwards; the first element whose tag name is not in the allowed list
 * triggers one "expected-closing-tag-but-got-eof" parse error and ends the
 * walk. Always returns false (stop parsing).
 */
bool processEOF() {
  final mayBeOpenAtEof = const [
      "dd", "dt", "li", "p", "tbody", "td", "tfoot", "th", "thead", "tr",
      "body", "html"];

  for (Node openNode in reversed(tree.openElements)) {
    if (mayBeOpenAtEof.indexOf(openNode.tagName) >= 0) continue;
    parser.parseError("expected-closing-tag-but-got-eof");
    break;
  }
  // Stop parsing
  return false;
}

1223

/**
 * Whitespace handler installed right after a `pre`, `listing` or `textarea`
 * start tag. It is one-shot: it first restores the ordinary whitespace
 * handler. A leading newline is dropped when the current element is one of
 * those three and has no content yet; any remaining text is inserted.
 * Returns null.
 */
Token processSpaceCharactersDropNewline(token) {
  processSpaceCharactersFunc = processSpaceCharactersNonPre;

  var text = token.data;
  if (text.startsWith("\n")) {
    var current = tree.openElements.last();
    var dropsLeadingNewline =
        const ["pre", "listing", "textarea"].indexOf(current.tagName) >= 0;
    if (dropsLeadingNewline && !current.hasContent()) {
      text = text.substring(1);
    }
  }

  if (text.length > 0) {
    tree.reconstructActiveFormattingElements();
    tree.insertText(text);
  }
}

1241

/**
 * Inserts the text of a character [token]. A token consisting of a single
 * U+0000 is ignored. Non-whitespace text clears `parser.framesetOK`.
 * Returns null.
 */
Token processCharacters(CharactersToken token) {
  var text = token.data;
  // The tokenizer should always emit null on its own
  if (text == "\u0000") return null;

  tree.reconstructActiveFormattingElements();
  tree.insertText(text);

  var sawNonWhitespace = parser.framesetOK && !allWhitespace(text);
  if (sawNonWhitespace) {
    parser.framesetOK = false;
  }
}

1253

/**
 * Ordinary whitespace handler: reconstructs the active formatting elements
 * and inserts the token's text. Returns null.
 */
Token processSpaceCharactersNonPre(token) {
  var whitespace = token.data;
  tree.reconstructActiveFormattingElements();
  tree.insertText(whitespace);
}

1258

/**
 * Forwards whitespace to whichever handler is currently stored in
 * [processSpaceCharactersFunc].
 */
Token processSpaceCharacters(token) {
  return processSpaceCharactersFunc(token);
}

1260

/** Delegates [token] to the in-head phase and returns its result. */
Token startTagProcessInHead(StartTagToken token) {
  var inHead = parser._inHeadPhase;
  return inHead.processStartTag(token);
}

1264

/**
 * Handles a stray `body` start tag (always a parse error). When the second
 * open element is a `body`, `framesetOK` is cleared and attributes from
 * [token] that the existing element lacks are copied onto it. Otherwise
 * the tag is ignored (asserted to happen only in innerHTML mode).
 */
void startTagBody(StartTagToken token) {
  parser.parseError("unexpected-start-tag", {"name": "body"});

  var noBodyOnStack = tree.openElements.length == 1 ||
      tree.openElements[1].tagName != "body";
  if (noBodyOnStack) {
    assert(parser.innerHTMLMode);
    return;
  }

  parser.framesetOK = false;
  token.data.forEach((name, value) {
    // Existing attributes win; only missing ones are added.
    tree.openElements[1].attributes.putIfAbsent(name, () => value);
  });
}

1277

/**
 * Handles a `frameset` start tag seen in body (always a parse error).
 *
 * If the second open element is a `body` and `framesetOK` is still set,
 * that element is detached from its parent, the open-element stack is
 * popped back to `html`, the frameset is inserted, and the parser switches
 * to the in-frameset phase. Otherwise the tag is ignored.
 */
void startTagFrameset(StartTagToken token) {
  parser.parseError("unexpected-start-tag", {"name": "frameset"});

  var noBodyOnStack = tree.openElements.length == 1 ||
      tree.openElements[1].tagName != "body";
  if (noBodyOnStack) {
    assert(parser.innerHTMLMode);
    return;
  }
  if (!parser.framesetOK) return;

  if (tree.openElements[1].parent != null) {
    tree.openElements[1].parent.$dom_removeChild(tree.openElements[1]);
  }
  while (tree.openElements.last().tagName != "html") {
    tree.openElements.removeLast();
  }
  tree.insertElement(token);
  parser.phase = parser._inFramesetPhase;
}

1294

/**
 * Closes a `p` that is in button scope (if any) and then inserts the
 * element for [token].
 */
void startTagCloseP(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
}

1301

/**
 * Handles `pre` / `listing`: closes a `p` in button scope, inserts the
 * element, clears `framesetOK`, and installs the whitespace handler that
 * drops a leading newline.
 */
void startTagPreListing(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  parser.framesetOK = false;
  processSpaceCharactersFunc = processSpaceCharactersDropNewline;
}

1310

/**
 * Handles a `form` start tag. When a form pointer is already set the tag
 * is a parse error and is ignored. Otherwise a `p` in button scope is
 * closed, the element is inserted, and it becomes the form pointer.
 */
void startTagForm(StartTagToken token) {
  if (tree.formPointer != null) {
    parser.parseError("unexpected-start-tag", {"name": "form"});
    return;
  }

  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);
  tree.formPointer = tree.openElements.last();
}

1322

/**
 * Handles `li`, `dd` and `dt` start tags.
 *
 * Clears `framesetOK`, then walks the open elements from the innermost
 * outwards. The first element whose tag is a "stop name" for this token
 * (`li` for `li`; `dt`/`dd` for `dt` and `dd`) is closed through the
 * current phase and the walk ends. The walk also ends at the first special
 * element other than `address`, `div` or `p`. Finally a `p` in button
 * scope is closed and the new element is inserted.
 */
void startTagListItem(StartTagToken token) {
  parser.framesetOK = false;

  final closersByTag = const {"li": const ["li"],
                              "dt": const ["dt", "dd"],
                              "dd": const ["dt", "dd"]};
  var closers = closersByTag[token.name];

  for (Node openNode in reversed(tree.openElements)) {
    if (closers.indexOf(openNode.tagName) >= 0) {
      var impliedEnd = new EndTagToken(openNode.tagName, data: {});
      parser.phase.processEndTag(impliedEnd);
      break;
    }
    var isSpecial = specialElements.indexOf(openNode.nameTuple) >= 0;
    if (isSpecial &&
        const ["address", "div", "p"].indexOf(openNode.tagName) == -1) {
      break;
    }
  }

  if (tree.elementInScope("p", variant: "button")) {
    parser.phase.processEndTag(new EndTagToken("p", data: {}));
  }

  tree.insertElement(token);
}

1347

/**
 * Handles `plaintext`: closes a `p` in button scope, inserts the element,
 * and puts the tokenizer in its plaintext state.
 */
void startTagPlaintext(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.insertElement(token);

  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.plaintextState;
}

1355

/**
 * Handles `h1`..`h6`: closes a `p` in button scope; if the current element
 * is itself a heading, reports a parse error and pops it; then inserts the
 * new heading.
 */
void startTagHeading(StartTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    endTagP(new EndTagToken("p", data: {}));
  }

  var currentTag = tree.openElements.last().tagName;
  var nestedInHeading = headingElements.indexOf(currentTag) >= 0;
  if (nestedInHeading) {
    parser.parseError("unexpected-start-tag", {"name": token.name});
    tree.openElements.removeLast();
  }

  tree.insertElement(token);
}

1366

/**
 * Handles an `a` start tag. If an `a` is already in the list of active
 * formatting elements, reports a parse error, runs [endTagFormatting] for
 * `a`, and removes that element from both the open elements and the active
 * formatting elements. Then adds the new element as a formatting element.
 */
void startTagA(StartTagToken token) {
  var activeAnchor = tree.elementInActiveFormattingElements("a");
  if (activeAnchor != null) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "a", "endName": "a"});
    endTagFormatting(new EndTagToken("a", data: {}));
    removeFromList(tree.openElements, activeAnchor);
    removeFromList(tree.activeFormattingElements, activeAnchor);
  }

  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}

1379

/**
 * Handles the ordinary formatting start tags: reconstructs the active
 * formatting elements, then adds the new one via [addFormattingElement].
 */
void startTagFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  addFormattingElement(token);
}

1384

/**
 * Handles a `nobr` start tag. If a `nobr` is already in scope, reports a
 * parse error and processes an implied `</nobr>` first. The new element is
 * then added as a formatting element.
 */
void startTagNobr(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  var alreadyOpen = tree.elementInScope("nobr");
  if (alreadyOpen) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "nobr", "endName": "nobr"});
    processEndTag(new EndTagToken("nobr", data: {}));
    // XXX Need tests that trigger the following
    tree.reconstructActiveFormattingElements();
  }

  addFormattingElement(token);
}

1396

/**
 * Handles a `button` start tag.
 *
 * If a `button` is already in scope, reports a parse error, processes an
 * implied `</button>`, and returns [token] unchanged. Otherwise inserts
 * the element, clears `framesetOK`, and returns null.
 */
Token startTagButton(StartTagToken token) {
  if (!tree.elementInScope("button")) {
    tree.reconstructActiveFormattingElements();
    tree.insertElement(token);
    parser.framesetOK = false;
    return null;
  }

  parser.parseError("unexpected-start-tag-implies-end-tag",
      {"startName": "button", "endName": "button"});
  processEndTag(new EndTagToken("button", data: {}));
  return token;
}

1409

/**
 * Handles `applet`, `marquee` and `object`: inserts the element, pushes a
 * [Marker] onto the active formatting elements, and clears `framesetOK`.
 */
void startTagAppletMarqueeObject(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  // The marker bounds later scans of the active formatting elements.
  tree.activeFormattingElements.add(Marker);
  parser.framesetOK = false;
}

1416

/**
 * Handles `xmp`: closes a `p` in button scope, reconstructs the active
 * formatting elements, clears `framesetOK`, and hands [token] to
 * `parser.parseRCDataRawtext` with the "RAWTEXT" content type.
 */
void startTagXmp(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p", data: {}));
  }
  tree.reconstructActiveFormattingElements();
  parser.framesetOK = false;
  parser.parseRCDataRawtext(token, "RAWTEXT");
}

1425

/**
 * Handles `table`: outside quirks mode a `p` in button scope is closed
 * first. Then the element is inserted, `framesetOK` is cleared, and the
 * parser switches to the in-table phase.
 */
void startTagTable(StartTagToken token) {
  // In quirks mode an open <p> is left alone.
  if (parser.compatMode != "quirks" &&
      tree.elementInScope("p", variant: "button")) {
    processEndTag(new EndTagToken("p", data: {}));
  }

  tree.insertElement(token);
  parser.framesetOK = false;
  parser.phase = parser._inTablePhase;
}

1436

/**
 * Handles the void elements routed here by [processStartTag]: inserts the
 * element and immediately pops it, acknowledges the self-closing flag, and
 * clears `framesetOK`.
 */
void startTagVoidFormatting(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  // Insert, then pop straight away: the element never has children.
  tree.insertElement(token);
  tree.openElements.removeLast();

  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}

1444

/**
 * Handles `input` like the other void elements, except that an input whose
 * `type` attribute lower-cases to "hidden" leaves `framesetOK` as it was.
 */
void startTagInput(StartTagToken token) {
  var framesetOKBefore = parser.framesetOK;
  startTagVoidFormatting(token);

  var inputType = asciiUpper2Lower(token.data["type"]);
  if (inputType == "hidden") {
    // input type=hidden doesn't change framesetOK
    parser.framesetOK = framesetOKBefore;
  }
}

1453

/**
 * Handles `param`, `source` and `track`: inserts the element, pops it
 * again straight away, and acknowledges the self-closing flag.
 */
void startTagParamSource(StartTagToken token) {
  // Insert, then pop straight away: the element never has children.
  tree.insertElement(token);
  tree.openElements.removeLast();

  token.selfClosingAcknowledged = true;
}

1459

/**
 * Handles `hr`: closes a `p` in button scope, inserts the element and pops
 * it again straight away, acknowledges the self-closing flag, and clears
 * `framesetOK`.
 */
void startTagHr(StartTagToken token) {
  var openParagraph = tree.elementInScope("p", variant: "button");
  if (openParagraph) {
    endTagP(new EndTagToken("p", data: {}));
  }

  tree.insertElement(token);
  tree.openElements.removeLast();

  token.selfClosingAcknowledged = true;
  parser.framesetOK = false;
}

1469

/**
 * Handles the non-standard `image` start tag: reports a parse error and
 * reprocesses it as an `img` start tag with the same attributes and
 * self-closing flag.
 */
void startTagImage(StartTagToken token) {
  // No really...
  parser.parseError("unexpected-start-tag-treated-as",
      {"originalName": "image", "newName": "img"});

  var imgToken = new StartTagToken("img", data: token.data,
      selfClosing: token.selfClosing);
  processStartTag(imgToken);
}

1477

/**
 * Handles the deprecated `isindex` start tag (always a parse error).
 *
 * Ignored when a form pointer is already set. Otherwise it is expanded
 * into the token sequence `form`, `hr`, `label`, prompt text, `input`,
 * `/label`, `hr`, `/form`. The `action` attribute moves to the form, and
 * `prompt` supplies the label text (a default string is used when absent).
 * The remaining attributes go to the input, whose `name` is forced to
 * "isindex".
 */
void startTagIsIndex(StartTagToken token) {
  parser.parseError("deprecated-tag", {"name": "isindex"});
  if (tree.formPointer != null) return;

  var formAttributes = {};
  var action = token.data["action"];
  if (action != null) {
    formAttributes["action"] = action;
  }
  processStartTag(new StartTagToken("form", data: formAttributes));
  processStartTag(new StartTagToken("hr", data: {}));
  processStartTag(new StartTagToken("label", data: {}));

  // XXX Localization ...
  var labelText = token.data["prompt"];
  if (labelText == null) {
    labelText = "This is a searchable index. Enter search keywords: ";
  }
  processCharacters(new CharactersToken(labelText));

  // Everything except action/prompt is carried over to the <input>.
  var inputAttributes = new Map.from(token.data);
  inputAttributes.remove('action');
  inputAttributes.remove('prompt');
  inputAttributes["name"] = "isindex";
  processStartTag(new StartTagToken("input",
      data: inputAttributes, selfClosing: token.selfClosing));

  processEndTag(new EndTagToken("label", data: {}));
  processStartTag(new StartTagToken("hr", data: {}));
  processEndTag(new EndTagToken("form", data: {}));
}

1507

/**
 * Handles `textarea`: inserts the element, puts the tokenizer in its
 * RCDATA state, installs the whitespace handler that drops a leading
 * newline, and clears `framesetOK`.
 */
void startTagTextarea(StartTagToken token) {
  tree.insertElement(token);

  var tokenizer = parser.tokenizer;
  tokenizer.state = tokenizer.rcdataState;

  processSpaceCharactersFunc = processSpaceCharactersDropNewline;
  parser.framesetOK = false;
}

1514

/**
 * Handles `iframe`: clears `framesetOK`, then treats the element like the
 * other raw-text elements via [startTagRawtext].
 */
void startTagIFrame(StartTagToken token) {
  parser.framesetOK = false;
  startTagRawtext(token);
}

1519

/**
 * Hands [token] to `parser.parseRCDataRawtext` with the "RAWTEXT" content
 * type. Used for `noembed` and `noscript` (scripting enabled), and by
 * [startTagIFrame].
 */
void startTagRawtext(StartTagToken token) {
  final contentType = "RAWTEXT";
  parser.parseRCDataRawtext(token, contentType);
}

1524

/**
 * Handles `option` / `optgroup`: if the current element is an `option`, it
 * is closed through the current phase first; then the new element is
 * inserted.
 */
void startTagOpt(StartTagToken token) {
  var currentTag = tree.openElements.last().tagName;
  if (currentTag == "option") {
    parser.phase.processEndTag(new EndTagToken("option", data: {}));
  }
  tree.reconstructActiveFormattingElements();
  parser.tree.insertElement(token);
}

1532

/**
 * Handles `select`: inserts the element, clears `framesetOK`, and switches
 * to the in-select-in-table phase when the parser's current phase is one
 * of the table-related phases, or to the plain in-select phase otherwise.
 */
void startTagSelect(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  parser.framesetOK = false;

  var inTableContext = parser._inTablePhase == parser.phase ||
      parser._inCaptionPhase == parser.phase ||
      parser._inColumnGroupPhase == parser.phase ||
      parser._inTableBodyPhase == parser.phase ||
      parser._inRowPhase == parser.phase ||
      parser._inCellPhase == parser.phase;

  parser.phase = inTableContext
      ? parser._inSelectInTablePhase
      : parser._inSelectPhase;
}

1549

/**
 * Handles `rp` / `rt`: when a `ruby` is in scope, implied end tags are
 * generated and a parse error is reported if the current element is then
 * not `ruby`. The new element is inserted in either case.
 */
void startTagRpRt(StartTagToken token) {
  var insideRuby = tree.elementInScope("ruby");
  if (insideRuby) {
    tree.generateImpliedEndTags();
    var currentTag = tree.openElements.last().tagName;
    if (currentTag != "ruby") {
      parser.parseError();
    }
  }
  tree.insertElement(token);
}

1559

/**
 * Handles `math`: adjusts the MathML and foreign attributes on [token],
 * sets its namespace to MathML, and inserts the element. A self-closing
 * token is popped again immediately and its flag acknowledged.
 */
void startTagMath(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  parser.adjustMathMLAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.mathml;
  tree.insertElement(token);

  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}

1573

/**
 * Handles `svg`: adjusts the SVG and foreign attributes on [token], sets
 * its namespace to SVG, and inserts the element. A self-closing token is
 * popped again immediately and its flag acknowledged.
 */
void startTagSvg(StartTagToken token) {
  tree.reconstructActiveFormattingElements();

  parser.adjustSVGAttributes(token);
  parser.adjustForeignAttributes(token);
  token.namespace = Namespaces.svg;
  tree.insertElement(token);

  // Need to get the parse error right for the case where the token
  // has a namespace not equal to the xmlns attribute
  if (!token.selfClosing) return;
  tree.openElements.removeLast();
  token.selfClosingAcknowledged = true;
}

1587

/**
 * Handles start tags for elements that should be children of other
 * elements handled in a different insertion mode. In body they are ignored
 * apart from an "unexpected-start-tag-ignored" parse error.
 *
 * [processStartTag] routes these names here: "caption", "col", "colgroup",
 * "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr".
 */
void startTagMisplaced(StartTagToken token) {
  var errorArgs = {"name": token.name};
  parser.parseError("unexpected-start-tag-ignored", errorArgs);
}

1599

/**
 * Handles any other start tag: reconstructs the active formatting elements
 * and inserts an ordinary element for [token]. Returns null.
 */
Token startTagOther(StartTagToken token) {
  tree.reconstructActiveFormattingElements();
  tree.insertElement(token);
  return null;
}

1604

/**
 * Handles `</p>`.
 *
 * With no `p` in button scope, an empty `p` is opened first, a parse error
 * is reported, and the end tag is then processed again. Otherwise implied
 * end tags (except `p`) are generated, a parse error is reported if the
 * current element is not `p`, and open elements are popped up to the `p`.
 */
void endTagP(EndTagToken token) {
  if (tree.elementInScope("p", variant: "button")) {
    tree.generateImpliedEndTags("p");
    var currentTag = tree.openElements.last().tagName;
    if (currentTag != "p") {
      parser.parseError("unexpected-end-tag", {"name": "p"});
    }
    popOpenElementsUntil("p");
    return;
  }

  // No <p> to close: synthesize one so the end tag has something to match.
  startTagCloseP(new StartTagToken("p", data: {}));
  parser.parseError("unexpected-end-tag", {"name": "p"});
  endTagP(new EndTagToken("p", data: {}));
}

1618

/**
 * Handles `</body>`.
 *
 * With no `body` in scope, this is a parse error and the tag is ignored.
 * Otherwise, if the current element is not `body`, the open elements from
 * index 2 onwards are checked, and the first one not in the allowed list
 * triggers a single parse error. The parser then switches to the
 * after-body phase.
 */
void endTagBody(EndTagToken token) {
  if (!tree.elementInScope("body")) {
    parser.parseError();
    return;
  }

  if (tree.openElements.last().tagName != "body") {
    final mayStayOpen = const [
        "dd", "dt", "li", "optgroup", "option", "p", "rp", "rt", "tbody",
        "td", "tfoot", "th", "thead", "tr", "body", "html"];
    for (Node openNode in slice(tree.openElements, 2)) {
      if (mayStayOpen.indexOf(openNode.tagName) >= 0) continue;
      // Not sure this is the correct name for the parse error
      parser.parseError("expected-one-end-tag-but-got-another",
          {"expectedName": "body", "gotName": openNode.tagName});
      break;
    }
  }

  parser.phase = parser._afterBodyPhase;
}

1639

/**
 * Handles `</html>`: when a `body` is in scope, acts as if `</body>` had
 * been seen and returns [token] unchanged. Otherwise does nothing and
 * returns null.
 */
Token endTagHtml(EndTagToken token) {
  // We repeat the test for the body end tag token being ignored here
  if (!tree.elementInScope("body")) return null;

  endTagBody(new EndTagToken("body", data: {}));
  return token;
}

1647

/**
 * Handles end tags of the block-level elements routed here by
 * [processEndTag].
 *
 * `</pre>` restores ordinary whitespace handling. When the element is in
 * scope, implied end tags are generated first and open elements are
 * popped up to it afterwards. A parse error is reported whenever the
 * current element does not match the end tag.
 */
void endTagBlock(EndTagToken token) {
  var tagName = token.name;

  // Put us back in the right whitespace handling mode
  if (tagName == "pre") {
    processSpaceCharactersFunc = processSpaceCharactersNonPre;
  }

  var inScope = tree.elementInScope(tagName);
  if (inScope) tree.generateImpliedEndTags();

  if (tree.openElements.last().tagName != tagName) {
    parser.parseError("end-tag-too-early", {"name": tagName});
  }

  if (inScope) popOpenElementsUntil(tagName);
}

1664

/**
 * Handles `</form>`. The form pointer is always cleared.
 *
 * If there was no form pointer, or that element is not in scope, the tag
 * is a parse error. Otherwise implied end tags are generated, a parse
 * error is reported if the form is not the current element, and the form
 * is removed from the open elements.
 */
void endTagForm(EndTagToken token) {
  var form = tree.formPointer;
  tree.formPointer = null;

  if (form === null || !tree.elementInScope(form)) {
    parser.parseError("unexpected-end-tag", {"name": "form"});
    return;
  }

  tree.generateImpliedEndTags();
  if (tree.openElements.last() != form) {
    parser.parseError("end-tag-too-early-ignored", {"name": "form"});
  }
  removeFromList(tree.openElements, form);
}

1678

/**
 * Handles `</li>`, `</dd>` and `</dt>`.
 *
 * `li` is looked up with the "list" scope variant, the others with the
 * default scope. When the element is not in scope the tag is a parse
 * error. Otherwise implied end tags (except this tag) are generated, a
 * parse error is reported if the current element does not match, and open
 * elements are popped up to it.
 */
void endTagListItem(EndTagToken token) {
  var tagName = token.name;
  var scopeVariant = tagName == "li" ? "list" : null;

  if (!tree.elementInScope(tagName, variant: scopeVariant)) {
    parser.parseError("unexpected-end-tag", {"name": tagName});
    return;
  }

  tree.generateImpliedEndTags(exclude: tagName);
  if (tree.openElements.last().tagName != tagName) {
    parser.parseError("end-tag-too-early", {"name": tagName});
  }
  popOpenElementsUntil(tagName);
}

1696

/**
 * Handles `</h1>`..`</h6>`.
 *
 * If any heading element is in scope, implied end tags are generated. A
 * parse error is reported when the current element does not match the end
 * tag. Then, if any heading is (still) in scope, open elements are popped
 * until a heading element of any level has been popped.
 */
void endTagHeading(EndTagToken token) {
  for (var headingName in headingElements) {
    if (!tree.elementInScope(headingName)) continue;
    tree.generateImpliedEndTags();
    break;
  }

  if (tree.openElements.last().tagName != token.name) {
    parser.parseError("end-tag-too-early", {"name": token.name});
  }

  for (var headingName in headingElements) {
    if (!tree.elementInScope(headingName)) continue;
    // Pop until a heading of any level comes off the stack.
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (headingElements.indexOf(popped.tagName) == -1);
    break;
  }
}

1718

/**
 * The much-feared adoption agency algorithm, run for end tags of
 * formatting elements.
 *
 * Makes up to eight outer passes. Each pass locates the formatting element
 * for [token] and its "furthest block" (the first special element at or
 * after it on the open-element stack). It then re-parents up to three
 * intermediate nodes under clones, and finally moves the furthest block's
 * children into a clone of the formatting element.
 */
endTagFormatting(EndTagToken token) {
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency
  // TODO(jmesserly): the comments here don't match the numbered steps in the
  // updated spec. This needs a pass over it to verify that it still matches.
  // In particular the html5lib Python code skipped "step 4", I'm not sure
  // why.
  // XXX Better parseError messages appreciated.
  for (int outerPass = 0; outerPass < 8; outerPass++) {
    // Step 1 paragraph 1
    var formattingElement =
        tree.elementInActiveFormattingElements(token.name);
    if (formattingElement == null) {
      parser.parseError("adoption-agency-1.1", {"name": token.name});
      return;
    }
    var afeIndex = tree.openElements.indexOf(formattingElement);
    if (afeIndex >= 0 && !tree.elementInScope(formattingElement.tagName)) {
      parser.parseError("adoption-agency-1.1", {"name": token.name});
      return;
    }
    // Step 1 paragraph 2
    if (afeIndex == -1) {
      parser.parseError("adoption-agency-1.2", {"name": token.name});
      removeFromList(tree.activeFormattingElements, formattingElement);
      return;
    }

    // Step 1 paragraph 3
    if (formattingElement != tree.openElements.last()) {
      parser.parseError("adoption-agency-1.3", {"name": token.name});
    }

    // Step 2
    // Start of the adoption agency algorithm proper
    Node furthestBlock = null;
    for (Node candidate in slice(tree.openElements, afeIndex)) {
      if (specialElements.indexOf(candidate.nameTuple) >= 0) {
        furthestBlock = candidate;
        break;
      }
    }

    // Step 3
    if (furthestBlock === null) {
      // No block to adopt into: pop up to and including the formatting
      // element and drop it from the active list.
      var popped;
      do {
        popped = tree.openElements.removeLast();
      } while (popped != formattingElement);
      removeFromList(tree.activeFormattingElements, popped);
      return;
    }

    var commonAncestor = tree.openElements[afeIndex - 1];

    // Step 5
    // The bookmark is supposed to help us identify where to reinsert
    // nodes in step 12. We have to ensure that we reinsert nodes after
    // the node before the active formatting element. Note the bookmark
    // can move in step 7.4
    var bookmark = tree.activeFormattingElements.indexOf(formattingElement);

    // Step 6
    Node lastNode = furthestBlock;
    var node = furthestBlock;
    var index = tree.openElements.indexOf(node);
    for (int innerPass = 0; innerPass < 3; innerPass++) {
      // Node is element before node in open elements
      index -= 1;
      node = tree.openElements[index];

      var activePos = tree.activeFormattingElements.indexOf(node);
      if (activePos == -1) {
        removeFromList(tree.openElements, node);
        continue;
      }
      // Step 6.3
      if (node == formattingElement) break;

      // Step 6.4
      if (lastNode == furthestBlock) {
        bookmark = activePos + 1;
      }

      // Step 6.5
      // cite = node.parent
      // Replace node with a clone in both lists.
      var replacement = node.clone();
      tree.activeFormattingElements[activePos] = replacement;
      tree.openElements[tree.openElements.indexOf(node)] = replacement;
      node = replacement;

      // Step 6.6
      // Remove lastNode from its parents, if any
      if (lastNode.parent != null) {
        lastNode.parent.$dom_removeChild(lastNode);
      }
      node.$dom_appendChild(lastNode);
      // Step 7.7
      lastNode = node;
      // End of inner loop
    }

    // Step 7
    // Foster parent lastNode if commonAncestor is a
    // table, tbody, tfoot, thead, or tr we need to foster parent the
    // lastNode
    if (lastNode.parent != null) {
      lastNode.parent.$dom_removeChild(lastNode);
    }

    var ancestorIsTablePart =
        const ["table", "tbody", "tfoot", "thead", "tr"].indexOf(
            commonAncestor.tagName) >= 0;
    if (ancestorIsTablePart) {
      var nodePos = tree.getTableMisnestedNodePosition();
      nodePos[0].insertBefore(lastNode, nodePos[1]);
    } else {
      commonAncestor.$dom_appendChild(lastNode);
    }

    // Step 8
    var clone = formattingElement.clone();

    // Step 9
    furthestBlock.reparentChildren(clone);

    // Step 10
    furthestBlock.$dom_appendChild(clone);

    // Step 11
    removeFromList(tree.activeFormattingElements, formattingElement);
    tree.activeFormattingElements.insertRange(
        min(bookmark, tree.activeFormattingElements.length), 1, clone);

    // Step 12
    removeFromList(tree.openElements, formattingElement);
    tree.openElements.insertRange(
        tree.openElements.indexOf(furthestBlock) + 1, 1, clone);
  }
}

1859

/**
 * Handles `</applet>`, `</marquee>` and `</object>`.
 *
 * When the element is in scope, implied end tags are generated. A parse
 * error is reported if the current element does not match the end tag.
 * When the element is (still) in scope, open elements are popped up to it
 * and the active formatting elements are cleared.
 */
void endTagAppletMarqueeObject(EndTagToken token) {
  var tagName = token.name;

  if (tree.elementInScope(tagName)) {
    tree.generateImpliedEndTags();
  }

  var currentTag = tree.openElements.last().tagName;
  if (currentTag != tagName) {
    parser.parseError("end-tag-too-early", {"name": tagName});
  }

  if (tree.elementInScope(tagName)) {
    popOpenElementsUntil(tagName);
    tree.clearActiveFormattingElements();
  }
}

1872

/**
 * Handles `</br>`: reports a parse error and treats it as a `br` start
 * tag without attributes. The element is inserted and popped straight
 * away.
 */
void endTagBr(EndTagToken token) {
  parser.parseError("unexpected-end-tag-treated-as",
      {"originalName": "br", "newName": "br element"});

  tree.reconstructActiveFormattingElements();
  var brToken = new StartTagToken("br", data: {});
  tree.insertElement(brToken);
  tree.openElements.removeLast();
}

1880

/**
 * Handles any other end tag.
 *
 * Walks the open elements from the innermost outwards. On the first one
 * whose tag name matches, implied end tags (except this tag) are
 * generated, a parse error is reported if the current element then does
 * not match, and elements are popped up to and including the match. If a
 * special element is reached first, the tag is a parse error and is
 * ignored.
 */
void endTagOther(EndTagToken token) {
  for (Node candidate in reversed(tree.openElements)) {
    if (candidate.tagName != token.name) {
      if (specialElements.indexOf(candidate.nameTuple) < 0) continue;
      parser.parseError("unexpected-end-tag", {"name": token.name});
      break;
    }

    tree.generateImpliedEndTags(exclude: token.name);
    if (tree.openElements.last().tagName != token.name) {
      parser.parseError("unexpected-end-tag", {"name": token.name});
    }
    var popped;
    do {
      popped = tree.openElements.removeLast();
    } while (popped != candidate);
    break;
  }
}

1898 }

1899

1900

/**
 * Phase the parser is in while the text content of an element is being
 * collected. Every end tag, and end of input, pops the current element and
 * switches back to `parser.originalPhase`.
 */
class TextPhase extends Phase {
  TextPhase(parser) : super(parser);

  /**
   * Start tags are never expected in this phase; asserts in checked mode.
   */
  // "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token.name
  processStartTag(StartTagToken token) { assert(false); }

  /** Routes `</script>` to [endTagScript], anything else to [endTagOther]. */
  processEndTag(EndTagToken token) {
    if (token.name == 'script') return endTagScript(token);
    return endTagOther(token);
  }

  /** Appends the token's text to the tree. Returns null. */
  Token processCharacters(CharactersToken token) {
    tree.insertText(token.data);
  }

  /**
   * Handles end of input inside the element: reports a parse error naming
   * the current element, pops it, restores the original phase, and returns
   * true.
   */
  bool processEOF() {
    parser.parseError("expected-named-closing-tag-but-got-eof",
        {'name': tree.openElements.last().tagName});
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
    return true;
  }

  /**
   * Handles `</script>`: pops the current element (asserted to be
   * `script`) and restores the original phase.
   */
  void endTagScript(EndTagToken token) {
    var node = tree.openElements.removeLast();
    assert(node.tagName == "script");
    parser.phase = parser.originalPhase;
    // The rest of this method is all stuff that only happens if
    // document.write works
  }

  /** Handles any other end tag: pops the current element and restores the
   * original phase. */
  void endTagOther(EndTagToken token) {
    // The popped element is not needed here.
    tree.openElements.removeLast();
    parser.phase = parser.originalPhase;
  }
}

1937

/**
 * Phase used while the parser is directly inside a `table` element.
 *
 * Table-structure tags are handled here. Any other start/end tag and any
 * non-whitespace text is forwarded to the in-body phase with
 * [tree.insertFromTable] set for the duration of the call (the flag is
 * consumed by the tree builder, which is not visible in this class).
 */
class InTablePhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-table
  InTablePhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html": return startTagHtml(token);
      case "caption": return startTagCaption(token);
      case "colgroup": return startTagColgroup(token);
      case "col": return startTagCol(token);
      case "tbody": case "tfoot": case "thead": return startTagRowGroup(token);
      case "td": case "th": case "tr": return startTagImplyTbody(token);
      case "table": return startTagTable(token);
      case "style": case "script": return startTagStyleScript(token);
      case "input": return startTagInput(token);
      case "form": return startTagForm(token);
      default: return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "table": return endTagTable(token);
      case "body": case "caption": case "col": case "colgroup": case "html":
      case "tbody": case "td": case "tfoot": case "th": case "thead": case "tr":
        return endTagIgnore(token);
      default: return endTagOther(token);
    }
  }

  // helper methods

  /**
   * "Clear the stack back to a table context": pops open elements until the
   * current node is `table` or `html`.
   */
  void clearStackToTableContext() {
    while (tree.openElements.last().tagName != "table" &&
           tree.openElements.last().tagName != "html") {
      //parser.parseError("unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last().name})
      tree.openElements.removeLast();
    }
    // When the current node is <html> it's an innerHTML case
  }

  // processing methods

  /**
   * Reports "eof-in-table" unless the current node is `html` (which is only
   * expected in innerHTML mode). Returns `false` to stop parsing.
   */
  bool processEOF() {
    if (tree.openElements.last().tagName != "html") {
      parser.parseError("eof-in-table");
    } else {
      assert(parser.innerHTMLMode);
    }
    // Stop parsing
    return false;
  }

  /**
   * Switches to the in-table-text phase, remembering the phase that was
   * current, and lets it handle the whitespace [token]. Returns `null`.
   */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    var originalPhase = parser.phase;
    parser.phase = parser._inTableTextPhase;
    parser._inTableTextPhase.originalPhase = originalPhase;
    parser.phase.processSpaceCharacters(token);
    return null;
  }

  /**
   * Switches to the in-table-text phase, remembering the phase that was
   * current, and lets it handle the character [token]. Returns `null`.
   */
  Token processCharacters(CharactersToken token) {
    var originalPhase = parser.phase;
    parser.phase = parser._inTableTextPhase;
    parser._inTableTextPhase.originalPhase = originalPhase;
    parser.phase.processCharacters(token);
    return null;
  }

  /**
   * Inserts text through the in-body phase with [tree.insertFromTable] set.
   *
   * Called by [InTableTextPhase.flushCharacters] when the buffered text is
   * not all whitespace.
   */
  void insertText(CharactersToken token) {
    // If we get here there must be at least one non-whitespace character
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processCharacters(token);
    tree.insertFromTable = false;
  }

  /** `<caption>`: adds a formatting marker and enters the in-caption phase. */
  void startTagCaption(StartTagToken token) {
    clearStackToTableContext();
    tree.activeFormattingElements.add(Marker);
    tree.insertElement(token);
    parser.phase = parser._inCaptionPhase;
  }

  /** `<colgroup>`: inserts it and enters the in-column-group phase. */
  void startTagColgroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inColumnGroupPhase;
  }

  /** `<col>`: implies a `<colgroup>` and returns [token] for reprocessing. */
  Token startTagCol(StartTagToken token) {
    startTagColgroup(new StartTagToken("colgroup", data: {}));
    return token;
  }

  /** `<tbody>`, `<tfoot>`, `<thead>`: inserts it, enters in-table-body. */
  void startTagRowGroup(StartTagToken token) {
    clearStackToTableContext();
    tree.insertElement(token);
    parser.phase = parser._inTableBodyPhase;
  }

  /** `<td>`, `<th>`, `<tr>`: implies a `<tbody>`, returns [token]. */
  Token startTagImplyTbody(StartTagToken token) {
    startTagRowGroup(new StartTagToken("tbody", data: {}));
    return token;
  }

  /**
   * Nested `<table>`: reports an error and processes an implied `</table>`
   * through the current phase. Returns [token] for reprocessing unless the
   * parser is in innerHTML mode, in which case `null` is returned.
   */
  Token startTagTable(StartTagToken token) {
    parser.parseError("unexpected-start-tag-implies-end-tag",
        {"startName": "table", "endName": "table"});
    parser.phase.processEndTag(new EndTagToken("table", data: {}));
    if (!parser.innerHTMLMode) {
      return token;
    }
    return null;
  }

  /** `<style>` / `<script>`: handled by the in-head phase. */
  Token startTagStyleScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /**
   * `<input>`: a `type=hidden` input (ASCII case-insensitive) is inserted
   * and popped immediately, with a parse error; any other input goes
   * through [startTagOther].
   */
  void startTagInput(StartTagToken token) {
    // An <input> without a type attribute yields null here; check for it
    // explicitly instead of relying on asciiUpper2Lower tolerating null.
    var type = token.data["type"];
    if (type != null && asciiUpper2Lower(type) == "hidden") {
      parser.parseError("unexpected-hidden-input-in-table");
      tree.insertElement(token);
      // XXX associate with form
      tree.openElements.removeLast();
    } else {
      startTagOther(token);
    }
  }

  /**
   * `<form>`: always a parse error. If no form pointer is set, the form is
   * inserted, recorded as [tree.formPointer] and popped immediately.
   */
  void startTagForm(StartTagToken token) {
    parser.parseError("unexpected-form-in-table");
    if (tree.formPointer === null) {
      tree.insertElement(token);
      tree.formPointer = tree.openElements.last();
      tree.openElements.removeLast();
    }
  }

  /** Any other start tag: parse error, then in-body with the table flag. */
  void startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processStartTag(token);
    tree.insertFromTable = false;
  }

  /**
   * `</table>`: if a table is in table scope, pops through it (reporting an
   * error when it was not the current node) and resets the insertion mode.
   */
  void endTagTable(EndTagToken token) {
    if (tree.elementInScope("table", variant: "table")) {
      tree.generateImpliedEndTags();
      if (tree.openElements.last().tagName != "table") {
        parser.parseError("end-tag-too-early-named", {"gotName": "table",
            "expectedName": tree.openElements.last().tagName});
      }
      while (tree.openElements.last().tagName != "table") {
        tree.openElements.removeLast();
      }
      tree.openElements.removeLast();
      parser.resetInsertionMode();
    } else {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
    }
  }

  /** End tags that are ignored here, with an "unexpected-end-tag" error. */
  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  /** Any other end tag: parse error, then in-body with the table flag. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-implies-table-voodoo",
        {"name": token.name});
    // Do the table magic!
    tree.insertFromTable = true;
    parser._inBodyPhase.processEndTag(token);
    tree.insertFromTable = false;
  }
}

2116

/**
 * Phase that buffers character tokens seen inside a table until a
 * non-character token arrives.
 *
 * [originalPhase] is assigned by the phase that switched to this one (see
 * [InTablePhase.processCharacters]) and is restored when the buffer is
 * flushed.
 */
class InTableTextPhase extends Phase {
  Phase originalPhase;
  List<StringToken> characterTokens;

  InTableTextPhase(parser)
      : characterTokens = <StringToken>[],
        super(parser);

  /**
   * Emits the buffered text and empties the buffer.
   *
   * Text containing any non-whitespace goes through
   * [InTablePhase.insertText]; non-empty all-whitespace text is inserted
   * directly into the tree; empty text is dropped.
   */
  void flushCharacters() {
    var text = joinStr(characterTokens.map((t) => t.data));
    if (allWhitespace(text)) {
      if (text.length > 0) tree.insertText(text);
    } else {
      parser._inTablePhase.insertText(new CharactersToken(text));
    }
    characterTokens = <StringToken>[];
  }

  /** Flushes pending text and hands control back to [originalPhase]. */
  void _leavePhase() {
    flushCharacters();
    parser.phase = originalPhase;
  }

  /** Leaves this phase and returns [token] unchanged. */
  Token processComment(CommentToken token) {
    _leavePhase();
    return token;
  }

  /** Leaves this phase and returns `true`. */
  bool processEOF() {
    _leavePhase();
    return true;
  }

  /** Buffers [token], except for a lone U+0000 which is dropped. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") characterTokens.add(token);
    return null;
  }

  /** Buffers the whitespace [token]. */
  Token processSpaceCharacters(SpaceCharactersToken token) {
    // pretty sure we should never reach here
    characterTokens.add(token);
    // XXX assert(false);
    return null;
  }

  /** Leaves this phase and returns [token] unchanged. */
  Token processStartTag(StartTagToken token) {
    _leavePhase();
    return token;
  }

  /** Leaves this phase and returns [token] unchanged. */
  Token processEndTag(EndTagToken token) {
    _leavePhase();
    return token;
  }
}

2172

2173

/**
 * Phase used while the parser is inside a table `caption`.
 *
 * Content is mostly delegated to the in-body phase; table-structure tags
 * first close the caption.
 */
class InCaptionPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-caption
  InCaptionPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "caption": case "col": case "colgroup": case "tbody": case "td":
      case "tfoot": case "th": case "thead": case "tr":
        return startTagTableElement(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "caption":
        return endTagCaption(token);
      case "table":
        return endTagTable(token);
      case "body": case "col": case "colgroup": case "html": case "tbody":
      case "td": case "tfoot": case "th": case "thead": case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  /** Whether a `</caption>` would be ignored (no caption in table scope). */
  bool ignoreEndTagCaption() {
    var captionInScope = tree.elementInScope("caption", variant: "table");
    return !captionInScope;
  }

  /** Runs the in-body EOF handling, then returns `false`. */
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  /**
   * Reports a parse error and processes an implied `</caption>` through the
   * current phase. Returns [token] when that end tag was honoured and
   * `null` when it was ignored.
   */
  Token _impliedCaptionEnd(Token token) {
    parser.parseError();
    // XXX Have to duplicate logic here to find out if the tag is ignored
    var wasIgnored = ignoreEndTagCaption();
    parser.phase.processEndTag(new EndTagToken("caption", data: {}));
    if (wasIgnored) return null;
    return token;
  }

  /** Table-structure start tag: closes the caption first. */
  Token startTagTableElement(StartTagToken token) {
    return _impliedCaptionEnd(token);
  }

  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * `</caption>`: pops through the caption, clears the active formatting
   * elements and returns to the in-table phase.
   */
  void endTagCaption(EndTagToken token) {
    if (ignoreEndTagCaption()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return;
    }

    // AT this code is quite similar to endTagTable in "InTable"
    tree.generateImpliedEndTags();
    var currentName = tree.openElements.last().tagName;
    if (currentName != "caption") {
      parser.parseError("expected-one-end-tag-but-got-another",
          {"gotName": "caption", "expectedName": currentName});
    }
    while (tree.openElements.last().tagName != "caption") {
      tree.openElements.removeLast();
    }
    tree.openElements.removeLast();
    tree.clearActiveFormattingElements();
    parser.phase = parser._inTablePhase;
  }

  /** `</table>`: closes the caption first. */
  Token endTagTable(EndTagToken token) {
    return _impliedCaptionEnd(token);
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}

2267

2268

/**
 * Phase used while the parser is inside a `colgroup`.
 *
 * Only `col` elements are accepted here; almost everything else first
 * closes the column group.
 */
class InColumnGroupPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-column
  InColumnGroupPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "col":
        return startTagCol(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "colgroup":
        return endTagColgroup(token);
      case "col":
        return endTagCol(token);
      default:
        return endTagOther(token);
    }
  }

  /** Whether a `</colgroup>` would be ignored (current node is `html`). */
  bool ignoreEndTagColgroup() {
    var currentNode = tree.openElements.last();
    return currentNode.tagName == "html";
  }

  /**
   * Processes an implied `</colgroup>`. Returns [token] when that end tag
   * was honoured and `null` when it was ignored.
   */
  Token _closeColgroupAndReprocess(Token token) {
    var wasIgnored = ignoreEndTagColgroup();
    endTagColgroup(new EndTagToken("colgroup", data: {}));
    if (wasIgnored) return null;
    return token;
  }

  /**
   * Returns `false` when the current node is `html` (expected only in
   * innerHTML mode); otherwise closes the column group and returns `true`.
   */
  bool processEOF() {
    if (ignoreEndTagColgroup()) {
      assert(parser.innerHTMLMode);
      return false;
    }
    endTagColgroup(new EndTagToken("colgroup", data: {}));
    return true;
  }

  Token processCharacters(CharactersToken token) {
    return _closeColgroupAndReprocess(token);
  }

  /** `<col>`: inserted and popped immediately. */
  void startTagCol(StartTagToken token) {
    tree.insertElement(token);
    tree.openElements.removeLast();
  }

  Token startTagOther(StartTagToken token) {
    return _closeColgroupAndReprocess(token);
  }

  /** `</colgroup>`: pops the column group and returns to in-table. */
  void endTagColgroup(EndTagToken token) {
    if (!ignoreEndTagColgroup()) {
      tree.openElements.removeLast();
      parser.phase = parser._inTablePhase;
      return;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError();
  }

  /** `</col>` is always a parse error. */
  void endTagCol(EndTagToken token) {
    parser.parseError("no-end-tag", {"name": "col"});
  }

  Token endTagOther(EndTagToken token) {
    return _closeColgroupAndReprocess(token);
  }
}

2342

2343

/**
 * Phase used while the parser is inside a `tbody`, `thead` or `tfoot`.
 *
 * Rows are opened here; tokens this phase does not handle itself are
 * delegated to the in-table phase.
 */
class InTableBodyPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-table0
  InTableBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "tr":
        return startTagTr(token);
      case "td": case "th":
        return startTagTableCell(token);
      case "caption": case "col": case "colgroup": case "tbody": case "tfoot":
      case "thead":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tbody": case "tfoot": case "thead":
        return endTagTableRowGroup(token);
      case "table":
        return endTagTable(token);
      case "body": case "caption": case "col": case "colgroup": case "html":
      case "td": case "th": case "tr":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods

  /**
   * Pops open elements until the current node is `tbody`, `tfoot`, `thead`
   * or `html` (the latter only being expected in innerHTML mode).
   */
  void clearStackToTableBodyContext() {
    while (const ["tbody", "tfoot","thead", "html"].indexOf(
        tree.openElements.last().tagName) < 0) {
      //XXX parser.parseError("unexpected-implied-end-tag-in-table",
      //  {"name": tree.openElements.last().name})
      tree.openElements.removeLast();
    }
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    }
  }

  // the rest

  /** Runs the in-table EOF handling, then returns `false`. */
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /** `<tr>`: inserts the row and enters the in-row phase. */
  void startTagTr(StartTagToken token) {
    clearStackToTableBodyContext();
    tree.insertElement(token);
    parser.phase = parser._inRowPhase;
  }

  /** `<td>` / `<th>` outside a row: implies a `<tr>`, returns [token]. */
  Token startTagTableCell(StartTagToken token) {
    parser.parseError("unexpected-cell-in-table-body",
        {"name": token.name});
    var impliedRow = new StartTagToken("tr", data: {});
    startTagTr(impliedRow);
    return token;
  }

  /** Table-structure start tags share the `</table>` handling. */
  Token startTagTableOther(token) {
    return endTagTable(token);
  }

  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /**
   * `</tbody>`, `</tfoot>`, `</thead>`: pops the row group and returns to
   * in-table, or reports an error when no such element is in table scope.
   */
  void endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError("unexpected-end-tag-in-table-body",
          {"name": token.name});
      return;
    }
    clearStackToTableBodyContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTablePhase;
  }

  /**
   * Closes the current row group, if one is in table scope, and returns
   * [token] so it is reprocessed; otherwise reports an error and returns
   * `null`.
   */
  Token endTagTable(TagToken token) {
    // XXX AT Any ideas on how to share this with endTagTable?
    var rowGroupInScope = tree.elementInScope("tbody", variant: "table") ||
        tree.elementInScope("thead", variant: "table") ||
        tree.elementInScope("tfoot", variant: "table");
    if (!rowGroupInScope) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return null;
    }

    clearStackToTableBodyContext();
    var currentName = tree.openElements.last().tagName;
    endTagTableRowGroup(new EndTagToken(currentName, data: {}));
    return token;
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-table-body",
        {"name": token.name});
  }

  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}

2455

2456

/**
 * Phase used while the parser is inside a `tr`.
 *
 * Cells are opened here; tokens this phase does not handle itself are
 * delegated to the in-table phase.
 */
class InRowPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-row
  InRowPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "td": case "th":
        return startTagTableCell(token);
      case "caption": case "col": case "colgroup": case "tbody": case "tfoot":
      case "thead": case "tr":
        return startTagTableOther(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "tr":
        return endTagTr(token);
      case "table":
        return endTagTable(token);
      case "tbody": case "tfoot": case "thead":
        return endTagTableRowGroup(token);
      case "body": case "caption": case "col": case "colgroup": case "html":
      case "td": case "th":
        return endTagIgnore(token);
      default:
        return endTagOther(token);
    }
  }

  // helper methods (XXX unify this with other table helper methods)

  /**
   * Pops open elements until the current node is `tr` or `html`, reporting
   * a parse error for every element removed.
   */
  void clearStackToTableRowContext() {
    while (true) {
      var currentName = tree.openElements.last().tagName;
      if (currentName == "tr" || currentName == "html") break;
      parser.parseError("unexpected-implied-end-tag-in-table-row",
          {"name": currentName});
      tree.openElements.removeLast();
    }
  }

  /** Whether a `</tr>` would be ignored (no `tr` in table scope). */
  bool ignoreEndTagTr() {
    var rowInScope = tree.elementInScope("tr", variant: "table");
    return !rowInScope;
  }

  /**
   * Processes an implied `</tr>`. Returns [token] when that end tag was
   * honoured and `null` when it was ignored.
   */
  Token _closeRowAndReprocess(Token token) {
    var wasIgnored = ignoreEndTagTr();
    endTagTr(new EndTagToken("tr", data: {}));
    // XXX how are we sure it's always ignored in the innerHTML case?
    if (wasIgnored) return null;
    return token;
  }

  // the rest

  /** Runs the in-table EOF handling, then returns `false`. */
  bool processEOF() {
    parser._inTablePhase.processEOF();
    return false;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) {
    return parser._inTablePhase.processSpaceCharacters(token);
  }

  Token processCharacters(CharactersToken token) {
    return parser._inTablePhase.processCharacters(token);
  }

  /**
   * `<td>` / `<th>`: inserts the cell, enters the in-cell phase and adds a
   * marker to the active formatting elements.
   */
  void startTagTableCell(StartTagToken token) {
    clearStackToTableRowContext();
    tree.insertElement(token);
    parser.phase = parser._inCellPhase;
    tree.activeFormattingElements.add(Marker);
  }

  /** Table-structure start tag: closes the row first. */
  Token startTagTableOther(StartTagToken token) {
    return _closeRowAndReprocess(token);
  }

  Token startTagOther(StartTagToken token) {
    return parser._inTablePhase.processStartTag(token);
  }

  /** `</tr>`: pops the row and returns to the in-table-body phase. */
  void endTagTr(EndTagToken token) {
    if (ignoreEndTagTr()) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return;
    }
    clearStackToTableRowContext();
    tree.openElements.removeLast();
    parser.phase = parser._inTableBodyPhase;
  }

  /**
   * `</table>`: closes the row; the tag is reprocessed if the implied
   * `</tr>` was not ignored.
   */
  Token endTagTable(EndTagToken token) {
    return _closeRowAndReprocess(token);
  }

  /**
   * `</tbody>`, `</tfoot>`, `</thead>`: when that element is in table scope
   * the row is closed and [token] returned for reprocessing; otherwise a
   * parse error is reported and `null` returned.
   */
  Token endTagTableRowGroup(EndTagToken token) {
    if (!tree.elementInScope(token.name, variant: "table")) {
      parser.parseError();
      return null;
    }
    endTagTr(new EndTagToken("tr", data: {}));
    return token;
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-table-row",
        {"name": token.name});
  }

  Token endTagOther(EndTagToken token) {
    return parser._inTablePhase.processEndTag(token);
  }
}

2570

/**
 * Phase used while the parser is inside a `td` or `th`.
 *
 * Cell content is delegated to the in-body phase; table-structure tags
 * first close the open cell.
 */
class InCellPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-cell
  InCellPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html": return startTagHtml(token);
      case "caption": case "col": case "colgroup": case "tbody": case "td":
      case "tfoot": case "th": case "thead": case "tr":
        return startTagTableOther(token);
      default: return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "td": case "th":
        return endTagTableCell(token);
      case "body": case "caption": case "col": case "colgroup": case "html":
        return endTagIgnore(token);
      case "table": case "tbody": case "tfoot": case "thead": case "tr":
        return endTagImply(token);
      default: return endTagOther(token);
    }
  }

  // helper

  /**
   * Closes the open cell by synthesising a `</td>` or, failing that, a
   * `</th>` for whichever is in table scope. Does nothing if neither is.
   */
  void closeCell() {
    if (tree.elementInScope("td", variant: "table")) {
      endTagTableCell(new EndTagToken("td", data: {}));
    } else if (tree.elementInScope("th", variant: "table")) {
      endTagTableCell(new EndTagToken("th", data: {}));
    }
  }

  // the rest

  /** Runs the in-body EOF handling, then returns `false`. */
  bool processEOF() {
    parser._inBodyPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) {
    return parser._inBodyPhase.processCharacters(token);
  }

  /**
   * Table-structure start tag: closes the open cell and returns [token] for
   * reprocessing. With no cell in table scope, a parse error is reported
   * and `null` returned.
   */
  Token startTagTableOther(StartTagToken token) {
    if (tree.elementInScope("td", variant: "table") ||
        tree.elementInScope("th", variant: "table")) {
      closeCell();
      return token;
    }
    // innerHTML case
    assert(parser.innerHTMLMode);
    parser.parseError();
    return null;
  }

  Token startTagOther(StartTagToken token) {
    return parser._inBodyPhase.processStartTag(token);
  }

  /**
   * `</td>` / `</th>`: pops through the cell, clears the active formatting
   * elements and returns to the in-row phase.
   */
  void endTagTableCell(EndTagToken token) {
    if (tree.elementInScope(token.name, variant: "table")) {
      // Pass the exclusion by name, matching the other call site of
      // generateImpliedEndTags in this file.
      tree.generateImpliedEndTags(exclude: token.name);
      if (tree.openElements.last().tagName != token.name) {
        parser.parseError("unexpected-cell-end-tag", {"name": token.name});
        popOpenElementsUntil(token.name);
      } else {
        tree.openElements.removeLast();
      }
      tree.clearActiveFormattingElements();
      parser.phase = parser._inRowPhase;
    } else {
      parser.parseError("unexpected-end-tag", {"name": token.name});
    }
  }

  void endTagIgnore(EndTagToken token) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  /**
   * End tag of an enclosing table element: when that element is in table
   * scope the cell is closed and [token] returned for reprocessing;
   * otherwise a parse error is reported and `null` returned.
   */
  Token endTagImply(EndTagToken token) {
    if (tree.elementInScope(token.name, variant: "table")) {
      closeCell();
      return token;
    }
    // sometimes innerHTML case
    parser.parseError();
    return null;
  }

  Token endTagOther(EndTagToken token) {
    return parser._inBodyPhase.processEndTag(token);
  }
}

2666

/**
 * Phase used while the parser is inside a `select`.
 *
 * Only `option`, `optgroup` and text are inserted; most other tags are
 * reported as errors and dropped, and a few close the `select`.
 */
class InSelectPhase extends Phase {
  InSelectPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html":
        return startTagHtml(token);
      case "option":
        return startTagOption(token);
      case "optgroup":
        return startTagOptgroup(token);
      case "select":
        return startTagSelect(token);
      case "input": case "keygen": case "textarea":
        return startTagInput(token);
      case "script":
        return startTagScript(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "option":
        return endTagOption(token);
      case "optgroup":
        return endTagOptgroup(token);
      case "select":
        return endTagSelect(token);
      default:
        return endTagOther(token);
    }
  }

  // http://www.whatwg.org/specs/web-apps/current-work/#in-select
  /**
   * Reports "eof-in-select" unless the current node is `html` (expected
   * only in innerHTML mode). Always returns `false`.
   */
  bool processEOF() {
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError("eof-in-select");
    }
    return false;
  }

  /** Inserts the text, except for a lone U+0000 which is dropped. */
  Token processCharacters(CharactersToken token) {
    if (token.data != "\u0000") tree.insertText(token.data);
    return null;
  }

  /** `<option>`: closes an open `option` first, then inserts the new one. */
  void startTagOption(StartTagToken token) {
    // We need to imply </option> if <option> is the current node.
    var currentNode = tree.openElements.last();
    if (currentNode.tagName == "option") {
      tree.openElements.removeLast();
    }
    tree.insertElement(token);
  }

  /**
   * `<optgroup>`: closes an open `option` and then an open `optgroup`
   * before inserting the new group.
   */
  void startTagOptgroup(StartTagToken token) {
    // The two checks run in sequence: popping an option may expose an
    // optgroup as the new current node.
    for (var impliedClose in const ["option", "optgroup"]) {
      if (tree.openElements.last().tagName == impliedClose) {
        tree.openElements.removeLast();
      }
    }
    tree.insertElement(token);
  }

  /** Nested `<select>`: parse error, treated like `</select>`. */
  void startTagSelect(StartTagToken token) {
    parser.parseError("unexpected-select-in-select");
    endTagSelect(new EndTagToken("select", data: {}));
  }

  /**
   * `<input>`, `<keygen>`, `<textarea>`: parse error; closes the select and
   * returns [token] for reprocessing when a select is in select scope,
   * otherwise returns `null`.
   */
  Token startTagInput(StartTagToken token) {
    parser.parseError("unexpected-input-in-select");
    if (!tree.elementInScope("select", variant: "select")) {
      assert(parser.innerHTMLMode);
      return null;
    }
    endTagSelect(new EndTagToken("select", data: {}));
    return token;
  }

  Token startTagScript(StartTagToken token) {
    return parser._inHeadPhase.processStartTag(token);
  }

  /** Any other start tag is reported and dropped. */
  Token startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-in-select",
        {"name": token.name});
    return null;
  }

  /** `</option>`: pops the option if it is the current node. */
  void endTagOption(EndTagToken token) {
    if (tree.openElements.last().tagName != "option") {
      parser.parseError("unexpected-end-tag-in-select",
          {"name": "option"});
      return;
    }
    tree.openElements.removeLast();
  }

  /** `</optgroup>`: closes an open option inside the group, then the group. */
  void endTagOptgroup(EndTagToken token) {
    var stack = tree.openElements;

    // </optgroup> implicitly closes <option>
    if (stack.last().tagName == "option" &&
        stack[stack.length - 2].tagName == "optgroup") {
      stack.removeLast();
    }

    // It also closes </optgroup>
    if (stack.last().tagName == "optgroup") {
      stack.removeLast();
      // But nothing else
    } else {
      parser.parseError("unexpected-end-tag-in-select",
          {"name": "optgroup"});
    }
  }

  /** `</select>`: pops through the select and resets the insertion mode. */
  void endTagSelect(EndTagToken token) {
    if (!tree.elementInScope("select", variant: "select")) {
      // innerHTML case
      assert(parser.innerHTMLMode);
      parser.parseError();
      return;
    }
    popOpenElementsUntil("select");
    parser.resetInsertionMode();
  }

  /** Any other end tag is reported and dropped. */
  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-select",
        {"name": token.name});
  }
}

2792

2793

/**
 * Phase used for a `select` nested inside a table.
 *
 * Behaves like [InSelectPhase], except that table-structure tags close the
 * select first.
 */
class InSelectInTablePhase extends Phase {
  InSelectInTablePhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "caption": case "table": case "tbody": case "tfoot": case "thead":
      case "tr": case "td": case "th":
        return startTagTable(token);
      default:
        return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "caption": case "table": case "tbody": case "tfoot": case "thead":
      case "tr": case "td": case "th":
        return endTagTable(token);
      default:
        return endTagOther(token);
    }
  }

  /** Runs the in-select EOF handling, then returns `false`. */
  bool processEOF() {
    parser._inSelectPhase.processEOF();
    return false;
  }

  Token processCharacters(CharactersToken token) {
    return parser._inSelectPhase.processCharacters(token);
  }

  /**
   * Table-structure start tag: parse error, an implied `</select>` is
   * processed, and [token] is returned for reprocessing.
   */
  Token startTagTable(StartTagToken token) {
    parser.parseError("unexpected-table-element-start-tag-in-select-in-table",
        {"name": token.name});
    var impliedSelectEnd = new EndTagToken("select", data: {});
    endTagOther(impliedSelectEnd);
    return token;
  }

  Token startTagOther(StartTagToken token) {
    return parser._inSelectPhase.processStartTag(token);
  }

  /**
   * Table-structure end tag: always a parse error. When the named element
   * is in table scope, an implied `</select>` is processed and [token]
   * returned for reprocessing; otherwise `null` is returned.
   */
  Token endTagTable(EndTagToken token) {
    parser.parseError("unexpected-table-element-end-tag-in-select-in-table",
        {"name": token.name});
    if (!tree.elementInScope(token.name, variant: "table")) return null;
    endTagOther(new EndTagToken("select", data: {}));
    return token;
  }

  Token endTagOther(EndTagToken token) {
    return parser._inSelectPhase.processEndTag(token);
  }
}

2848

2849

2850 class InForeignContentPhase extends Phase {

2851 // TODO(jmesserly): this is sorted so we could binary search.

2852 const breakoutElements = const [

2853 'b', 'big', 'blockquote', 'body', 'br','center', 'code', 'dd', 'div', 'dl',

2854 'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i',

2855 'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby',

2856 's', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tt', 'u',

2857 'ul', 'var'

2858 ];

2859

2860 InForeignContentPhase(parser) : super(parser);

2861

/**
 * Rewrites [token.name] to its mixed-case SVG spelling when the current
 * (all-lowercase) name has one; other names are left untouched.
 */
void adjustSVGTagNames(token) {
  final svgNames = const {
    "altglyph": "altGlyph",
    "altglyphdef": "altGlyphDef",
    "altglyphitem": "altGlyphItem",
    "animatecolor": "animateColor",
    "animatemotion": "animateMotion",
    "animatetransform": "animateTransform",
    "clippath": "clipPath",
    "feblend": "feBlend",
    "fecolormatrix": "feColorMatrix",
    "fecomponenttransfer": "feComponentTransfer",
    "fecomposite": "feComposite",
    "feconvolvematrix": "feConvolveMatrix",
    "fediffuselighting": "feDiffuseLighting",
    "fedisplacementmap": "feDisplacementMap",
    "fedistantlight": "feDistantLight",
    "feflood": "feFlood",
    "fefunca": "feFuncA",
    "fefuncb": "feFuncB",
    "fefuncg": "feFuncG",
    "fefuncr": "feFuncR",
    "fegaussianblur": "feGaussianBlur",
    "feimage": "feImage",
    "femerge": "feMerge",
    "femergenode": "feMergeNode",
    "femorphology": "feMorphology",
    "feoffset": "feOffset",
    "fepointlight": "fePointLight",
    "fespecularlighting": "feSpecularLighting",
    "fespotlight": "feSpotLight",
    "fetile": "feTile",
    "feturbulence": "feTurbulence",
    "foreignobject": "foreignObject",
    "glyphref": "glyphRef",
    "lineargradient": "linearGradient",
    "radialgradient": "radialGradient",
    "textpath": "textPath"
  };

  var adjusted = svgNames[token.name];
  if (adjusted == null) return;
  token.name = adjusted;
}

2907

/**
 * Handles character data in foreign content.
 *
 * A lone U+0000 is replaced by U+FFFD. Otherwise, text that is not all
 * whitespace clears [parser.framesetOK]. The (possibly modified) token is
 * then passed to the base [Phase] implementation; its result is not
 * propagated.
 */
Token processCharacters(CharactersToken token) {
  if (token.data == "\u0000") {
    token.data = "\uFFFD";
  } else {
    var hasVisibleText = parser.framesetOK && !allWhitespace(token.data);
    if (hasVisibleText) parser.framesetOK = false;
  }
  super.processCharacters(token);
}

2916

/**
 * Handles a start tag while in foreign (non-HTML) content.
 *
 * If [token] names one of the `breakoutElements`, or is a `font` tag carrying
 * a `color`, `face` or `size` attribute, a parse error is reported, open
 * elements are popped until the current node is in the default namespace or
 * is an HTML / MathML-text integration point, and [token] is returned.
 *
 * Otherwise the token is adjusted for the current node's namespace, inserted
 * into the tree with that namespace, and `null` is returned. Self-closing
 * tags are popped again immediately and marked as acknowledged.
 */
Token processStartTag(StartTagToken token) {
  var current = tree.openElements.last();

  // Kept as one short-circuiting expression so the attribute lookups only
  // happen for a non-breakout "font" tag.
  var breaksOut = breakoutElements.indexOf(token.name) >= 0 ||
      (token.name == "font" &&
          (token.data.containsKey("color") ||
           token.data.containsKey("face") ||
           token.data.containsKey("size")));

  if (breaksOut) {
    parser.parseError("unexpected-html-element-in-foreign-content",
        {'name': token.name});
    while (true) {
      var top = tree.openElements.last();
      if (top.namespace == tree.defaultNamespace ||
          parser.isHTMLIntegrationPoint(top) ||
          parser.isMathMLTextIntegrationPoint(top)) {
        break;
      }
      tree.openElements.removeLast();
    }
    return token;
  }

  var ns = current.namespace;
  if (ns == Namespaces.mathml) {
    parser.adjustMathMLAttributes(token);
  } else if (ns == Namespaces.svg) {
    adjustSVGTagNames(token);
    parser.adjustSVGAttributes(token);
  }
  parser.adjustForeignAttributes(token);
  token.namespace = ns;
  tree.insertElement(token);

  if (token.selfClosing) {
    // A self-closing element never stays on the stack of open elements.
    tree.openElements.removeLast();
    token.selfClosingAcknowledged = true;
  }
  return null;
}

2951

/**
 * Handles an end tag while in foreign (non-HTML) content.
 *
 * Reports "unexpected-end-tag" when the current node's name, compared
 * ASCII-case-insensitively, differs from [token]'s name. It then walks the
 * stack of open elements from the top down:
 *
 * * When a node's lower-cased name matches the token, elements are popped up
 *   to and including that node and `null` is returned.
 * * When the walk reaches a node in the default namespace, the token is
 *   delegated to the parser's current phase and that result is returned.
 */
Token processEndTag(EndTagToken token) {
  var nodeIndex = tree.openElements.length - 1;
  var node = tree.openElements.last();

  // Compare case-insensitively, exactly like the matching loop below. SVG
  // nodes can carry camelCase names (see adjustSVGTagNames), so a
  // case-sensitive check would flag a correct `</foreignObject>` as an error.
  if (asciiUpper2Lower(node.tagName) != token.name) {
    parser.parseError("unexpected-end-tag", {"name": token.name});
  }

  while (true) {
    if (asciiUpper2Lower(node.tagName) == token.name) {
      //XXX this isn't in the spec but it seems necessary
      if (parser.phase == parser._inTableTextPhase) {
        InTableTextPhase inTableText = parser.phase;
        inTableText.flushCharacters();
        parser.phase = inTableText.originalPhase;
      }
      // Pop everything above the matched node, and the node itself.
      while (tree.openElements.removeLast() != node) {
        assert(tree.openElements.length > 0);
      }
      return null;
    }

    nodeIndex -= 1;
    node = tree.openElements[nodeIndex];
    if (node.namespace == tree.defaultNamespace) {
      // Reached an element in the default namespace: let the current phase
      // deal with the end tag.
      return parser.phase.processEndTag(token);
    }
  }
}

2986 }

2987

2988

/**
 * Token handlers for the parser's "after body" phase.
 *
 * Comments are attached to the first open element. Character data and any
 * tag other than `html` report a parse error, switch the parser back to the
 * in-body phase and hand the token back to the caller.
 */
class AfterBodyPhase extends Phase {
  AfterBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case "html": return startTagHtml(token);
      default: return startTagOther(token);
    }
  }

  processEndTag(EndTagToken token) {
    switch (token.name) {
      case "html": return endTagHtml(token);
      default: return endTagOther(token);
    }
  }

  // Stop parsing.
  bool processEOF() => false;

  Token processComment(CommentToken token) {
    // The comment must be appended to the <html> element (the first entry of
    // the open-elements stack), not to whatever is currently open.
    var htmlElement = tree.openElements[0];
    tree.insertComment(token, htmlElement);
    return null;
  }

  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-after-body");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /** Delegates an `html` start tag to the in-body phase. */
  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-after-body",
        {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }

  /**
   * Handles `</html>`: a parse error in innerHTML mode, otherwise a switch to
   * the after-after-body phase. [name] receives the end-tag token passed by
   * [processEndTag]; it is not inspected here.
   */
  void endTagHtml(name) {
    if (!parser.innerHTMLMode) {
      parser.phase = parser._afterAfterBodyPhase;
      return;
    }
    parser.parseError("unexpected-end-tag-after-body-innerhtml");
  }

  Token endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-after-body",
        {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }
}

3043

/**
 * Token handlers for the parser's "in frameset" phase.
 *
 * `frameset` and `frame` start tags are inserted into the tree (a `frame` is
 * popped again straight away), `noframes` is delegated to the in-body phase,
 * and character data or any other tag only reports a parse error.
 */
class InFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
  InFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "frameset") return startTagFrameset(token);
    if (name == "frame") return startTagFrame(token);
    if (name == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    if (token.name == "frameset") return endTagFrameset(token);
    return endTagOther(token);
  }

  /**
   * Reports "eof-in-frameset" unless the current node is the `html` element,
   * which is asserted to happen only in innerHTML mode. Always returns false.
   */
  bool processEOF() {
    if (tree.openElements.last().tagName == "html") {
      assert(parser.innerHTMLMode);
    } else {
      parser.parseError("eof-in-frameset");
    }
    return false;
  }

  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-in-frameset");
    return null;
  }

  void startTagFrameset(StartTagToken token) {
    tree.insertElement(token);
  }

  void startTagFrame(StartTagToken token) {
    tree.insertElement(token);
    // The frame element is removed from the open-elements stack right after
    // being inserted.
    tree.openElements.removeLast();
  }

  Token startTagNoframes(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-in-frameset",
        {"name": token.name});
    return null;
  }

  void endTagFrameset(EndTagToken token) {
    var current = tree.openElements.last();
    if (current.tagName == "html") {
      // innerHTML case
      parser.parseError("unexpected-frameset-in-frameset-innerhtml");
    } else {
      tree.openElements.removeLast();
    }

    if (parser.innerHTMLMode) return;
    // If we're not in innerHTML mode and the current node is not a
    // "frameset" element (anymore) then switch.
    if (tree.openElements.last().tagName != "frameset") {
      parser.phase = parser._afterFramesetPhase;
    }
  }

  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-in-frameset",
        {"name": token.name});
  }
}

3115

3116

/**
 * Token handlers for the parser's "after frameset" phase.
 *
 * `noframes` start tags are delegated to the in-head phase and `</html>`
 * moves the parser to the after-after-frameset phase. Character data and all
 * other tags only report a parse error.
 */
class AfterFramesetPhase extends Phase {
  // http://www.whatwg.org/specs/web-apps/current-work/#after3
  AfterFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "noframes") return startTagNoframes(token);
    return startTagOther(token);
  }

  processEndTag(EndTagToken token) {
    if (token.name == "html") return endTagHtml(token);
    return endTagOther(token);
  }

  // Stop parsing
  bool processEOF() => false;

  Token processCharacters(CharactersToken token) {
    parser.parseError("unexpected-char-after-frameset");
    return null;
  }

  Token startTagNoframes(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  void startTagOther(StartTagToken token) {
    parser.parseError("unexpected-start-tag-after-frameset",
        {"name": token.name});
  }

  void endTagHtml(EndTagToken token) {
    parser.phase = parser._afterAfterFramesetPhase;
  }

  void endTagOther(EndTagToken token) {
    parser.parseError("unexpected-end-tag-after-frameset",
        {"name": token.name});
  }
}

3161

3162

/**
 * Token handlers for the parser's "after after body" phase.
 *
 * Comments are attached to the document and whitespace is delegated to the
 * in-body phase. Any other character data or tag (except an `html` start
 * tag) reports a parse error, switches the parser back to the in-body phase
 * and hands the token back to the caller.
 */
class AfterAfterBodyPhase extends Phase {
  AfterAfterBodyPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    switch (token.name) {
      case 'html': return startTagHtml(token);
      default: return startTagOther(token);
    }
  }

  // Stop parsing.
  bool processEOF() => false;

  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-eof-but-got-char");
    parser.phase = parser._inBodyPhase;
    return token;
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagOther(StartTagToken token) {
    parser.parseError("expected-eof-but-got-start-tag", {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }

  Token processEndTag(EndTagToken token) {
    parser.parseError("expected-eof-but-got-end-tag", {"name": token.name});
    parser.phase = parser._inBodyPhase;
    return token;
  }
}

3203

/**
 * Token handlers for the parser's "after after frameset" phase.
 *
 * Comments are attached to the document, whitespace and `html` start tags
 * are delegated to the in-body phase, and `noframes` start tags to the
 * in-head phase. Everything else only reports a parse error.
 */
class AfterAfterFramesetPhase extends Phase {
  AfterAfterFramesetPhase(parser) : super(parser);

  processStartTag(StartTagToken token) {
    var name = token.name;
    if (name == "html") return startTagHtml(token);
    if (name == "noframes") return startTagNoFrames(token);
    return startTagOther(token);
  }

  // Stop parsing.
  bool processEOF() => false;

  Token processComment(CommentToken token) {
    tree.insertComment(token, tree.document);
    return null;
  }

  Token processSpaceCharacters(SpaceCharactersToken token) =>
      parser._inBodyPhase.processSpaceCharacters(token);

  Token processCharacters(CharactersToken token) {
    parser.parseError("expected-eof-but-got-char");
    return null;
  }

  Token startTagHtml(StartTagToken token) =>
      parser._inBodyPhase.processStartTag(token);

  Token startTagNoFrames(StartTagToken token) =>
      parser._inHeadPhase.processStartTag(token);

  void startTagOther(StartTagToken token) {
    parser.parseError("expected-eof-but-got-start-tag",
        {"name": token.name});
  }

  Token processEndTag(EndTagToken token) {
    parser.parseError("expected-eof-but-got-end-tag",
        {"name": token.name});
    return null;
  }
}

3247

3248

/**
 * Error in parsed document.
 *
 * Carries an [errorCode], the source [span] it was reported at, and the
 * [data] used to fill in the human-readable [message].
 */
class ParseError implements Exception {
  /** Key used to look up the message template in `errorMessages`. */
  final String errorCode;

  /** Source location of the error; supplies [line] and [column]. */
  final Span span;

  /** Values passed to `formatStr` together with the message template. */
  final Map data;

  ParseError(this.errorCode, this.span, this.data);

  // Getters use the paren-less form throughout, matching [message]; the
  // original mixed `get line()` with `get message`.
  int get line => span.line;

  int get column => span.column;

  String get message => formatStr(errorMessages[errorCode], data);

  String toString() => "ParseError at line $line column $column: $message";
}

OLD	NEW

« no previous file with comments | « README.md ('k') | lib/char_encodings.dart » ('j') | no next file with comments »