utils/apidoc/mdn/extract.dart - Issue 9315026: Cleanup mdn scripts

Side by Side Diff: utils/apidoc/mdn/extract.dart

Issue 9315026: Cleanup mdn scripts (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Code review fixes Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #import ("dart:html");	1 #import ("dart:html");

2 #import ("dart:htmlimpl");	2 #import ("dart:htmlimpl");

3 #import ("dart:dom", prefix:"dom");	3 #import ("dart:dom", prefix:"dom");

4 #import ("dart:json");	4 #import ("dart:json");

5	5

6 // Workaround for HTML lib missing feature.	6 // Workaround for HTML lib missing feature.

7 Range newRange() {	7 Range newRange() {

8 return LevelDom.wrapRange(dom.document.createRange());	8 return LevelDom.wrapRange(dom.document.createRange());

9 }	9 }

10	10

11 // Temporary range object to optimize performance computing client rects	11 // Temporary range object to optimize performance computing client rects

12 // from text nodes.	12 // from text nodes.

13 Range _tempRange;	13 Range _tempRange;

14 // Hacks because ASYNC measurement is annoying when just writing a script.	14 // Hacks because ASYNC measurement is annoying when just writing a script.

15 ClientRect getClientRect(Node n) {	15 ClientRect getClientRect(Node n) {

16 if (n is Element) {	16 if (n is Element) {

17 Element e = n;	17 dom.Element raw = unwrapDomObject(n.dynamic);

18 dom.Element raw = unwrapDomObject(e.dynamic);

19 return LevelDom.wrapClientRect(raw.getBoundingClientRect());	18 return LevelDom.wrapClientRect(raw.getBoundingClientRect());

20 } else {	19 } else {

21 // Crazy hacks that works for nodes.... create a range and measure it.	20 // Crazy hacks that works for nodes.... create a range and measure it.

22 if (_tempRange == null) {	21 if (_tempRange == null) {

23 _tempRange = newRange();	22 _tempRange = newRange();

24 }	23 }

25 _tempRange.setStartBefore(n);	24 _tempRange.setStartBefore(n);

26 _tempRange.setEndAfter(n);	25 _tempRange.setEndAfter(n);

27 return _tempRange.getBoundingClientRect();	26 return _tempRange.getBoundingClientRect();

28 }	27 }

29 }	28 }

30	29

31 final DART_REMOVED = "dart_removed";	30 /**

	31 * CSS class that is added to elements in the DOM to indicate that they should

	32 * be removed when extracting blocks of documentation. This is helpful when

	33 * running this script in a web browser as it is easy to visually see what

	34 * blocks of information were extracted when using CSS such as DEBUG_CSS

	35 * which highlights elements that should be removed.

	36 */

	37 final DART_REMOVED = "dart-removed";

32	38

33 final DEBUG_CSS = """	39 final DEBUG_CSS = """

34 <style type="text/css">	40 <style type="text/css">

35 .dart_removed {	41 .dart-removed {

36 background-color: rgba(255, 0, 0, 0.5);	42 background-color: rgba(255, 0, 0, 0.5);

37 }	43 }

38 </style>""";	44 </style>""";

39	45

40 final MIN_PIXELS_DIFFERENT_LINES = 10;	46 final MIN_PIXELS_DIFFERENT_LINES = 10;

41	47

42 final IDL_SELECTOR = "pre.eval, pre.idl";	48 final IDL_SELECTOR = "pre.eval, pre.idl";

43	49

44 Map data;	50 Map data;

45	51

(...skipping 228 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
274 if (path.startsWith('/')) {	280 if (path.startsWith('/')) {

275 return "$pageDomain$path";	281 return "$pageDomain$path";

276 } else if (path.startsWith("#")) {	282 } else if (path.startsWith("#")) {

277 return "$pageUrl$path";	283 return "$pageUrl$path";

278 } else {	284 } else {

279 return "$pageDir$path";	285 return "$pageDir$path";

280 }	286 }

281 }	287 }

282	288

283 bool inTable(Node n) {	289 bool inTable(Node n) {

284 while(n != null) {	290 while (n != null) {

285 if (n is TableElement) return true;	291 if (n is TableElement) return true;

286 n = n.parent;	292 n = n.parent;

287 }	293 }

288 return false;	294 return false;

289 }	295 }

290	296

291 String escapeHTML(str) {	297 String escapeHTML(str) {

292 Element e = new Element.tag("div");	298 Element e = new Element.tag("div");

293 e.text = str;	299 e.text = str;

294 return e.innerHTML;	300 return e.innerHTML;

295 }	301 }

296	302

297 List<Text> getAllTextNodes(Element elem) {	303 List<Text> getAllTextNodes(Element elem) {

298 List<Text> nodes = <Text>[];	304 final nodes = <Text>[];

299 helper(Node n) {	305 helper(Node n) {

300 if (n is Text) {	306 if (n is Text) {

301 nodes.add(n);	307 nodes.add(n);

302 } else {	308 } else {

303 for (Node child in n.nodes) {	309 for (Node child in n.nodes) {

304 helper(child);	310 helper(child);

305 }	311 }

306 }	312 }

307 };	313 };

308	314

309 helper(elem);	315 helper(elem);

310 return nodes;	316 return nodes;

311 }	317 }

312	318

313 /**	319 /**

314 * Whether a node and its children are all types that are safe to skip if the	320 * Whether a node and its children are all types that are safe to skip if the

315 * nodes have no text content.	321 * nodes have no text content.

316 */	322 */

317 bool isSkippableType(Node n) {	323 bool isSkippableType(Node n) {

318 // TODO(jacobr): are there any types we don't want to skip even if they	324 // TODO(jacobr): are there any types we don't want to skip even if they

319 // have no text content?	325 // have no text content?

320 if (n is ImageElement \|\| n is CanvasElement \|\| n is InputElement	326 if (n is ImageElement \|\| n is CanvasElement \|\| n is InputElement

321 \|\| n is ObjectElement) {	327 \|\| n is ObjectElement) {

322 return false;	328 return false;

323 }	329 }

324 if (n is Text) return true;	330 if (n is Text) return true;

325	331

326 for (Node child in n.nodes) {	332 for (final child in n.nodes) {

327 if (isSkippableType(child) == false) {	333 if (!isSkippableType(child)) {

328 return false;	334 return false;

329 }	335 }

330 }	336 }

331 return true;	337 return true;

332 }	338 }

333	339

334 bool isSkippable(Node n) {	340 bool isSkippable(Node n) {

335 if (!isSkippableType(n)) return false;	341 if (!isSkippableType(n)) return false;

336 return n.text.trim().length == 0;	342 return n.text.trim().length == 0;

337 }	343 }

338	344

339 void onEnd() {	345 void onEnd() {

340 // Hideous hack to send JSON back to JS.	346 // Hideous hack to send JSON back to JS.

341 String dbJson = JSON.stringify(dbEntry);	347 String dbJson = JSON.stringify(dbEntry);

342 // workaround bug in JSON parser.	348 // workaround bug in JSON parser.

343 dbJson = dbJson.replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n");	349 dbJson = dbJson.replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n");

344	350

	351 // Use postMessage to send the JSON to JavaScript. TODO(jacobr): use a simple

	352 // isolate based Dart-JS interop solution in the future.

345 window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$dbJson", "*");	353 window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$dbJson", "*");

346 }	354 }

347	355

348 class SectionParseResult {	356 class SectionParseResult {

349 final String html;	357 final String html;

350 final String url;	358 final String url;

351 final String idl;	359 final String idl;

352 SectionParseResult(this.html, this.url, this.idl);	360 SectionParseResult(this.html, this.url, this.idl);

353 }	361 }

354	362

355 String genCleanHtml(Element root) {	363 String genCleanHtml(Element root) {

356 for (Element e in root.queryAll(".$DART_REMOVED")) {	364 for (final e in root.queryAll(".$DART_REMOVED")) {

357 e.classes.remove(DART_REMOVED);	365 e.classes.remove(DART_REMOVED);

358 }	366 }

359	367

360 // Ditch inline styles.	368 // Ditch inline styles.

361 for (Element e in root.queryAll('[style]')) {	369 for (final e in root.queryAll('[style]')) {

362 e.attributes.remove('style');	370 e.attributes.remove('style');

363 }	371 }

364	372

365 // These elements are just tags that we should suppress.	373 // These elements are just tags that we should suppress.

366 for (Element e in root.queryAll(".lang.lang-en")) {	374 for (final e in root.queryAll(".lang.lang-en")) {

367 e.remove();	375 e.remove();

368 }	376 }

369	377

	378 Element parametersHeader;

	379 Element returnValueHeader;

	380 for (final e in root.queryAll("h6")) {

	381 if (e.text == 'Parameters') {

	382 parametersHeader = e;

	383 } else if (e.text == 'Return value') {

	384 returnValueHeader = e;

	385 }

	386 }

	387

	388 if (parametersHeader != null) {

	389 int numEmptyParameters = 0;

	390 final parameterDescriptions = root.queryAll("dd");

	391 for (Element parameterDescription in parameterDescriptions) {

	392 if (parameterDescription.text.trim().length == 0) {

	393 numEmptyParameters++;

	394 }

	395 }

	396 if (numEmptyParameters > 0 &&

	397 numEmptyParameters == parameterDescriptions.length) {

	398 // Remove the parameter list as it adds zero value as all descriptions

	399 // are empty.

	400 parametersHeader.remove();

	401 for (final e in root.queryAll("dl")) {

	402 e.remove();

	403 }

	404 } else if (parameterDescriptions.length == 0 &&

	405 parametersHeader.nextElementSibling != null &&

	406 parametersHeader.nextElementSibling.text.trim() == 'None.') {

	407 // No need to display that the function takes 0 parameters.

	408 parametersHeader.nextElementSibling.remove();

	409 parametersHeader.remove();

	410 }

	411 }

	412

	413 // Heuristic: if the return value is a single word it is a type name not a

	414 // useful text description so suppress it.

	415 if (returnValueHeader != null &&

	416 returnValueHeader.nextElementSibling != null &&

	417 returnValueHeader.nextElementSibling.text.trim().split(' ').length < 2) {

	418 returnValueHeader.nextElementSibling.remove();

	419 returnValueHeader.remove();

	420 }

	421

370 bool changed = true;	422 bool changed = true;

371 while (changed) {	423 while (changed) {

372 changed = false;	424 changed = false;

373 while (root.nodes.length == 1) {	425 while (root.nodes.length == 1 && root.nodes.first is Element) {

374 Node child = root.nodes.first;	426 root = root.nodes.first;

375 if (child is Element) {	427 changed = true;

376 root = child;

377 changed = true;

378 } else {

379 // Just calling innerHTML on the parent will be sufficient...

380 // and insures the output is properly escaped.

381 break;

382 }

383 }	428 }

384	429

385 // Trim useless nodes from the front.	430 // Trim useless nodes from the front.

386 while(root.nodes.length > 0 &&	431 while (root.nodes.length > 0 &&

387 isSkippable(root.nodes.first)) {	432 isSkippable(root.nodes.first)) {

388 root.nodes.first.remove();	433 root.nodes.first.remove();

389 changed = true;	434 changed = true;

390 }	435 }

391	436

392 // Trim useless nodes from the back.	437 // Trim useless nodes from the back.

393 while(root.nodes.length > 0 &&	438 while (root.nodes.length > 0 &&

394 isSkippable(root.nodes.last())) {	439 isSkippable(root.nodes.last())) {

395 root.nodes.last().remove();	440 root.nodes.last().remove();

396 changed = true;	441 changed = true;

397 }	442 }

398 }	443 }

399 return JSONFIXUPHACK(root.innerHTML);	444 return JSONFIXUPHACK(root.innerHTML);

400 }	445 }

401	446

402 String genPrettyHtml(DocumentFragment fragment) {

403 return genCleanHtml(fragment);

404 }

405

406 String genPrettyHtmlFromElement(Element e) {	447 String genPrettyHtmlFromElement(Element e) {

407 e = e.clone(true);	448 e = e.clone(true);

408 return genCleanHtml(e);	449 return genCleanHtml(e);

409 }	450 }

410	451

411 class PostOrderTraversalIterator implements Iterator<Node> {	452 class PostOrderTraversalIterator implements Iterator<Node> {

412	453

413 Node _next;	454 Node _next;

414	455

415 PostOrderTraversalIterator(Node start) {	456 PostOrderTraversalIterator(Node start) {

416 _next = _leftMostDescendent(start);	457 _next = _leftMostDescendent(start);

417 }	458 }

418	459

419 bool hasNext() => _next != null;	460 bool hasNext() => _next != null;

420	461

421 Node next() {	462 Node next() {

422 if (_next == null) return null;	463 if (_next == null) return null;

423 Node ret = _next;	464 final ret = _next;

424 if (_next.nextNode != null) {	465 if (_next.nextNode != null) {

425 _next = _leftMostDescendent(_next.nextNode);	466 _next = _leftMostDescendent(_next.nextNode);

426 } else {	467 } else {

427 _next = _next.parent;	468 _next = _next.parent;

428 }	469 }

429 return ret;	470 return ret;

430 }	471 }

431	472

432 static Node _leftMostDescendent(Node n) {	473 static Node _leftMostDescendent(Node n) {

433 while (n.nodes.length > 0) {	474 while (n.nodes.length > 0) {

434 n = n.nodes.first;	475 n = n.nodes.first;

435 }	476 }

436 return n;	477 return n;

437 }	478 }

438 }	479 }

439	480

440 class PostOrderTraversal implements Iterable<Node> {	481 class PostOrderTraversal implements Iterable<Node> {

441 final Node _node;	482 final Node _node;

442 PostOrderTraversal(this._node);	483 PostOrderTraversal(this._node);

443	484

444 Iterator<Node> iterator() => new PostOrderTraversalIterator(_node);	485 Iterator<Node> iterator() => new PostOrderTraversalIterator(_node);

445 }	486 }

446	487

	488 /**

	489 * Estimate what content represents the first line of text within the [section]

	490 * range returning null if there isn't a plausible first line of text that

	491 * contains the string [prop]. We measure the actual rendered client rectangle

	492 * for the text and use heuristics defining how many pixels text can vary by

	493 * and still be viewed as being on the same line.

	494 */

447 Range findFirstLine(Range section, String prop) {	495 Range findFirstLine(Range section, String prop) {

448 Range firstLine = newRange();	496 final firstLine = newRange();

449 firstLine.setStart(section.startContainer, section.startOffset);	497 firstLine.setStart(section.startContainer, section.startOffset);

450	498

451 num maxBottom = null;	499 num maxBottom = null;

452 for (Node n in new PostOrderTraversal(section.startContainer)) {	500 for (final n in new PostOrderTraversal(section.startContainer)) {

453 int compareResult = section.comparePoint(n, 0);	501 int compareResult = section.comparePoint(n, 0);

454 if (compareResult == -1) {	502 if (compareResult == -1) {

455 // before range so skip.	503 // before range so skip.

456 continue;	504 continue;

457 } else if (compareResult > 0) {	505 } else if (compareResult > 0) {

458 // After range so exit.	506 // After range so exit.

459 break;	507 break;

460 }	508 }

461	509

462 final rect = getClientRect(n);	510 final rect = getClientRect(n);

463 num bottom = rect.bottom;	511 num bottom = rect.bottom;

464 if (rect.height > 0 && rect.width > 0) {	512 if (rect.height > 0 && rect.width > 0) {

465 if (maxBottom != null && (	513 if (maxBottom != null &&

466 maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom	514 maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) {

467 )) {

468 break;	515 break;

469 } else if (maxBottom == null \|\| maxBottom > bottom) {	516 } else if (maxBottom == null \|\| maxBottom > bottom) {

470 maxBottom = bottom;	517 maxBottom = bottom;

471 }	518 }

472 }	519 }

473	520

474 firstLine.setEndAfter(n);	521 firstLine.setEndAfter(n);

475 }	522 }

476	523

477 if (firstLine.toString().indexOf(stripWebkit(prop)) == -1) {	524 // If the first line of text in the section does not contain the property

	525 // name then we're not confident we are able to extract a high accuracy match

	526 // so we should not return anything.

	527 if (!firstLine.toString().contains(stripWebkit(prop))) {

478 return null;	528 return null;

479 }	529 }

480 return firstLine;	530 return firstLine;

481 }	531 }

482	532

	533 /** Find child anchor elements that contain the text [prop]. */

483 AnchorElement findAnchorElement(Element root, String prop) {	534 AnchorElement findAnchorElement(Element root, String prop) {

484 for (AnchorElement a in root.queryAll("a")) {	535 for (AnchorElement a in root.queryAll("a")) {

485 if (a.text.indexOf(prop) != -1) {	536 if (a.text.contains(prop)) {

486 return a;	537 return a;

487 }	538 }

488 }	539 }

489 return null;	540 return null;

490 }	541 }

491	542

492 // First surrounding element with an ID is safe enough.	543 // First surrounding element with an ID is safe enough.

493 Element findTigherRoot(Element elem, Element root) {	544 Element findTighterRoot(Element elem, Element root) {

494 Element candidate = elem;	545 Element candidate = elem;

495 while(root != candidate) {	546 while (root != candidate) {

496 candidate = candidate.parent;	547 candidate = candidate.parent;

497 if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) {	548 if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) {

498 break;	549 break;

499 }	550 }

500 }	551 }

501 return candidate;	552 return candidate;

502 }	553 }

503	554

504 // this is very slow and ugly.. consider rewriting.	555 // TODO(jacobr): this is very slow and ugly.. consider rewriting or at least

	556 // commenting carefully.

505 SectionParseResult filteredHtml(Element elem, Element root, String prop,	557 SectionParseResult filteredHtml(Element elem, Element root, String prop,

506 Function fragmentGeneratedCallback) {	558 Function fragmentGeneratedCallback) {

507 // Using a tighter root avoids false positives at the risk of trimming	559 // Using a tighter root avoids false positives at the risk of trimming

508 // text we shouldn't.	560 // text we shouldn't.

509 root = findTigherRoot(elem, root);	561 root = findTighterRoot(elem, root);

510 Range range = newRange();	562 final range = newRange();

511 range.setStartBefore(elem);	563 range.setStartBefore(elem);

512	564

513 Element current = elem;	565 Element current = elem;

514 while (current != null) {	566 while (current != null) {

515 range.setEndBefore(current);	567 range.setEndBefore(current);

516 if (current.classes.contains(DART_REMOVED)) {	568 if (current.classes.contains(DART_REMOVED) &&

517 if (range.toString().trim().length > 0) {	569 range.toString().trim().length > 0) {

518 break;	570 break;

519 }

520 }	571 }

521 if (current.firstElementChild != null) {	572 if (current.firstElementChild != null) {

522 current = current.firstElementChild;	573 current = current.firstElementChild;

523 } else {	574 } else {

524 while (current != null) {	575 while (current != null) {

525 range.setEndAfter(current);	576 range.setEndAfter(current);

526 if (current == root) {	577 if (current == root) {

527 current = null;	578 current = null;

528 break;	579 break;

529 }	580 }

(...skipping 10 matching lines...) Expand all Loading...
540 Range firstLine = findFirstLine(range, prop);	591 Range firstLine = findFirstLine(range, prop);

541 if (firstLine != null) {	592 if (firstLine != null) {

542 range.setStart(firstLine.endContainer, firstLine.endOffset);	593 range.setStart(firstLine.endContainer, firstLine.endOffset);

543 DocumentFragment firstLineClone = firstLine.cloneContents();	594 DocumentFragment firstLineClone = firstLine.cloneContents();

544 AnchorElement anchor = findAnchorElement(firstLineClone, prop);	595 AnchorElement anchor = findAnchorElement(firstLineClone, prop);

545 if (anchor != null) {	596 if (anchor != null) {

546 url = getAbsoluteUrl(anchor);	597 url = getAbsoluteUrl(anchor);

547 }	598 }

548 }	599 }

549 }	600 }

550 DocumentFragment fragment = range.cloneContents();	601 final fragment = range.cloneContents();

551 if (fragmentGeneratedCallback != null) {	602 if (fragmentGeneratedCallback != null) {

552 fragmentGeneratedCallback(fragment);	603 fragmentGeneratedCallback(fragment);

553 }	604 }

554 // Strip tags we don't want	605 // Strip tags we don't want

555 for (Element e in fragment.queryAll("script, object, style")) {	606 for (Element e in fragment.queryAll("script, object, style")) {

556 e.remove();	607 e.remove();

557 }	608 }

558	609

559 // Extract idl	610 // Extract idl

560 StringBuffer idl = new StringBuffer();	611 final idl = new StringBuffer();

561 if (prop != null && prop.length > 0) {	612 if (prop != null && prop.length > 0) {

562 // Only expect properties to have HTML.	613 // Only expect properties to have HTML.

563 for(Element e in fragment.queryAll(IDL_SELECTOR)) {	614 for(Element e in fragment.queryAll(IDL_SELECTOR)) {

564 idl.add(e.outerHTML);	615 idl.add(e.outerHTML);

565 e.remove();	616 e.remove();

566 }	617 }

567 // TODO(jacobr) this is a very basic regex to see if text looks like IDL	618 // TODO(jacobr) this is a very basic regex to see if text looks like IDL

568 RegExp likelyIdl = new RegExp(" $prop\\w*\\(");	619 RegExp likelyIdl = new RegExp(" $prop\\w*\\(");

569	620

570 for (Element e in fragment.queryAll("pre")) {	621 for (Element e in fragment.queryAll("pre")) {

571 // Check if it looks like idl...	622 // Check if it looks like idl...

572 String txt = e.text.trim();	623 String txt = e.text.trim();

573 if (likelyIdl.hasMatch(txt) && txt.indexOf("\n") != -1	624 if (likelyIdl.hasMatch(txt) && txt.contains("\n") && txt.contains(")")) {

574 && txt.indexOf(")") != -1) {

575 idl.add(e.outerHTML);	625 idl.add(e.outerHTML);

576 e.remove();	626 e.remove();

577 }	627 }

578 }	628 }

579 }	629 }

580 return new SectionParseResult(genPrettyHtml(fragment), url, idl.toString());	630 return new SectionParseResult(genCleanHtml(fragment), url, idl.toString());

581 }	631 }

582	632

583 Element findBest(Element root, List<Text> allText, String prop, String propType) {	633 /**

584 // Best bet: match an id	634 * Find the best child element of [root] that appears to be an API definition

585 Element cand;	635 * for [prop]. [allText] is a list of all text nodes under root computed by

586 cand = root.query("#" + prop);	636 * the caller to improve performance.

	637 */

	638 Element findBest(Element root, List<Text> allText, String prop,

	639 String propType) {

	640 // Best bet: find a child of root where the id matches the property name.

	641 Element cand = root.query("#$prop");

587	642

588 if (cand == null && propType == "methods") {	643 if (cand == null && propType == "methods") {

589 cand = root.query("[id=" + prop + "\$\$]");	644 cand = root.query("[id=$prop\$\$]");

	645 }

	646 while (cand != null && cand.text.trim().length == 0) {

	647 // We found the bookmark for the element but sadly it is just an empty

	648 // placeholder. Find the first real element.

	649 cand = cand.nextElementSibling;

590 }	650 }

591 if (cand != null) {	651 if (cand != null) {

592 while (cand != null && cand.text.trim().length == 0) {	652 return cand;

593 // We found the bookmark for the element but sadly it is just an empty

594 // placeholder. Find the first real element.

595 cand = cand.nextElementSibling;

596 }

597 if (cand != null) {

598 return cand;

599 }

600 }	653 }

601	654

602 // If you are at least 70 pixels from the left, something is definitely fishy and we shouldn't even consider this candidate.	655 // If we are at least 70 pixels from the left, something is definitely

	656 // fishy and we shouldn't even consider this candidate as nobody visually

	657 // formats API docs like that.

603 num candLeft = 70;	658 num candLeft = 70;

604	659

605 for (Text text in allText) {	660 for (Text text in allText) {

606 Element proposed = null;	661 Element proposed = null;

607	662

608 // var t = safeNameCleanup(text.text);	663 // TODO(jacobr): does it hurt precision to use the full cleanup?

609 // TODO(jacobr): does it hurt precision to use the full cleanup?

610 String t = fullNameCleanup(text.text);	664 String t = fullNameCleanup(text.text);

611 if (t == prop) {	665 if (t == prop) {

612 proposed = text.parent;	666 proposed = text.parent;

613 ClientRect candRect = getClientRect(proposed);	667 ClientRect candRect = getClientRect(proposed);

614	668

615 // TODO(jacobr): this is a good heuristic	669 // TODO(jacobr): this is a good heuristic

616 // if (selObj.selector.indexOf(" > DD ") == -1	670 // if (selObj.selector.indexOf(" > DD ") == -1

617 if (candRect.left < candLeft) {	671 if (candRect.left < candLeft) {

618 cand = proposed;	672 cand = proposed;

619 candLeft = candRect.left;	673 candLeft = candRect.left;

620 }	674 }

621 }	675 }

622 }	676 }

623 return cand;	677 return cand;

624 }	678 }

625	679

	680 /**

	681 * Checks whether [e] is tagged as obsolete or deprecated using heuristics

	682 * for what these tags look like in the MDN docs.

	683 */

626 bool isObsolete(Element e) {	684 bool isObsolete(Element e) {

627 RegExp obsoleteRegExp = new RegExp(@"(^\|\s)obsolete(?=\s\|$)");	685 RegExp obsoleteRegExp = new RegExp(@"(^\|\s)obsolete(?=\s\|$)");

628 RegExp deprecatedRegExp = new RegExp(@"(^\|\s)deprecated(?=\s\|$)");	686 RegExp deprecatedRegExp = new RegExp(@"(^\|\s)deprecated(?=\s\|$)");

629 for (Element child in e.queryAll("span")) {	687 for (Element child in e.queryAll("span")) {

630 String t = child.text.toLowerCase();	688 String t = child.text.toLowerCase();

631 if (t.startsWith("obsolete") \|\| t.startsWith("deprecated")) return true;	689 if (t.startsWith("obsolete") \|\| t.startsWith("deprecated")) return true;

632 }	690 }

633	691

634 String text = e.text.toLowerCase();	692 String text = e.text.toLowerCase();

635 return obsoleteRegExp.hasMatch(text) \|\| deprecatedRegExp.hasMatch(text);	693 return obsoleteRegExp.hasMatch(text) \|\| deprecatedRegExp.hasMatch(text);

636 }	694 }

637	695

638 bool isFirstCharLowerCase(String str) {	696 bool isFirstCharLowerCase(String str) {

639 RegExp firstLower = new RegExp("^[a-z]");	697 return const RegExp("^[a-z]").hasMatch(str);

640 return firstLower.hasMatch(str);

641 }	698 }

642	699

643 void scrapeSection(Element root, String sectionSelector,	700 /**

644 String currentType,	701 * Extracts information from a fragment of HTML only searching under the [root]

645 List members,	702 * html node. [sectionSelector] specifies the query to use to find candidate

646 String propType) {	703 * sections of the document to consider (there may be more than one).

	704 * [currentType] specifies the name of the current class. [members] specifies

	705 * the known class members for this class that we are attempting to find

	706 * documentation for. [propType] indicates whether we are searching for

	707 * methods, properties, constants, or constructors.

	708 */

	709 void scrapeSection(Element root, String sectionSelector, String currentType,

	710 List members, String propType) {

647 Map expectedProps = dartIdl[propType];	711 Map expectedProps = dartIdl[propType];

648	712

649 Set<String> alreadyMatchedProperties = new Set<String>();	713 Set<String> alreadyMatchedProperties = new Set<String>();

650 bool onlyConsiderTables = false;	714 bool onlyConsiderTables = false;

651 ElementList allMatches = root.queryAll(sectionSelector);	715 ElementList allMatches = root.queryAll(sectionSelector);

652 if (allMatches.length == 0) {	716 if (allMatches.length == 0) {

	717 // If we can't find any matches to the sectionSelector, we fall back to

	718 // considering all tables in the document. This is dangerous so we only

	719 // allow the safer table matching extraction rules for this case.

653 allMatches = root.queryAll(".fullwidth-table");	720 allMatches = root.queryAll(".fullwidth-table");

654 onlyConsiderTables = true;	721 onlyConsiderTables = true;

655 }	722 }

656 for (Element matchElement in allMatches) {	723 for (Element matchElement in allMatches) {

657 DivElement match = matchElement.parent;	724 final match = matchElement.parent;

658 if (!match.id.startsWith("section") && !(match.id == "pageText")) {	725 if (!match.id.startsWith("section") && match.id != "pageText") {

659 throw "Enexpected element $match";	726 throw "Unexpected element $match";

660 }	727 }

	728 // We don't want to later display this text a second time while for example

	729 // displaying class level summary information as then we would display

	730 // the same documentation twice.

661 match.classes.add(DART_REMOVED);	731 match.classes.add(DART_REMOVED);

662	732

663 bool foundProps = false;	733 bool foundProps = false;

664	734

665 // TODO(jacobr): we should really look for the table tag instead	735 // TODO(jacobr): we should really look for the table tag instead

666 // add an assert if we are missing something that is a table...	736 // add an assert if we are missing something that is a table...

667 // TODO(jacobr) ignore tables in tables....	737 // TODO(jacobr) ignore tables in tables.

668 for (Element t in match.queryAll('.standard-table, .fullwidth-table')) {	738 for (Element t in match.queryAll('.standard-table, .fullwidth-table')) {

669 int helpIndex = -1;	739 int helpIndex = -1;

670 num i = 0;	740 num i = 0;

671 for (Element r in t.queryAll("th, td.header")) {	741 for (Element r in t.queryAll("th, td.header")) {

672 var txt = r.text.trim().split(" ")[0].toLowerCase();	742 final txt = r.text.trim().split(" ")[0].toLowerCase();

673 if (txt == "description") {	743 if (txt == "description") {

674 helpIndex = i;	744 helpIndex = i;

675 break;	745 break;

676 }	746 }

677 i++;	747 i++;

678 }	748 }

679	749

680 List<int> numMatches = new List<int>(i);	750 // Figure out which column in the table contains member names by

	751 // tracking how many member names each column contains.

	752 final numMatches = new List<int>(i);

681 for (int j = 0; j < i; j++) {	753 for (int j = 0; j < i; j++) {

682 numMatches[j] = 0;	754 numMatches[j] = 0;

683 }	755 }

684	756

685 // Find the row that seems to have the most names that look like	757 // Find the column that seems to have the most names that look like

686 // expected properties.	758 // expected properties.

687 for (Element r in t.queryAll("tbody tr")) {	759 for (Element r in t.queryAll("tbody tr")) {

688 ElementList $row = r.elements;	760 ElementList row = r.elements;

689 if ($row.length == 0 \|\| $row.first.classes.contains(".header")) {	761 if (row.length == 0 \|\| row.first.classes.contains(".header")) {

690 continue;	762 continue;

691 }	763 }

692	764

693 for (int k = 0; k < numMatches.length && k < $row.length; k++) {	765 for (int k = 0; k < numMatches.length && k < row.length; k++) {

694 Element e = $row[k];	766 if (expectedProps.containsKey(fullNameCleanup(row[k].text))) {

695 if (expectedProps.containsKey(fullNameCleanup(e.text))) {

696 numMatches[k]++;	767 numMatches[k]++;

697 break;	768 break;

698 }	769 }

699 }	770 }

700 }	771 }

701	772

702 int propNameIndex = 0;	773 int propNameIndex = 0;

703 {	774 {

704 int bestCount = numMatches[0];	775 int bestCount = numMatches[0];

705 for (int k = 1; k < numMatches.length; k++) {	776 for (int k = 1; k < numMatches.length; k++) {

706 if (numMatches[k] > bestCount) {	777 if (numMatches[k] > bestCount) {

707 bestCount = numMatches[k];	778 bestCount = numMatches[k];

708 propNameIndex = k;	779 propNameIndex = k;

709 }	780 }

710 }	781 }

711 }	782 }

712	783

713 for (Element r in t.queryAll("tbody tr")) {	784 for (Element r in t.queryAll("tbody tr")) {

714 ElementList $row = r.elements;	785 final row = r.elements;

715 if ($row.length > propNameIndex && $row.length > helpIndex ) {	786 if (row.length > propNameIndex && row.length > helpIndex) {

716 if ($row.first.classes.contains(".header")) {	787 if (row.first.classes.contains(".header")) {

717 continue;	788 continue;

718 }	789 }

719 // TODO(jacobr): this code for determining the namestr is needlessly	790 // TODO(jacobr): this code for determining the namestr is needlessly

720 // messy.	791 // messy.

721 Element nameRow = $row[propNameIndex];	792 final nameRow = row[propNameIndex];

722 AnchorElement a = nameRow.query("a");	793 AnchorElement a = nameRow.query("a");

723 String goodName = '';	794 String goodName = '';

724 if (a != null) {	795 if (a != null) {

725 goodName = a.text.trim();	796 goodName = a.text.trim();

726 }	797 }

727 String nameStr = nameRow.text;	798 String nameStr = nameRow.text;

728	799

729 Map entry = new Map<String, String>();	800 Map entry = new Map<String, String>();

730	801

731 // "currentType": $($row[1]).text().trim(), // find("code") ?	802 entry["name"] = fullNameCleanup(nameStr.length > 0 ?

732 entry["name"] = fullNameCleanup(nameStr.length > 0 ? nameStr : goodName);	803 nameStr : goodName);

733	804

734 final parse = filteredHtml(nameRow, nameRow, entry["name"], null);	805 final parse = filteredHtml(nameRow, nameRow, entry["name"], null);

735 String altHelp = parse.html;	806 String altHelp = parse.html;

736	807

737 // "jsSignature": nameStr,	808 entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ?

738 entry["help"] = (helpIndex == -1 || $row[helpIndex] == null) ? altHelp : genPrettyHtmlFromElement($row[helpIndex]);	809 altHelp : genPrettyHtmlFromElement(row[helpIndex]);

739 // "altHelp" : altHelp,

740 if (parse.url != null) {	810 if (parse.url != null) {

741 entry["url"] = parse.url;	811 entry["url"] = parse.url;

742 }	812 }

743	813

744 if (parse.idl.length > 0) {	814 if (parse.idl.length > 0) {

745 entry["idl"] = parse.idl;	815 entry["idl"] = parse.idl;

746 }	816 }

747	817

748 entry["obsolete"] = isObsolete(r);	818 entry["obsolete"] = isObsolete(r);

749	819

750 if (entry["name"].length > 0) {	820 if (entry["name"].length > 0) {

751 cleanupEntry(members, entry);	821 cleanupEntry(members, entry);

752 alreadyMatchedProperties.add(entry['name']);	822 alreadyMatchedProperties.add(entry['name']);

753 foundProps = true;	823 foundProps = true;

754 }	824 }

755 }	825 }

756 }	826 }

757 }	827 }

758	828

759 if (onlyConsiderTables) {	829 if (onlyConsiderTables) {

760 continue;	830 continue;

761 }	831 }

	832

762 // After this point we have higher risk tests that attempt to perform	833 // After this point we have higher risk tests that attempt to perform

763 // rudimentary page segmentation.	834 // rudimentary page segmentation. This approach is much more error-prone

	835 // than using tables because the HTML is far less clearly structured.

764	836

765 // Search for expected matching names.	837 final allText = getAllTextNodes(match);

766 List<Text> allText = getAllTextNodes(match);

767	838

768 Map<String, Element> pmap = new Map<String, Element>();	839 final pmap = new Map<String, Element>();

769 for (String prop in expectedProps.getKeys()) {	840 for (final prop in expectedProps.getKeys()) {

770 if (alreadyMatchedProperties.contains(prop)) {	841 if (alreadyMatchedProperties.contains(prop)) {

771 continue;	842 continue;

772 }	843 }

773 Element e = findBest(match, allText, prop, propType);	844 final e = findBest(match, allText, prop, propType);

774 if (e != null && !inTable(e)) {	845 if (e != null && !inTable(e)) {

775 pmap[prop] = e;	846 pmap[prop] = e;

776 }	847 }

777 }	848 }

778	849

779 for (String prop in pmap.getKeys()) {	850 for (final prop in pmap.getKeys()) {

780 Element e = pmap[prop];	851 pmap[prop].classes.add(DART_REMOVED);

781 e.classes.add(DART_REMOVED);

782 }	852 }

783	853

	854 // The problem is the MDN docs do not place documentation for each method in a

	855 // nice self contained subtree. Instead you will see something like:

	856

	857 // <h3>drawImage</h3>

	858 // <p>Draw image is an awesome method</p>

	859 // some more info on drawImage here

	860 // <h3>mozDrawWindow</h3>

	861 // <p>This API cannot currently be used by Web content.

	862 // It is chrome only.</p>

	863 // <h3>drawRect</h3>

	864 // <p>Always call drawRect instead of drawImage</p>

	865 // some more info on drawRect here...

	866

	867 // The trouble is we will easily detect that the drawImage and drawRect

	868 // entries are method definitions because we know to search for these

	869 // method names but we will not detect that mozDrawWindow is a method

	870 // definition as that method doesn't exist in our IDL. Thus if we are not

	871 // careful the definition for the drawImage method will contain the

	872 // definition for the mozDrawWindow method as well which would result in

	873 // broken docs. We solve this problem by finding all content with similar

	874 // visual structure to the already found method definitions. It turns out

	875 // that using the visual position of each element on the page is much

	876 // more reliable than using the DOM structure

	877 // (e.g. section_root > div > h3) for the MDN docs because MDN authors

	878 // carefully check that the documentation for each method comment is

	879 // visually consistent but take less care to check that each

	880 // method comment has identical markup structure.

784 for (String prop in pmap.getKeys()) {	881 for (String prop in pmap.getKeys()) {

785 Element e = pmap[prop];	882 Element e = pmap[prop];

786 ClientRect r = getClientRect(e);	883 ClientRect r = getClientRect(e);

787 // TODO(jacobr): a lot of these queries are identical.	884 // TODO(jacobr): a lot of these queries are identical and this code

788 for (Element cand in match.queryAll(e.tagName)) {	885 // could easily be optimized.

789 if (!cand.classes.contains(DART_REMOVED) && !inTable(cand) ) { // XXX use a neg selector.	886 for (final cand in match.queryAll(e.tagName)) {

790 ClientRect candRect = getClientRect(cand);	887 // TODO(jacobr): use a negative selector instead.

791 // TODO(jacobr): this is somewhat loose.	888 if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) {

	889 final candRect = getClientRect(cand);

	890 // Only consider matches that have similar heights and identical left

	891 // coordinates.

792 if (candRect.left == r.left &&	892 if (candRect.left == r.left &&

793 (candRect.height - r.height).abs() < 5) {	893 (candRect.height - r.height).abs() < 5) {

794 String propName = fullNameCleanup(cand.text);	894 String propName = fullNameCleanup(cand.text);

795 if (isFirstCharLowerCase(propName) && pmap.containsKey(propName) == false && alreadyMatchedProperties.contains(propName) == false) {	895 if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName)

796 // Don't set here to avoid layouts... cand.classes.add(DART_REMOVED);	896 && !alreadyMatchedProperties.contains(propName)) {

797 pmap[propName] = cand;	897 pmap[propName] = cand;

798 }	898 }

799 }	899 }

800 }	900 }

801 }	901 }

802 }	902 }

803	903

	904 // We mark these elements in batch to reduce the number of layouts

	905 // triggered. TODO(jacobr): use new batch based async measurement to make

	906 // this code flow simpler.

804 for (String prop in pmap.getKeys()) {	907 for (String prop in pmap.getKeys()) {

805 Element e = pmap[prop];	908 Element e = pmap[prop];

806 e.classes.add(DART_REMOVED);	909 e.classes.add(DART_REMOVED);

807 }	910 }

808	911

809 // Find likely "subsections" of the main section and mark them with	912 // Find likely "subsections" of the main section and mark them with

810 // DART_REMOVED so we don't include them in member descriptions... which	913 // DART_REMOVED so we don't include them in member descriptions... which

811 // would suck.	914 // would suck.

812 for (Element e in match.queryAll("[id]")) {	915 for (Element e in match.queryAll("[id]")) {

813 if (e.id.indexOf(matchElement.id) != -1) {	916 if (e.id.contains(matchElement.id)) {

814 e.classes.add(DART_REMOVED);	917 e.classes.add(DART_REMOVED);

815 }	918 }

816 }	919 }

817	920

818 for (String prop in pmap.getKeys()) {	921 for (String prop in pmap.getKeys()) {

819 Element elem = pmap[prop];	922 Element elem = pmap[prop];

820 bool obsolete = false;	923 bool obsolete = false;

821 final parse = filteredHtml(	924 final parse = filteredHtml(

822 elem, match, prop,	925 elem, match, prop,

823 (Element e) {	926 (Element e) {

824 obsolete = isObsolete(e);	927 obsolete = isObsolete(e);

825 });	928 });

826 Map entry = {	929 Map entry = {

827 "url" : parse.url,	930 "url" : parse.url,

828 "name" : prop,	931 "name" : prop,

829 "help" : parse.html,	932 "help" : parse.html,

830 "obsolete" : obsolete	933 "obsolete" : obsolete

831 //"jsSignature" : nameStr

832 };	934 };

833 if (parse.idl.length > 0) {	935 if (parse.idl.length > 0) {

834 entry["idl"] = parse.idl;	936 entry["idl"] = parse.idl;

835 }	937 }

836 cleanupEntry(members, entry);	938 cleanupEntry(members, entry);

837 }	939 }

838 }	940 }

839 }	941 }

840	942

841 String trimHtml(String html) {	943 String trimHtml(String html) {

842 // TODO(jacobr): impl.	944 // TODO(jacobr): implement this. Remove spurious enclosing HTML tags, etc.

843 return html;	945 return html;

844 }	946 }

845	947

846 bool maybeName(String name) {	948 bool maybeName(String name) {

847 RegExp nameRegExp = new RegExp("^[a-z][a-z0-9A-Z]+\$");	949 return const RegExp("^[a-z][a-z0-9A-Z]+\$").hasMatch(name) ||

848 if (nameRegExp.hasMatch(name)) return true;	950 const RegExp("^[A-Z][A-Z_]*\$").hasMatch(name);

849 RegExp constRegExp = new RegExp("^[A-Z][A-Z_]*\$");

850 if (constRegExp.hasMatch(name)) return true;

851 }	951 }

852	952

	953 // TODO(jacobr): this element is ugly at the moment but will become easier to

	954 // read once ElementList supports most of the Element functionality.

853 void markRemoved(var e) {	955 void markRemoved(var e) {

854 if (e != null) {	956 if (e != null) {

855 // TODO( remove)

856 if (e is Element) {	957 if (e is Element) {

857 e.classes.add(DART_REMOVED);	958 e.classes.add(DART_REMOVED);

858 } else {	959 } else {

859 for (Element el in e) {	960 for (Element el in e) {

860 el.classes.add(DART_REMOVED);	961 el.classes.add(DART_REMOVED);

861 }	962 }

862 }	963 }

863 }	964 }

864 }	965 }

865	966

	967 // TODO(jacobr): remove this when the dartium JSON parser handles \n correctly.

866 String JSONFIXUPHACK(String value) {	968 String JSONFIXUPHACK(String value) {

867 return value.replaceAll("\n", "ZDARTIUMDOESNTESCAPESLASHNJXXXX");	969 return value.replaceAll("\n", "ZDARTIUMDOESNTESCAPESLASHNJXXXX");

868 }	970 }

869	971

870 String mozToWebkit(String name) {	972 String mozToWebkit(String name) {

871 RegExp regExp = new RegExp("^moz");	973 return name.replaceFirst(const RegExp("^moz"), "webkit");

872 name = name.replaceFirst(regExp, "webkit");

873 return name;

874 }	974 }

875	975

876 String stripWebkit(String name) {	976 String stripWebkit(String name) {

877 return trimPrefix(name, "webkit");	977 return trimPrefix(name, "webkit");

878 }	978 }

879	979

	980 // TODO(jacobr): be more principled about this.

880 String fullNameCleanup(String name) {	981 String fullNameCleanup(String name) {

881 int parenIndex = name.indexOf('(');	982 int parenIndex = name.indexOf('(');

882 if (parenIndex != -1) {	983 if (parenIndex != -1) {

883 // TODO(jacobr): workaround bug in:

884 // name = name.split("(")[0];

885 name = name.substring(0, parenIndex);	984 name = name.substring(0, parenIndex);

886 }	985 }

887 name = name.split(" ")[0];	986 name = name.split(" ")[0];

888 name = name.split("\n")[0];	987 name = name.split("\n")[0];

889 name = name.split("\t")[0];	988 name = name.split("\t")[0];

890 name = name.split("*")[0];	989 name = name.split("*")[0];

891 name = name.trim();	990 name = name.trim();

892 name = safeNameCleanup(name);	991 name = safeNameCleanup(name);

893 return name;	992 return name;

894 }	993 }

895	994

896 // Less aggressive than the full cleanup to avoid overeager matching of	995 // Less aggressive than the full name cleanup to avoid overeager matching.

897 // everything	996 // TODO(jacobr): be more principled about this.

898 String safeNameCleanup(String name) {	997 String safeNameCleanup(String name) {

899 int parenIndex = name.indexOf('(');	998 int parenIndex = name.indexOf('(');

900 if (parenIndex != -1 && name.indexOf(")") != -1) {	999 if (parenIndex != -1 && name.indexOf(")") != -1) {

901 // TODO(jacobr): workaround bug in:	1000 // TODO(jacobr): workaround bug in:

902 // name = name.split("(")[0];	1001 // name = name.split("(")[0];

903 name = name.substring(0, parenIndex);	1002 name = name.substring(0, parenIndex);

904 }	1003 }

905 name = name.trim();	1004 name = name.trim();

906 name = trimPrefix(name, currentType + ".");	1005 name = trimPrefix(name, currentType + ".");

907 name = trimPrefix(name, currentType.toLowerCase() + ".");	1006 name = trimPrefix(name, currentType.toLowerCase() + ".");

908 name = trimPrefix(name, currentTypeShort + ".");	1007 name = trimPrefix(name, currentTypeShort + ".");

909 name = trimPrefix(name, currentTypeShort.toLowerCase() + ".");	1008 name = trimPrefix(name, currentTypeShort.toLowerCase() + ".");

910 name = trimPrefix(name, currentTypeTiny + ".");	1009 name = trimPrefix(name, currentTypeTiny + ".");

911 name = trimPrefix(name, currentTypeTiny.toLowerCase() + ".");	1010 name = trimPrefix(name, currentTypeTiny.toLowerCase() + ".");

912 name = name.trim();	1011 name = name.trim();

913 name = mozToWebkit(name);	1012 name = mozToWebkit(name);

914 return name;	1013 return name;

915 }	1014 }

916	1015

	1016 /**

	1017 * Remove h1, h2, and h3 headers.

	1018 */

917 void removeHeaders(DocumentFragment fragment) {	1019 void removeHeaders(DocumentFragment fragment) {

918 for (Element e in fragment.queryAll("h1, h2, h3")) {	1020 for (Element e in fragment.queryAll("h1, h2, h3")) {

919 e.remove();	1021 e.remove();

920 }	1022 }

921 }	1023 }

922	1024

	1025 /**

	1026 * Given an [entry] representing a single method or property cleanup the

	1027 * values performing some simple normalization and only adding the entry to

	1028 * [members] if it has a valid name.

	1029 */

923 void cleanupEntry(List members, Map entry) {	1030 void cleanupEntry(List members, Map entry) {

924 if (entry.containsKey('help')) {	1031 if (entry.containsKey('help')) {

925 entry['help'] = trimHtml(entry['help']);	1032 entry['help'] = trimHtml(entry['help']);

926 }	1033 }

927 String name = fullNameCleanup(entry['name']);	1034 String name = fullNameCleanup(entry['name']);

928 entry['name'] = name;	1035 entry['name'] = name;

929 if (maybeName(name)) {	1036 if (maybeName(name)) {

930 for (String key in entry.getKeys()) {	1037 for (String key in entry.getKeys()) {

931 var value = entry[key];	1038 var value = entry[key];

932 if (value == null) {	1039 if (value == null) {

(...skipping 10 matching lines...) Expand all Loading...
943	1050

944 // TODO(jacobr) dup with trim start....	1051 // TODO(jacobr) dup with trim start....

945 String trimPrefix(String str, String prefix) {	1052 String trimPrefix(String str, String prefix) {

946 if (str.indexOf(prefix) == 0) {	1053 if (str.indexOf(prefix) == 0) {

947 return str.substring(prefix.length);	1054 return str.substring(prefix.length);

948 } else {	1055 } else {

949 return str;	1056 return str;

950 }	1057 }

951 }	1058 }

952	1059

953 void resourceLoaded() {

954 if (data != null) run();

955 }

956

957 String trimStart(String str, String start) {	1060 String trimStart(String str, String start) {

958 if (str.startsWith(start) && str.length > start.length) {	1061 if (str.startsWith(start) && str.length > start.length) {

959 return str.substring(start.length);	1062 return str.substring(start.length);

960 }	1063 }

961 return str;	1064 return str;

962 }	1065 }

963	1066

964 String trimEnd(String str, String end) {	1067 String trimEnd(String str, String end) {

965 if (str.endsWith(end) && str.length > end.length) {	1068 if (str.endsWith(end) && str.length > end.length) {

966 return str.substring(0, str.length - end.length);	1069 return str.substring(0, str.length - end.length);

967 }	1070 }

968 return str;	1071 return str;

969 }	1072 }

970	1073

	1074 /**

	1075 * Extract a section with name [key] using [selector] to find start points for

	1076 * the section in the document.

	1077 */

971 void extractSection(String selector, String key) {	1078 void extractSection(String selector, String key) {

972 for (Element e in document.queryAll(selector)) {	1079 for (Element e in document.queryAll(selector)) {

973 e = e.parent;	1080 e = e.parent;

974 for (Element skip in e.queryAll("h1, h2, $IDL_SELECTOR")) {	1081 for (Element skip in e.queryAll("h1, h2, $IDL_SELECTOR")) {

975 skip.remove();	1082 skip.remove();

976 }	1083 }

977 String html = filteredHtml(e, e, null, removeHeaders).html;	1084 String html = filteredHtml(e, e, null, removeHeaders).html;

978 if (html.length > 0) {	1085 if (html.length > 0) {

979 if (dbEntry.containsKey(key)) {	1086 if (dbEntry.containsKey(key)) {

980 dbEntry[key] += html;	1087 dbEntry[key] += html;

981 } else {	1088 } else {

982 dbEntry[key] = html;	1089 dbEntry[key] = html;

983 }	1090 }

984 }	1091 }

985 e.classes.add(DART_REMOVED);	1092 e.classes.add(DART_REMOVED);

986 }	1093 }

987 }	1094 }

988	1095

989 void run() {	1096 void run() {

990 // Inject CSS to insure lines don't wrap unless it was intentional.	1097 // Inject CSS to ensure lines don't wrap unless they were intended to.

	1098 // This is needed to make the logic to determine what is a single line

	1099 // behave consistently even for very long method names.

991 document.head.nodes.add(new Element.html("""	1100 document.head.nodes.add(new Element.html("""

992 <style type="text/css">	1101 <style type="text/css">

993 body {	1102 body {

994 width: 10000px;	1103 width: 10000px;

995 }	1104 }

996 </style>"""));	1105 </style>"""));

997	1106

998 String title = trimEnd(window.document.title.trim(), " - MDN");	1107 String title = trimEnd(window.document.title.trim(), " - MDN");

999 dbEntry['title'] = title;	1108 dbEntry['title'] = title;

1000	1109

1001 // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best	1110 // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best

1002 // place to do this...	1111 // place to do this...

	1112 // TODO(jacobr): move this to right before we extract HTML.

1003	1113

1004 // Remove the "Introduced in HTML <version>" boxes.	1114 // Remove the "Introduced in HTML <version>" boxes.

1005 for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) {	1115 for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) {

1006 e.remove();	1116 e.remove();

1007 }	1117 }

1008	1118

1009 // Flatten the list of known DOM types into a faster and case-insensitive map.	1119 // Flatten the list of known DOM types into a faster and case-insensitive

	1120 // map.

1010 domTypes = {};	1121 domTypes = {};

1011 for (final domType in domTypesRaw) {	1122 for (final domType in domTypesRaw) {

1012 domTypes[domType.toLowerCase()] = domType;	1123 domTypes[domType.toLowerCase()] = domType;

1013 }	1124 }

1014	1125

1015 // Fix up links.	1126 // Fix up links.

1016 final SHORT_LINK = const RegExp(@'^[\w/]+$');	1127 final SHORT_LINK = const RegExp(@'^[\w/]+$');

1017 final INNER_LINK = const RegExp(@'[Ee]n/(?:[\w/]+/\|)([\w#.]+)(?:)?$');	1128 final INNER_LINK = const RegExp(@'[Ee]n/(?:[\w/]+/\|)([\w#.]+)(?:)?$');

1018 final MEMBER_LINK = const RegExp(@'(\w+)[.#](\w+)');	1129 final MEMBER_LINK = const RegExp(@'(\w+)[.#](\w+)');

1019 final RELATIVE_LINK = const RegExp(@'^(?:../)*/?[Ee][Nn]/(.+)');	1130 final RELATIVE_LINK = const RegExp(@'^(?:../)*/?[Ee][Nn]/(.+)');

1020	1131

1021 // - Make relative links absolute.	1132 // - Make relative links absolute.

1022 // - If we can, take links that point to other MDN pages and retarget them	1133 // - If we can, take links that point to other MDN pages and retarget them

1023 // to appropriate pages in our docs.	1134 // to appropriate pages in our docs.

1024 // TODO(rnystrom): Add rel external to links we didn't fix.	1135 // TODO(rnystrom): Add rel external to links we didn't fix.

1025 for (AnchorElement a in document.queryAll('a')) {	1136 for (AnchorElement a in document.queryAll('a')) {

1026 // Get the raw attribute because we don't want the browser to fully-	1137 // Get the raw attribute because we don't want the browser to fully-

1027 // qualify the name for us since it has the wrong base address for the page.	1138 // qualify the name for us since it has the wrong base address for the

	1139 // page.

1028 var href = a.attributes['href'];	1140 var href = a.attributes['href'];

1029	1141

1030 // Ignore busted links.	1142 // Ignore busted links.

1031 if (href == null) continue;	1143 if (href == null) continue;

1032	1144

1033 // If we can recognize what it's pointing to, point it to our page instead.	1145 // If we can recognize what it's pointing to, point it to our page instead.

1034 tryToLinkToRealType(maybeType) {	1146 tryToLinkToRealType(maybeType) {

1035 // See if we know a type with that name.	1147 // See if we know a type with that name.

1036 final realType = domTypes[maybeType.toLowerCase()];	1148 final realType = domTypes[maybeType.toLowerCase()];

1037 if (realType != null) {	1149 if (realType != null) {

(...skipping 25 matching lines...) Expand all Loading...
1063 tryToLinkToRealType(member[1]);	1175 tryToLinkToRealType(member[1]);

1064 } else {	1176 } else {

1065 tryToLinkToRealType(match[1]);	1177 tryToLinkToRealType(match[1]);

1066 }	1178 }

1067 }	1179 }

1068	1180

1069 // Put it back into the element.	1181 // Put it back into the element.

1070 a.attributes['href'] = href;	1182 a.attributes['href'] = href;

1071 }	1183 }

1072	1184

1073 if (title.toLowerCase().indexOf(currentTypeTiny.toLowerCase()) == -1) {	1185 if (!title.toLowerCase().contains(currentTypeTiny.toLowerCase())) {

1074 bool foundMatch = false;	1186 bool foundMatch = false;

1075 // Test out if the title is really an HTML tag that matches the	1187 // Test out if the title is really an HTML tag that matches the

1076 // current class name.	1188 // current class name.

1077 for (String tag in [title.split(" ")[0], title.split(".").last()]) {	1189 for (String tag in [title.split(" ")[0], title.split(".").last()]) {

1078 try {	1190 try {

1079 dom.Element element = dom.document.createElement(tag);	1191 dom.Element element = dom.document.createElement(tag);

	1192 // TODO(jacobr): this is a really ugly way of doing this that will

	1193 // stop working at some point soon.

1080 if (element.typeName == currentType) {	1194 if (element.typeName == currentType) {

1081 foundMatch = true;	1195 foundMatch = true;

1082 break;	1196 break;

1083 }	1197 }

1084 } catch(e) {}	1198 } catch(e) {}

1085 }	1199 }

1086 if (foundMatch == false) {	1200 if (!foundMatch) {

1087 dbEntry['skipped'] = true;	1201 dbEntry['skipped'] = true;

1088 dbEntry['cause'] = "Suspect title";	1202 dbEntry['cause'] = "Suspect title";

1089 onEnd();	1203 onEnd();

1090 return;	1204 return;

1091 }	1205 }

1092 }	1206 }

1093	1207

1094 Element root = document.query(".pageText");	1208 Element root = document.query(".pageText");

1095 if (root == null) {	1209 if (root == null) {

1096 dbEntry['cause'] = '.pageText not found';	1210 dbEntry['cause'] = '.pageText not found';

1097 onEnd();	1211 onEnd();

1098 return;	1212 return;

1099 }	1213 }

1100	1214

1101 markRemoved(root.query("#Notes"));	1215 markRemoved(root.query("#Notes"));

1102 List members = dbEntry['members'];	1216 List members = dbEntry['members'];

1103	1217

	1218 // This is a laundry list of CSS selectors for boilerplate content on the

	1219 // MDN pages that we should ignore for the purposes of extracting

	1220 // documentation.

1104 markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar"));	1221 markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar"));

1105 markRemoved(document.queryAll("#article-nav"));	1222 markRemoved(document.queryAll("#article-nav"));

1106 markRemoved(document.queryAll(".hideforedit"));	1223 markRemoved(document.queryAll(".hideforedit"));

1107 markRemoved(document.queryAll(".navbox"));	1224 markRemoved(document.queryAll(".navbox"));

1108 markRemoved(document.query("#Method_overview"));	1225 markRemoved(document.query("#Method_overview"));

1109 markRemoved(document.queryAll("h1, h2"));	1226 markRemoved(document.queryAll("h1, h2"));

1110	1227

1111 scrapeSection(root, "#Methods", currentType, members, 'methods');	1228 scrapeSection(root, "#Methods", currentType, members, 'methods');

1112 scrapeSection(root, "#Constants, #Error_codes, #State_constants", currentType, members, 'constants');	1229 scrapeSection(root, "#Constants, #Error_codes, #State_constants",

	1230 currentType, members, 'constants');

1113 // TODO(jacobr): infer tables based on multiple matches rather than	1231 // TODO(jacobr): infer tables based on multiple matches rather than

1114 // using a hard coded list of section ids.	1232 // using a hard coded list of section ids.

1115 scrapeSection(root,	1233 scrapeSection(root,

1116 "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, #DOM_properties, #Event_handlers, #Event_Handlers",	1234 "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, " +

	1235 "#DOM_properties, #Event_handlers, #Event_Handlers",

1117 currentType, members, 'properties');	1236 currentType, members, 'properties');

1118	1237

1119 // Avoid doing this till now to avoid messing up the section scrape.	1238 // Avoid doing this till now to avoid messing up the section scrape.

1120 markRemoved(document.queryAll("h3"));	1239 markRemoved(document.queryAll("h3"));

1121	1240

1122 ElementList $examples = root.queryAll("span[id^=example], span[id^=Example]");	1241 ElementList examples = root.queryAll("span[id^=example], span[id^=Example]");

1123	1242

1124 extractSection("#See_also", 'seeAlso');	1243 extractSection("#See_also", 'seeAlso');

1125 extractSection("#Specification, #Specifications", "specification");	1244 extractSection("#Specification, #Specifications", "specification");

1126 // $("#Methods").parent().remove(); // not safe (e.g. Document)

1127	1245

1128 // TODO(jacobr): actually extract the constructor(s)	1246 // TODO(jacobr): actually extract the constructor(s)

1129 extractSection("#Constructor, #Constructors", 'constructor');	1247 extractSection("#Constructor, #Constructors", 'constructor');

1130 extractSection("#Browser_compatibility, #Compatibility", 'compatibility');	1248 extractSection("#Browser_compatibility, #Compatibility", 'compatibility');

1131	1249

	1250 // Extract examples.

1132 List<String> exampleHtml = [];	1251 List<String> exampleHtml = [];

1133 for (Element e in $examples) {	1252 for (Element e in examples) {

1134 e.classes.add(DART_REMOVED);	1253 e.classes.add(DART_REMOVED);

1135 }	1254 }

1136 for (Element e in $examples) {	1255 for (Element e in examples) {

1137 String html = filteredHtml(e, root, null,	1256 String html = filteredHtml(e, root, null,

1138 (DocumentFragment fragment) {	1257 (DocumentFragment fragment) {

1139 removeHeaders(fragment);	1258 removeHeaders(fragment);

1140 if (fragment.text.trim().toLowerCase() == "example") {	1259 if (fragment.text.trim().toLowerCase() == "example") {

1141 // Degenerate example.	1260 // Degenerate example.

1142 fragment.nodes.clear();	1261 fragment.nodes.clear();

1143 }	1262 }

1144 }).html;	1263 }).html;

1145 if (html.length > 0) {	1264 if (html.length > 0) {

1146 exampleHtml.add(html);	1265 exampleHtml.add(html);

1147 }	1266 }

1148 }	1267 }

1149 if (exampleHtml.length > 0) {	1268 if (exampleHtml.length > 0) {

1150 dbEntry['examples'] = exampleHtml;	1269 dbEntry['examples'] = exampleHtml;

1151 }	1270 }

1152	1271

	1272 // Extract the class summary.

	1273 // Basically everything left over after the #Summary or #Description tag is

	1274 // safe to include in the summary.

1153 StringBuffer summary = new StringBuffer();	1275 StringBuffer summary = new StringBuffer();

1154

1155 for (Element e in root.queryAll("#Summary, #Description")) {	1276 for (Element e in root.queryAll("#Summary, #Description")) {

1156 summary.add(filteredHtml(root, e, null, removeHeaders).html);	1277 summary.add(filteredHtml(root, e, null, removeHeaders).html);

1157 }	1278 }

1158	1279

1159 if (summary.length == 0) {	1280 if (summary.length == 0) {

1160 // Remove the "Gecko DOM Reference" text	1281 // Remove the "Gecko DOM Reference" text

1161 Element ref = root.query(".lang.lang-en");	1282 Element ref = root.query(".lang.lang-en");

1162 if (ref != null) {	1283 if (ref != null) {

1163 ref = ref.parent;	1284 ref = ref.parent;

1164 String refText = ref.text.trim();	1285 String refText = ref.text.trim();

1165 if (refText == "Gecko DOM Reference" ||	1286 if (refText == "Gecko DOM Reference" ||

1166 refText == "« Gecko DOM Reference") {	1287 refText == "« Gecko DOM Reference") {

1167 ref.remove();	1288 ref.remove();

1168 }	1289 }

1169 }	1290 }

1170 // Risky... this might add stuff we shouldn't.	1291 // Risky... this might add stuff we shouldn't.

1171 summary.add(filteredHtml(root, root, null, removeHeaders).html);	1292 summary.add(filteredHtml(root, root, null, removeHeaders).html);

1172 }	1293 }

1173	1294

1174 if (summary.length > 0) {	1295 if (summary.length > 0) {

1175 dbEntry['summary'] = summary.toString();	1296 dbEntry['summary'] = summary.toString();

1176 }	1297 }

1177	1298

1178 // Inject CSS to aid debugging in the browser.	1299 // Inject CSS to aid debugging in the browser.

	1300 // We could avoid doing this if we know we are not running in a browser.

1179 document.head.nodes.add(new Element.html(DEBUG_CSS));	1301 document.head.nodes.add(new Element.html(DEBUG_CSS));

1180	1302

1181 onEnd();	1303 onEnd();

1182 }	1304 }

1183	1305

1184 void main() {	1306 void main() {

1185 window.on.load.add(documentLoaded);	1307 window.on.load.add(documentLoaded);

1186 }	1308 }

1187	1309

1188 void documentLoaded(event) {	1310 void documentLoaded(event) {

	1311 // Load the database of expected methods and properties with an

	1312 // XMLHttpRequest.

1189 new XMLHttpRequest.getTEMPNAME('${window.location}.json', (req) {	1313 new XMLHttpRequest.getTEMPNAME('${window.location}.json', (req) {

1190 data = JSON.parse(req.responseText);	1314 data = JSON.parse(req.responseText);

1191 dbEntry = {'members': [], 'srcUrl': pageUrl};	1315 dbEntry = {'members': [], 'srcUrl': pageUrl};

1192 resourceLoaded();	1316 run();

1193 });	1317 });

1194 }	1318 }

OLD	NEW

« no previous file with comments | « utils/apidoc/mdn/crawl.js ('k') | utils/apidoc/mdn/extract.sh » ('j') | no next file with comments »