OLD | NEW |
1 #import ("dart:html"); | 1 #import ("dart:html"); |
2 #import ("dart:htmlimpl"); | 2 #import ("dart:htmlimpl"); |
3 #import ("dart:dom", prefix:"dom"); | 3 #import ("dart:dom", prefix:"dom"); |
4 #import ("dart:json"); | 4 #import ("dart:json"); |
5 | 5 |
6 // Workaround for HTML lib missing feature. | 6 // Workaround for HTML lib missing feature. |
7 Range newRange() { | 7 Range newRange() { |
8 return LevelDom.wrapRange(dom.document.createRange()); | 8 return LevelDom.wrapRange(dom.document.createRange()); |
9 } | 9 } |
10 | 10 |
11 // Temporary range object to optimize performance computing client rects | 11 // Temporary range object to optimize performance computing client rects |
12 // from text nodes. | 12 // from text nodes. |
13 Range _tempRange; | 13 Range _tempRange; |
14 // Hacks because ASYNC measurement is annoying when just writing a script. | 14 // Hacks because ASYNC measurement is annoying when just writing a script. |
15 ClientRect getClientRect(Node n) { | 15 ClientRect getClientRect(Node n) { |
16 if (n is Element) { | 16 if (n is Element) { |
17 Element e = n; | 17 dom.Element raw = unwrapDomObject(n.dynamic); |
18 dom.Element raw = unwrapDomObject(e.dynamic); | |
19 return LevelDom.wrapClientRect(raw.getBoundingClientRect()); | 18 return LevelDom.wrapClientRect(raw.getBoundingClientRect()); |
20 } else { | 19 } else { |
21 // Crazy hack that works for nodes: create a range and measure it. | 20 // Crazy hack that works for nodes: create a range and measure it. |
22 if (_tempRange == null) { | 21 if (_tempRange == null) { |
23 _tempRange = newRange(); | 22 _tempRange = newRange(); |
24 } | 23 } |
25 _tempRange.setStartBefore(n); | 24 _tempRange.setStartBefore(n); |
26 _tempRange.setEndAfter(n); | 25 _tempRange.setEndAfter(n); |
27 return _tempRange.getBoundingClientRect(); | 26 return _tempRange.getBoundingClientRect(); |
28 } | 27 } |
29 } | 28 } |
30 | 29 |
31 final DART_REMOVED = "dart_removed"; | 30 /** |
| 31 * CSS class that is added to elements in the DOM to indicate that they should |
| 32 * be removed when extracting blocks of documentation. This is helpful when |
| 33 * running this script in a web browser as it is easy to visually see what |
| 34 * blocks of information were extracted when using CSS such as DEBUG_CSS |
| 35 * which highlights elements that should be removed. |
| 36 */ |
| 37 final DART_REMOVED = "dart-removed"; |
32 | 38 |
33 final DEBUG_CSS = """ | 39 final DEBUG_CSS = """ |
34 <style type="text/css"> | 40 <style type="text/css"> |
35 .dart_removed { | 41 .dart-removed { |
36 background-color: rgba(255, 0, 0, 0.5); | 42 background-color: rgba(255, 0, 0, 0.5); |
37 } | 43 } |
38 </style>"""; | 44 </style>"""; |
39 | 45 |
40 final MIN_PIXELS_DIFFERENT_LINES = 10; | 46 final MIN_PIXELS_DIFFERENT_LINES = 10; |
41 | 47 |
42 final IDL_SELECTOR = "pre.eval, pre.idl"; | 48 final IDL_SELECTOR = "pre.eval, pre.idl"; |
43 | 49 |
44 Map data; | 50 Map data; |
45 | 51 |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
274 if (path.startsWith('/')) { | 280 if (path.startsWith('/')) { |
275 return "$pageDomain$path"; | 281 return "$pageDomain$path"; |
276 } else if (path.startsWith("#")) { | 282 } else if (path.startsWith("#")) { |
277 return "$pageUrl$path"; | 283 return "$pageUrl$path"; |
278 } else { | 284 } else { |
279 return "$pageDir$path"; | 285 return "$pageDir$path"; |
280 } | 286 } |
281 } | 287 } |
282 | 288 |
283 bool inTable(Node n) { | 289 bool inTable(Node n) { |
284 while(n != null) { | 290 while (n != null) { |
285 if (n is TableElement) return true; | 291 if (n is TableElement) return true; |
286 n = n.parent; | 292 n = n.parent; |
287 } | 293 } |
288 return false; | 294 return false; |
289 } | 295 } |
290 | 296 |
291 String escapeHTML(str) { | 297 String escapeHTML(str) { |
292 Element e = new Element.tag("div"); | 298 Element e = new Element.tag("div"); |
293 e.text = str; | 299 e.text = str; |
294 return e.innerHTML; | 300 return e.innerHTML; |
295 } | 301 } |
296 | 302 |
297 List<Text> getAllTextNodes(Element elem) { | 303 List<Text> getAllTextNodes(Element elem) { |
298 List<Text> nodes = <Text>[]; | 304 final nodes = <Text>[]; |
299 helper(Node n) { | 305 helper(Node n) { |
300 if (n is Text) { | 306 if (n is Text) { |
301 nodes.add(n); | 307 nodes.add(n); |
302 } else { | 308 } else { |
303 for (Node child in n.nodes) { | 309 for (Node child in n.nodes) { |
304 helper(child); | 310 helper(child); |
305 } | 311 } |
306 } | 312 } |
307 }; | 313 }; |
308 | 314 |
309 helper(elem); | 315 helper(elem); |
310 return nodes; | 316 return nodes; |
311 } | 317 } |
312 | 318 |
313 /** | 319 /** |
314 * Whether a node and its children are all types that are safe to skip if the | 320 * Whether a node and its children are all types that are safe to skip if the |
315 * nodes have no text content. | 321 * nodes have no text content. |
316 */ | 322 */ |
317 bool isSkippableType(Node n) { | 323 bool isSkippableType(Node n) { |
318 // TODO(jacobr): are there any types we don't want to skip even if they | 324 // TODO(jacobr): are there any types we don't want to skip even if they |
319 // have no text content? | 325 // have no text content? |
320 if (n is ImageElement || n is CanvasElement || n is InputElement | 326 if (n is ImageElement || n is CanvasElement || n is InputElement |
321 || n is ObjectElement) { | 327 || n is ObjectElement) { |
322 return false; | 328 return false; |
323 } | 329 } |
324 if (n is Text) return true; | 330 if (n is Text) return true; |
325 | 331 |
326 for (Node child in n.nodes) { | 332 for (final child in n.nodes) { |
327 if (isSkippableType(child) == false) { | 333 if (!isSkippableType(child)) { |
328 return false; | 334 return false; |
329 } | 335 } |
330 } | 336 } |
331 return true; | 337 return true; |
332 } | 338 } |
333 | 339 |
334 bool isSkippable(Node n) { | 340 bool isSkippable(Node n) { |
335 if (!isSkippableType(n)) return false; | 341 if (!isSkippableType(n)) return false; |
336 return n.text.trim().length == 0; | 342 return n.text.trim().length == 0; |
337 } | 343 } |
338 | 344 |
339 void onEnd() { | 345 void onEnd() { |
340 // Hideous hack to send JSON back to JS. | 346 // Hideous hack to send JSON back to JS. |
341 String dbJson = JSON.stringify(dbEntry); | 347 String dbJson = JSON.stringify(dbEntry); |
342 // workaround bug in JSON parser. | 348 // workaround bug in JSON parser. |
343 dbJson = dbJson.replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n"); | 349 dbJson = dbJson.replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n"); |
344 | 350 |
| 351 // Use postMessage to send the JSON to JavaScript. TODO(jacobr): use a simple |
| 352 // isolate-based Dart-JS interop solution in the future. |
345 window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$dbJson", "*"); | 353 window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$dbJson", "*"); |
346 } | 354 } |
347 | 355 |
348 class SectionParseResult { | 356 class SectionParseResult { |
349 final String html; | 357 final String html; |
350 final String url; | 358 final String url; |
351 final String idl; | 359 final String idl; |
352 SectionParseResult(this.html, this.url, this.idl); | 360 SectionParseResult(this.html, this.url, this.idl); |
353 } | 361 } |
354 | 362 |
355 String genCleanHtml(Element root) { | 363 String genCleanHtml(Element root) { |
356 for (Element e in root.queryAll(".$DART_REMOVED")) { | 364 for (final e in root.queryAll(".$DART_REMOVED")) { |
357 e.classes.remove(DART_REMOVED); | 365 e.classes.remove(DART_REMOVED); |
358 } | 366 } |
359 | 367 |
360 // Ditch inline styles. | 368 // Ditch inline styles. |
361 for (Element e in root.queryAll('[style]')) { | 369 for (final e in root.queryAll('[style]')) { |
362 e.attributes.remove('style'); | 370 e.attributes.remove('style'); |
363 } | 371 } |
364 | 372 |
365 // These elements are just tags that we should suppress. | 373 // These elements are just tags that we should suppress. |
366 for (Element e in root.queryAll(".lang.lang-en")) { | 374 for (final e in root.queryAll(".lang.lang-en")) { |
367 e.remove(); | 375 e.remove(); |
368 } | 376 } |
369 | 377 |
| 378 Element parametersHeader; |
| 379 Element returnValueHeader; |
| 380 for (final e in root.queryAll("h6")) { |
| 381 if (e.text == 'Parameters') { |
| 382 parametersHeader = e; |
| 383 } else if (e.text == 'Return value') { |
| 384 returnValueHeader = e; |
| 385 } |
| 386 } |
| 387 |
| 388 if (parametersHeader != null) { |
| 389 int numEmptyParameters = 0; |
| 390 final parameterDescriptions = root.queryAll("dd"); |
| 391 for (Element parameterDescription in parameterDescriptions) { |
| 392 if (parameterDescription.text.trim().length == 0) { |
| 393 numEmptyParameters++; |
| 394 } |
| 395 } |
| 396 if (numEmptyParameters > 0 && |
| 397 numEmptyParameters == parameterDescriptions.length) { |
| 398 // Remove the parameter list as it adds zero value as all descriptions |
| 399 // are empty. |
| 400 parametersHeader.remove(); |
| 401 for (final e in root.queryAll("dl")) { |
| 402 e.remove(); |
| 403 } |
| 404 } else if (parameterDescriptions.length == 0 && |
| 405 parametersHeader.nextElementSibling != null && |
| 406 parametersHeader.nextElementSibling.text.trim() == 'None.') { |
| 407 // No need to display that the function takes 0 parameters. |
| 408 parametersHeader.nextElementSibling.remove(); |
| 409 parametersHeader.remove(); |
| 410 } |
| 411 } |
| 412 |
| 413 // Heuristic: if the return value is a single word it is a type name not a |
| 414 // useful text description so suppress it. |
| 415 if (returnValueHeader != null && |
| 416 returnValueHeader.nextElementSibling != null && |
| 417 returnValueHeader.nextElementSibling.text.trim().split(' ').length < 2) { |
| 418 returnValueHeader.nextElementSibling.remove(); |
| 419 returnValueHeader.remove(); |
| 420 } |
| 421 |
370 bool changed = true; | 422 bool changed = true; |
371 while (changed) { | 423 while (changed) { |
372 changed = false; | 424 changed = false; |
373 while (root.nodes.length == 1) { | 425 while (root.nodes.length == 1 && root.nodes.first is Element) { |
374 Node child = root.nodes.first; | 426 root = root.nodes.first; |
375 if (child is Element) { | 427 changed = true; |
376 root = child; | |
377 changed = true; | |
378 } else { | |
379 // Just calling innerHTML on the parent will be sufficient... | |
380 // and insures the output is properly escaped. | |
381 break; | |
382 } | |
383 } | 428 } |
384 | 429 |
385 // Trim useless nodes from the front. | 430 // Trim useless nodes from the front. |
386 while(root.nodes.length > 0 && | 431 while (root.nodes.length > 0 && |
387 isSkippable(root.nodes.first)) { | 432 isSkippable(root.nodes.first)) { |
388 root.nodes.first.remove(); | 433 root.nodes.first.remove(); |
389 changed = true; | 434 changed = true; |
390 } | 435 } |
391 | 436 |
392 // Trim useless nodes from the back. | 437 // Trim useless nodes from the back. |
393 while(root.nodes.length > 0 && | 438 while (root.nodes.length > 0 && |
394 isSkippable(root.nodes.last())) { | 439 isSkippable(root.nodes.last())) { |
395 root.nodes.last().remove(); | 440 root.nodes.last().remove(); |
396 changed = true; | 441 changed = true; |
397 } | 442 } |
398 } | 443 } |
399 return JSONFIXUPHACK(root.innerHTML); | 444 return JSONFIXUPHACK(root.innerHTML); |
400 } | 445 } |
401 | 446 |
402 String genPrettyHtml(DocumentFragment fragment) { | |
403 return genCleanHtml(fragment); | |
404 } | |
405 | |
406 String genPrettyHtmlFromElement(Element e) { | 447 String genPrettyHtmlFromElement(Element e) { |
407 e = e.clone(true); | 448 e = e.clone(true); |
408 return genCleanHtml(e); | 449 return genCleanHtml(e); |
409 } | 450 } |
410 | 451 |
411 class PostOrderTraversalIterator implements Iterator<Node> { | 452 class PostOrderTraversalIterator implements Iterator<Node> { |
412 | 453 |
413 Node _next; | 454 Node _next; |
414 | 455 |
415 PostOrderTraversalIterator(Node start) { | 456 PostOrderTraversalIterator(Node start) { |
416 _next = _leftMostDescendent(start); | 457 _next = _leftMostDescendent(start); |
417 } | 458 } |
418 | 459 |
419 bool hasNext() => _next != null; | 460 bool hasNext() => _next != null; |
420 | 461 |
421 Node next() { | 462 Node next() { |
422 if (_next == null) return null; | 463 if (_next == null) return null; |
423 Node ret = _next; | 464 final ret = _next; |
424 if (_next.nextNode != null) { | 465 if (_next.nextNode != null) { |
425 _next = _leftMostDescendent(_next.nextNode); | 466 _next = _leftMostDescendent(_next.nextNode); |
426 } else { | 467 } else { |
427 _next = _next.parent; | 468 _next = _next.parent; |
428 } | 469 } |
429 return ret; | 470 return ret; |
430 } | 471 } |
431 | 472 |
432 static Node _leftMostDescendent(Node n) { | 473 static Node _leftMostDescendent(Node n) { |
433 while (n.nodes.length > 0) { | 474 while (n.nodes.length > 0) { |
434 n = n.nodes.first; | 475 n = n.nodes.first; |
435 } | 476 } |
436 return n; | 477 return n; |
437 } | 478 } |
438 } | 479 } |
439 | 480 |
440 class PostOrderTraversal implements Iterable<Node> { | 481 class PostOrderTraversal implements Iterable<Node> { |
441 final Node _node; | 482 final Node _node; |
442 PostOrderTraversal(this._node); | 483 PostOrderTraversal(this._node); |
443 | 484 |
444 Iterator<Node> iterator() => new PostOrderTraversalIterator(_node); | 485 Iterator<Node> iterator() => new PostOrderTraversalIterator(_node); |
445 } | 486 } |
446 | 487 |
| 488 /** |
| 489 * Estimate what content represents the first line of text within the [section] |
| 490 * range returning null if there isn't a plausible first line of text that |
| 491 * contains the string [prop]. We measure the actual rendered client rectangle |
| 492 * for the text and use heuristics defining how many pixels text can vary by |
| 493 * and still be viewed as being on the same line. |
| 494 */ |
447 Range findFirstLine(Range section, String prop) { | 495 Range findFirstLine(Range section, String prop) { |
448 Range firstLine = newRange(); | 496 final firstLine = newRange(); |
449 firstLine.setStart(section.startContainer, section.startOffset); | 497 firstLine.setStart(section.startContainer, section.startOffset); |
450 | 498 |
451 num maxBottom = null; | 499 num maxBottom = null; |
452 for (Node n in new PostOrderTraversal(section.startContainer)) { | 500 for (final n in new PostOrderTraversal(section.startContainer)) { |
453 int compareResult = section.comparePoint(n, 0); | 501 int compareResult = section.comparePoint(n, 0); |
454 if (compareResult == -1) { | 502 if (compareResult == -1) { |
455 // before range so skip. | 503 // before range so skip. |
456 continue; | 504 continue; |
457 } else if (compareResult > 0) { | 505 } else if (compareResult > 0) { |
458 // After range so exit. | 506 // After range so exit. |
459 break; | 507 break; |
460 } | 508 } |
461 | 509 |
462 final rect = getClientRect(n); | 510 final rect = getClientRect(n); |
463 num bottom = rect.bottom; | 511 num bottom = rect.bottom; |
464 if (rect.height > 0 && rect.width > 0) { | 512 if (rect.height > 0 && rect.width > 0) { |
465 if (maxBottom != null && ( | 513 if (maxBottom != null && |
466 maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom | 514 maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) { |
467 )) { | |
468 break; | 515 break; |
469 } else if (maxBottom == null || maxBottom > bottom) { | 516 } else if (maxBottom == null || maxBottom > bottom) { |
470 maxBottom = bottom; | 517 maxBottom = bottom; |
471 } | 518 } |
472 } | 519 } |
473 | 520 |
474 firstLine.setEndAfter(n); | 521 firstLine.setEndAfter(n); |
475 } | 522 } |
476 | 523 |
477 if (firstLine.toString().indexOf(stripWebkit(prop)) == -1) { | 524 // If the first line of text in the section does not contain the property |
| 525 // name then we're not confident we are able to extract a high accuracy match |
| 526 // so we should not return anything. |
| 527 if (!firstLine.toString().contains(stripWebkit(prop))) { |
478 return null; | 528 return null; |
479 } | 529 } |
480 return firstLine; | 530 return firstLine; |
481 } | 531 } |
482 | 532 |
| 533 /** Returns the first anchor under [root] whose text contains [prop], or null. */ |
483 AnchorElement findAnchorElement(Element root, String prop) { | 534 AnchorElement findAnchorElement(Element root, String prop) { |
484 for (AnchorElement a in root.queryAll("a")) { | 535 for (AnchorElement a in root.queryAll("a")) { |
485 if (a.text.indexOf(prop) != -1) { | 536 if (a.text.contains(prop)) { |
486 return a; | 537 return a; |
487 } | 538 } |
488 } | 539 } |
489 return null; | 540 return null; |
490 } | 541 } |
491 | 542 |
492 // First surrounding element with an ID is safe enough. | 543 // First surrounding element with an ID is safe enough. |
493 Element findTigherRoot(Element elem, Element root) { | 544 Element findTighterRoot(Element elem, Element root) { |
494 Element candidate = elem; | 545 Element candidate = elem; |
495 while(root != candidate) { | 546 while (root != candidate) { |
496 candidate = candidate.parent; | 547 candidate = candidate.parent; |
497 if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) { | 548 if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) { |
498 break; | 549 break; |
499 } | 550 } |
500 } | 551 } |
501 return candidate; | 552 return candidate; |
502 } | 553 } |
503 | 554 |
504 // this is very slow and ugly.. consider rewriting. | 555 // TODO(jacobr): this is very slow and ugly.. consider rewriting or at least |
| 556 // commenting carefully. |
505 SectionParseResult filteredHtml(Element elem, Element root, String prop, | 557 SectionParseResult filteredHtml(Element elem, Element root, String prop, |
506 Function fragmentGeneratedCallback) { | 558 Function fragmentGeneratedCallback) { |
507 // Using a tighter root avoids false positives at the risk of trimming | 559 // Using a tighter root avoids false positives at the risk of trimming |
508 // text we shouldn't. | 560 // text we shouldn't. |
509 root = findTigherRoot(elem, root); | 561 root = findTighterRoot(elem, root); |
510 Range range = newRange(); | 562 final range = newRange(); |
511 range.setStartBefore(elem); | 563 range.setStartBefore(elem); |
512 | 564 |
513 Element current = elem; | 565 Element current = elem; |
514 while (current != null) { | 566 while (current != null) { |
515 range.setEndBefore(current); | 567 range.setEndBefore(current); |
516 if (current.classes.contains(DART_REMOVED)) { | 568 if (current.classes.contains(DART_REMOVED) && |
517 if (range.toString().trim().length > 0) { | 569 range.toString().trim().length > 0) { |
518 break; | 570 break; |
519 } | |
520 } | 571 } |
521 if (current.firstElementChild != null) { | 572 if (current.firstElementChild != null) { |
522 current = current.firstElementChild; | 573 current = current.firstElementChild; |
523 } else { | 574 } else { |
524 while (current != null) { | 575 while (current != null) { |
525 range.setEndAfter(current); | 576 range.setEndAfter(current); |
526 if (current == root) { | 577 if (current == root) { |
527 current = null; | 578 current = null; |
528 break; | 579 break; |
529 } | 580 } |
(...skipping 10 matching lines...) Expand all Loading... |
540 Range firstLine = findFirstLine(range, prop); | 591 Range firstLine = findFirstLine(range, prop); |
541 if (firstLine != null) { | 592 if (firstLine != null) { |
542 range.setStart(firstLine.endContainer, firstLine.endOffset); | 593 range.setStart(firstLine.endContainer, firstLine.endOffset); |
543 DocumentFragment firstLineClone = firstLine.cloneContents(); | 594 DocumentFragment firstLineClone = firstLine.cloneContents(); |
544 AnchorElement anchor = findAnchorElement(firstLineClone, prop); | 595 AnchorElement anchor = findAnchorElement(firstLineClone, prop); |
545 if (anchor != null) { | 596 if (anchor != null) { |
546 url = getAbsoluteUrl(anchor); | 597 url = getAbsoluteUrl(anchor); |
547 } | 598 } |
548 } | 599 } |
549 } | 600 } |
550 DocumentFragment fragment = range.cloneContents(); | 601 final fragment = range.cloneContents(); |
551 if (fragmentGeneratedCallback != null) { | 602 if (fragmentGeneratedCallback != null) { |
552 fragmentGeneratedCallback(fragment); | 603 fragmentGeneratedCallback(fragment); |
553 } | 604 } |
554 // Strip tags we don't want | 605 // Strip tags we don't want |
555 for (Element e in fragment.queryAll("script, object, style")) { | 606 for (Element e in fragment.queryAll("script, object, style")) { |
556 e.remove(); | 607 e.remove(); |
557 } | 608 } |
558 | 609 |
559 // Extract idl | 610 // Extract idl |
560 StringBuffer idl = new StringBuffer(); | 611 final idl = new StringBuffer(); |
561 if (prop != null && prop.length > 0) { | 612 if (prop != null && prop.length > 0) { |
562 // Only expect properties to have HTML. | 613 // Only expect properties to have HTML. |
563 for(Element e in fragment.queryAll(IDL_SELECTOR)) { | 614 for(Element e in fragment.queryAll(IDL_SELECTOR)) { |
564 idl.add(e.outerHTML); | 615 idl.add(e.outerHTML); |
565 e.remove(); | 616 e.remove(); |
566 } | 617 } |
567 // TODO(jacobr) this is a very basic regex to see if text looks like IDL | 618 // TODO(jacobr) this is a very basic regex to see if text looks like IDL |
568 RegExp likelyIdl = new RegExp(" $prop\\w*\\("); | 619 RegExp likelyIdl = new RegExp(" $prop\\w*\\("); |
569 | 620 |
570 for (Element e in fragment.queryAll("pre")) { | 621 for (Element e in fragment.queryAll("pre")) { |
571 // Check if it looks like idl... | 622 // Check if it looks like idl... |
572 String txt = e.text.trim(); | 623 String txt = e.text.trim(); |
573 if (likelyIdl.hasMatch(txt) && txt.indexOf("\n") != -1 | 624 if (likelyIdl.hasMatch(txt) && txt.contains("\n") && txt.contains(")")) { |
574 && txt.indexOf(")") != -1) { | |
575 idl.add(e.outerHTML); | 625 idl.add(e.outerHTML); |
576 e.remove(); | 626 e.remove(); |
577 } | 627 } |
578 } | 628 } |
579 } | 629 } |
580 return new SectionParseResult(genPrettyHtml(fragment), url, idl.toString()); | 630 return new SectionParseResult(genCleanHtml(fragment), url, idl.toString()); |
581 } | 631 } |
582 | 632 |
583 Element findBest(Element root, List<Text> allText, String prop, String propType)
{ | 633 /** |
584 // Best bet: match an id | 634 * Find the best child element of [root] that appears to be an API definition |
585 Element cand; | 635 * for [prop]. [allText] is a list of all text nodes under root computed by |
586 cand = root.query("#" + prop); | 636 * the caller to improve performance. |
| 637 */ |
| 638 Element findBest(Element root, List<Text> allText, String prop, |
| 639 String propType) { |
| 640 // Best bet: find a child of root where the id matches the property name. |
| 641 Element cand = root.query("#$prop"); |
587 | 642 |
588 if (cand == null && propType == "methods") { | 643 if (cand == null && propType == "methods") { |
589 cand = root.query("[id=" + prop + "\\(\\)]"); | 644 cand = root.query("[id=$prop\\(\\)]"); |
| 645 } |
| 646 while (cand != null && cand.text.trim().length == 0) { |
| 647 // We found the bookmark for the element but sadly it is just an empty |
| 648 // placeholder. Find the first real element. |
| 649 cand = cand.nextElementSibling; |
590 } | 650 } |
591 if (cand != null) { | 651 if (cand != null) { |
592 while (cand != null && cand.text.trim().length == 0) { | 652 return cand; |
593 // We found the bookmark for the element but sadly it is just an empty | |
594 // placeholder. Find the first real element. | |
595 cand = cand.nextElementSibling; | |
596 } | |
597 if (cand != null) { | |
598 return cand; | |
599 } | |
600 } | 653 } |
601 | 654 |
602 // If you are at least 70 pixels from the left, something is definitely fishy
and we shouldn't even consider this candidate. | 655 // If we are at least 70 pixels from the left, something is definitely |
| 656 // fishy and we shouldn't even consider this candidate as nobody visually |
| 657 // formats API docs like that. |
603 num candLeft = 70; | 658 num candLeft = 70; |
604 | 659 |
605 for (Text text in allText) { | 660 for (Text text in allText) { |
606 Element proposed = null; | 661 Element proposed = null; |
607 | 662 |
608 // var t = safeNameCleanup(text.text); | 663 // TODO(jacobr): does it hurt precision to use the full cleanup? |
609 // TODO(jacobr): does it hurt precision to use the full cleanup? | |
610 String t = fullNameCleanup(text.text); | 664 String t = fullNameCleanup(text.text); |
611 if (t == prop) { | 665 if (t == prop) { |
612 proposed = text.parent; | 666 proposed = text.parent; |
613 ClientRect candRect = getClientRect(proposed); | 667 ClientRect candRect = getClientRect(proposed); |
614 | 668 |
615 // TODO(jacobr): this is a good heuristic | 669 // TODO(jacobr): this is a good heuristic |
616 // if (selObj.selector.indexOf(" > DD ") == -1 | 670 // if (selObj.selector.indexOf(" > DD ") == -1 |
617 if (candRect.left < candLeft) { | 671 if (candRect.left < candLeft) { |
618 cand = proposed; | 672 cand = proposed; |
619 candLeft = candRect.left; | 673 candLeft = candRect.left; |
620 } | 674 } |
621 } | 675 } |
622 } | 676 } |
623 return cand; | 677 return cand; |
624 } | 678 } |
625 | 679 |
| 680 /** |
| 681 * Checks whether [e] is tagged as obsolete or deprecated using heuristics |
| 682 * for what these tags look like in the MDN docs. |
| 683 */ |
626 bool isObsolete(Element e) { | 684 bool isObsolete(Element e) { |
627 RegExp obsoleteRegExp = new RegExp(@"(^|\s)obsolete(?=\s|$)"); | 685 RegExp obsoleteRegExp = new RegExp(@"(^|\s)obsolete(?=\s|$)"); |
628 RegExp deprecatedRegExp = new RegExp(@"(^|\s)deprecated(?=\s|$)"); | 686 RegExp deprecatedRegExp = new RegExp(@"(^|\s)deprecated(?=\s|$)"); |
629 for (Element child in e.queryAll("span")) { | 687 for (Element child in e.queryAll("span")) { |
630 String t = child.text.toLowerCase(); | 688 String t = child.text.toLowerCase(); |
631 if (t.startsWith("obsolete") || t.startsWith("deprecated")) return true; | 689 if (t.startsWith("obsolete") || t.startsWith("deprecated")) return true; |
632 } | 690 } |
633 | 691 |
634 String text = e.text.toLowerCase(); | 692 String text = e.text.toLowerCase(); |
635 return obsoleteRegExp.hasMatch(text) || deprecatedRegExp.hasMatch(text); | 693 return obsoleteRegExp.hasMatch(text) || deprecatedRegExp.hasMatch(text); |
636 } | 694 } |
637 | 695 |
638 bool isFirstCharLowerCase(String str) { | 696 bool isFirstCharLowerCase(String str) { |
639 RegExp firstLower = new RegExp("^[a-z]"); | 697 return const RegExp("^[a-z]").hasMatch(str); |
640 return firstLower.hasMatch(str); | |
641 } | 698 } |
642 | 699 |
643 void scrapeSection(Element root, String sectionSelector, | 700 /** |
644 String currentType, | 701 * Extracts information from a fragment of HTML only searching under the [root] |
645 List members, | 702 * HTML node. [sectionSelector] specifies the query to use to find candidate |
646 String propType) { | 703 * sections of the document to consider (there may be more than one). |
| 704 * [currentType] specifies the name of the current class. [members] specifies |
| 705 * the known class members for this class that we are attempting to find |
| 706 * documentation for. [propType] indicates whether we are searching for |
| 707 * methods, properties, constants, or constructors. |
| 708 */ |
| 709 void scrapeSection(Element root, String sectionSelector, String currentType, |
| 710 List members, String propType) { |
647 Map expectedProps = dartIdl[propType]; | 711 Map expectedProps = dartIdl[propType]; |
648 | 712 |
649 Set<String> alreadyMatchedProperties = new Set<String>(); | 713 Set<String> alreadyMatchedProperties = new Set<String>(); |
650 bool onlyConsiderTables = false; | 714 bool onlyConsiderTables = false; |
651 ElementList allMatches = root.queryAll(sectionSelector); | 715 ElementList allMatches = root.queryAll(sectionSelector); |
652 if (allMatches.length == 0) { | 716 if (allMatches.length == 0) { |
| 717 // If we can't find any matches to the sectionSelector, we fall back to |
| 718 // considering all tables in the document. This is dangerous so we only |
| 719 // allow the safer table matching extraction rules for this case. |
653 allMatches = root.queryAll(".fullwidth-table"); | 720 allMatches = root.queryAll(".fullwidth-table"); |
654 onlyConsiderTables = true; | 721 onlyConsiderTables = true; |
655 } | 722 } |
656 for (Element matchElement in allMatches) { | 723 for (Element matchElement in allMatches) { |
657 DivElement match = matchElement.parent; | 724 final match = matchElement.parent; |
658 if (!match.id.startsWith("section") && !(match.id == "pageText")) { | 725 if (!match.id.startsWith("section") && match.id != "pageText") { |
659 throw "Enexpected element $match"; | 726 throw "Unexpected element $match"; |
660 } | 727 } |
| 728 // Mark the section as removed so that its text is not displayed a second |
| 729 // time later, for example while displaying class level summary |
| 730 // information, which would show the same documentation twice. |
661 match.classes.add(DART_REMOVED); | 731 match.classes.add(DART_REMOVED); |
662 | 732 |
663 bool foundProps = false; | 733 bool foundProps = false; |
664 | 734 |
665 // TODO(jacobr): we should really look for the table tag instead | 735 // TODO(jacobr): we should really look for the table tag instead |
666 // add an assert if we are missing something that is a table... | 736 // add an assert if we are missing something that is a table... |
667 // TODO(jacobr) ignore tables in tables.... | 737 // TODO(jacobr) ignore tables in tables. |
668 for (Element t in match.queryAll('.standard-table, .fullwidth-table')) { | 738 for (Element t in match.queryAll('.standard-table, .fullwidth-table')) { |
669 int helpIndex = -1; | 739 int helpIndex = -1; |
670 num i = 0; | 740 num i = 0; |
671 for (Element r in t.queryAll("th, td.header")) { | 741 for (Element r in t.queryAll("th, td.header")) { |
672 var txt = r.text.trim().split(" ")[0].toLowerCase(); | 742 final txt = r.text.trim().split(" ")[0].toLowerCase(); |
673 if (txt == "description") { | 743 if (txt == "description") { |
674 helpIndex = i; | 744 helpIndex = i; |
675 break; | 745 break; |
676 } | 746 } |
677 i++; | 747 i++; |
678 } | 748 } |
679 | 749 |
680 List<int> numMatches = new List<int>(i); | 750 // Figure out which column in the table contains member names by |
| 751 // tracking how many member names each column contains. |
| 752 final numMatches = new List<int>(i); |
681 for (int j = 0; j < i; j++) { | 753 for (int j = 0; j < i; j++) { |
682 numMatches[j] = 0; | 754 numMatches[j] = 0; |
683 } | 755 } |
684 | 756 |
685 // Find the row that seems to have the most names that look like | 757 // Find the column that seems to have the most names that look like |
686 // expected properties. | 758 // expected properties. |
687 for (Element r in t.queryAll("tbody tr")) { | 759 for (Element r in t.queryAll("tbody tr")) { |
688 ElementList $row = r.elements; | 760 ElementList row = r.elements; |
689 if ($row.length == 0 || $row.first.classes.contains(".header")) { | 761 if (row.length == 0 || row.first.classes.contains(".header")) { |
690 continue; | 762 continue; |
691 } | 763 } |
692 | 764 |
693 for (int k = 0; k < numMatches.length && k < $row.length; k++) { | 765 for (int k = 0; k < numMatches.length && k < row.length; k++) { |
694 Element e = $row[k]; | 766 if (expectedProps.containsKey(fullNameCleanup(row[k].text))) { |
695 if (expectedProps.containsKey(fullNameCleanup(e.text))) { | |
696 numMatches[k]++; | 767 numMatches[k]++; |
697 break; | 768 break; |
698 } | 769 } |
699 } | 770 } |
700 } | 771 } |
701 | 772 |
702 int propNameIndex = 0; | 773 int propNameIndex = 0; |
703 { | 774 { |
704 int bestCount = numMatches[0]; | 775 int bestCount = numMatches[0]; |
705 for (int k = 1; k < numMatches.length; k++) { | 776 for (int k = 1; k < numMatches.length; k++) { |
706 if (numMatches[k] > bestCount) { | 777 if (numMatches[k] > bestCount) { |
707 bestCount = numMatches[k]; | 778 bestCount = numMatches[k]; |
708 propNameIndex = k; | 779 propNameIndex = k; |
709 } | 780 } |
710 } | 781 } |
711 } | 782 } |
712 | 783 |
713 for (Element r in t.queryAll("tbody tr")) { | 784 for (Element r in t.queryAll("tbody tr")) { |
714 ElementList $row = r.elements; | 785 final row = r.elements; |
715 if ($row.length > propNameIndex && $row.length > helpIndex ) { | 786 if (row.length > propNameIndex && row.length > helpIndex) { |
716 if ($row.first.classes.contains(".header")) { | 787 if (row.first.classes.contains(".header")) { |
717 continue; | 788 continue; |
718 } | 789 } |
719 // TODO(jacobr): this code for determining the namestr is needlessly | 790 // TODO(jacobr): this code for determining the namestr is needlessly |
720 // messy. | 791 // messy. |
721 Element nameRow = $row[propNameIndex]; | 792 final nameRow = row[propNameIndex]; |
722 AnchorElement a = nameRow.query("a"); | 793 AnchorElement a = nameRow.query("a"); |
723 String goodName = ''; | 794 String goodName = ''; |
724 if (a != null) { | 795 if (a != null) { |
725 goodName = a.text.trim(); | 796 goodName = a.text.trim(); |
726 } | 797 } |
727 String nameStr = nameRow.text; | 798 String nameStr = nameRow.text; |
728 | 799 |
729 Map entry = new Map<String, String>(); | 800 Map entry = new Map<String, String>(); |
730 | 801 |
731 // "currentType": $($row[1]).text().trim(), // find("code") ? | 802 entry["name"] = fullNameCleanup(nameStr.length > 0 ? |
732 entry["name"] = fullNameCleanup(nameStr.length > 0 ? nameStr : goodNam
e); | 803 nameStr : goodName); |
733 | 804 |
734 final parse = filteredHtml(nameRow, nameRow, entry["name"], null); | 805 final parse = filteredHtml(nameRow, nameRow, entry["name"], null); |
735 String altHelp = parse.html; | 806 String altHelp = parse.html; |
736 | 807 |
737 // "jsSignature": nameStr, | 808 entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ? |
738 entry["help"] = (helpIndex == -1 || $row[helpIndex] == null) ? altHelp
: genPrettyHtmlFromElement($row[helpIndex]); | 809 altHelp : genPrettyHtmlFromElement(row[helpIndex]); |
739 // "altHelp" : altHelp, | |
740 if (parse.url != null) { | 810 if (parse.url != null) { |
741 entry["url"] = parse.url; | 811 entry["url"] = parse.url; |
742 } | 812 } |
743 | 813 |
744 if (parse.idl.length > 0) { | 814 if (parse.idl.length > 0) { |
745 entry["idl"] = parse.idl; | 815 entry["idl"] = parse.idl; |
746 } | 816 } |
747 | 817 |
748 entry["obsolete"] = isObsolete(r); | 818 entry["obsolete"] = isObsolete(r); |
749 | 819 |
750 if (entry["name"].length > 0) { | 820 if (entry["name"].length > 0) { |
751 cleanupEntry(members, entry); | 821 cleanupEntry(members, entry); |
752 alreadyMatchedProperties.add(entry['name']); | 822 alreadyMatchedProperties.add(entry['name']); |
753 foundProps = true; | 823 foundProps = true; |
754 } | 824 } |
755 } | 825 } |
756 } | 826 } |
757 } | 827 } |
758 | 828 |
759 if (onlyConsiderTables) { | 829 if (onlyConsiderTables) { |
760 continue; | 830 continue; |
761 } | 831 } |
| 832 |
762 // After this point we have higher risk tests that attempt to perform | 833 // After this point we have higher risk tests that attempt to perform |
763 // rudimentary page segmentation. | 834 // rudimentary page segmentation. This approach is much more error-prone |
| 835 // than using tables because the HTML is far less clearly structured. |
764 | 836 |
765 // Search for expected matching names. | 837 final allText = getAllTextNodes(match); |
766 List<Text> allText = getAllTextNodes(match); | |
767 | 838 |
768 Map<String, Element> pmap = new Map<String, Element>(); | 839 final pmap = new Map<String, Element>(); |
769 for (String prop in expectedProps.getKeys()) { | 840 for (final prop in expectedProps.getKeys()) { |
770 if (alreadyMatchedProperties.contains(prop)) { | 841 if (alreadyMatchedProperties.contains(prop)) { |
771 continue; | 842 continue; |
772 } | 843 } |
773 Element e = findBest(match, allText, prop, propType); | 844 final e = findBest(match, allText, prop, propType); |
774 if (e != null && !inTable(e)) { | 845 if (e != null && !inTable(e)) { |
775 pmap[prop] = e; | 846 pmap[prop] = e; |
776 } | 847 } |
777 } | 848 } |
778 | 849 |
779 for (String prop in pmap.getKeys()) { | 850 for (final prop in pmap.getKeys()) { |
780 Element e = pmap[prop]; | 851 pmap[prop].classes.add(DART_REMOVED); |
781 e.classes.add(DART_REMOVED); | |
782 } | 852 } |
783 | 853 |
| 854 // The problem is the MDN docs do not place documentation for each method in a |
| 855 // nice self contained subtree. Instead you will see something like: |
| 856 |
| 857 // <h3>drawImage</h3> |
| 858 // <p>Draw image is an awesome method</p> |
| 859 // some more info on drawImage here |
| 860 // <h3>mozDrawWindow</h3> |
| 861 // <p>This API cannot currently be used by Web content. |
| 862 // It is chrome only.</p> |
| 863 // <h3>drawRect</h3> |
| 864 // <p>Always call drawRect instead of drawImage</p> |
| 865 // some more info on drawRect here... |
| 866 |
| 867 // The trouble is we will easily detect that the drawImage and drawRect |
| 868 // entries are method definitions because we know to search for these |
| 869 // method names but we will not detect that mozDrawWindow is a method |
| 870 // definition as that method doesn't exist in our IDL. Thus if we are not |
| 871 // careful the definition for the drawImage method will contain the |
| 872 // definition for the mozDrawWindow method as well which would result in |
| 873 // broken docs. We solve this problem by finding all content with similar |
| 874 // visual structure to the already found method definitions. It turns out |
| 875 // that using the visual position of each element on the page is much |
| 876 // more reliable than using the DOM structure |
| 877 // (e.g. section_root > div > h3) for the MDN docs because MDN authors |
| 878 // carefully check that the documentation for each method comment is |
| 879 // visually consistent but take less care to check that each |
| 880 // method comment has identical markup structure. |
784 for (String prop in pmap.getKeys()) { | 881 for (String prop in pmap.getKeys()) { |
785 Element e = pmap[prop]; | 882 Element e = pmap[prop]; |
786 ClientRect r = getClientRect(e); | 883 ClientRect r = getClientRect(e); |
787 // TODO(jacobr): a lot of these queries are identical. | 884 // TODO(jacobr): a lot of these queries are identical and this code |
788 for (Element cand in match.queryAll(e.tagName)) { | 885 // could easily be optimized. |
789 if (!cand.classes.contains(DART_REMOVED) && !inTable(cand) ) { // XXX us
e a neg selector. | 886 for (final cand in match.queryAll(e.tagName)) { |
790 ClientRect candRect = getClientRect(cand); | 887 // TODO(jacobr): use a negative selector instead. |
791 // TODO(jacobr): this is somewhat loose. | 888 if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) { |
| 889 final candRect = getClientRect(cand); |
| 890 // Only consider matches that have similar heights and identical left |
| 891 // coordinates. |
792 if (candRect.left == r.left && | 892 if (candRect.left == r.left && |
793 (candRect.height - r.height).abs() < 5) { | 893 (candRect.height - r.height).abs() < 5) { |
794 String propName = fullNameCleanup(cand.text); | 894 String propName = fullNameCleanup(cand.text); |
795 if (isFirstCharLowerCase(propName) && pmap.containsKey(propName) ==
false && alreadyMatchedProperties.contains(propName) == false) { | 895 if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName) |
796 // Don't set here to avoid layouts... cand.classes.add(DART_REMOVE
D); | 896 && !alreadyMatchedProperties.contains(propName)) { |
797 pmap[propName] = cand; | 897 pmap[propName] = cand; |
798 } | 898 } |
799 } | 899 } |
800 } | 900 } |
801 } | 901 } |
802 } | 902 } |
803 | 903 |
| 904 // We mark these elements in batch to reduce the number of layouts |
| 905 // triggered. TODO(jacobr): use new batch based async measurement to make |
| 906 // this code flow simpler. |
804 for (String prop in pmap.getKeys()) { | 907 for (String prop in pmap.getKeys()) { |
805 Element e = pmap[prop]; | 908 Element e = pmap[prop]; |
806 e.classes.add(DART_REMOVED); | 909 e.classes.add(DART_REMOVED); |
807 } | 910 } |
808 | 911 |
809 // Find likely "subsections" of the main section and mark them with | 912 // Find likely "subsections" of the main section and mark them with |
810 // DART_REMOVED so we don't include them in member descriptions... which | 913 // DART_REMOVED so we don't include them in member descriptions... which |
811 // would suck. | 914 // would suck. |
812 for (Element e in match.queryAll("[id]")) { | 915 for (Element e in match.queryAll("[id]")) { |
813 if (e.id.indexOf(matchElement.id) != -1) { | 916 if (e.id.contains(matchElement.id)) { |
814 e.classes.add(DART_REMOVED); | 917 e.classes.add(DART_REMOVED); |
815 } | 918 } |
816 } | 919 } |
817 | 920 |
818 for (String prop in pmap.getKeys()) { | 921 for (String prop in pmap.getKeys()) { |
819 Element elem = pmap[prop]; | 922 Element elem = pmap[prop]; |
820 bool obsolete = false; | 923 bool obsolete = false; |
821 final parse = filteredHtml( | 924 final parse = filteredHtml( |
822 elem, match, prop, | 925 elem, match, prop, |
823 (Element e) { | 926 (Element e) { |
824 obsolete = isObsolete(e); | 927 obsolete = isObsolete(e); |
825 }); | 928 }); |
826 Map entry = { | 929 Map entry = { |
827 "url" : parse.url, | 930 "url" : parse.url, |
828 "name" : prop, | 931 "name" : prop, |
829 "help" : parse.html, | 932 "help" : parse.html, |
830 "obsolete" : obsolete | 933 "obsolete" : obsolete |
831 //"jsSignature" : nameStr | |
832 }; | 934 }; |
833 if (parse.idl.length > 0) { | 935 if (parse.idl.length > 0) { |
834 entry["idl"] = parse.idl; | 936 entry["idl"] = parse.idl; |
835 } | 937 } |
836 cleanupEntry(members, entry); | 938 cleanupEntry(members, entry); |
837 } | 939 } |
838 } | 940 } |
839 } | 941 } |
840 | 942 |
/**
 * Placeholder for HTML cleanup; currently hands [html] back untouched.
 *
 * TODO(jacobr): implement this. Remove spurious enclosing HTML tags, etc.
 */
String trimHtml(String html) => html;
845 | 947 |
/**
 * Returns whether [name] looks like a plausible member name.
 *
 * Two shapes are accepted:
 *  - a lower-case letter followed by at least one letter or digit
 *    (methods and properties, e.g. drawImage), or
 *  - an upper-case letter followed by any number of upper-case letters,
 *    digits or underscores (constants, e.g. NETWORK_ERR or TEXTURE_2D).
 *
 * Digits are allowed in constant names so that constants such as TEXTURE_2D
 * are not discarded.
 */
bool maybeName(String name) {
  final memberPattern = const RegExp("^[a-z][a-z0-9A-Z]+\$");
  final constantPattern = const RegExp("^[A-Z][A-Z0-9_]*\$");
  return memberPattern.hasMatch(name) || constantPattern.hasMatch(name);
}
852 | 952 |
/**
 * Adds the [DART_REMOVED] CSS class to [e].
 *
 * [e] may be null (nothing happens), a single Element, or a collection of
 * Elements that can be iterated with for-in.
 *
 * TODO(jacobr): this is ugly at the moment but will become easier to read
 * once ElementList supports most of the Element functionality.
 */
void markRemoved(var e) {
  if (e == null) return;
  if (e is! Element) {
    // Not a single element: treat it as a collection and tag each member.
    for (Element item in e) {
      item.classes.add(DART_REMOVED);
    }
    return;
  }
  e.classes.add(DART_REMOVED);
}
865 | 966 |
/**
 * Replaces every newline character in [value] with a fixed sentinel token.
 *
 * TODO(jacobr): remove this when the dartium JSON parser handles \n correctly.
 */
String JSONFIXUPHACK(String value) {
  final newlineToken = "ZDARTIUMDOESNTESCAPESLASHNJXXXX";
  return value.replaceAll("\n", newlineToken);
}
869 | 971 |
/**
 * Rewrites a leading "moz" vendor prefix in [name] to "webkit".
 *
 * Names that do not start with "moz" are returned unchanged; only the
 * prefix position is considered.
 */
String mozToWebkit(String name) {
  if (!name.startsWith("moz")) {
    return name;
  }
  return "webkit" + name.substring(3);
}
875 | 975 |
/** Removes a leading "webkit" vendor prefix from [name], if there is one. */
String stripWebkit(String name) => trimPrefix(name, "webkit");
879 | 979 |
/**
 * Aggressively reduces [name] to a bare member name.
 *
 * Everything from the first '(' onward is dropped, then everything from the
 * first space, newline, tab and '*' onward (applied in that order). The
 * result is trimmed and finally passed through [safeNameCleanup].
 *
 * TODO(jacobr): be more principled about this.
 */
String fullNameCleanup(String name) {
  final openParen = name.indexOf('(');
  String cleaned = (openParen == -1) ? name : name.substring(0, openParen);
  // Keep only the text that precedes each separator, one separator at a time.
  for (final separator in [" ", "\n", "\t", "*"]) {
    cleaned = cleaned.split(separator)[0];
  }
  return safeNameCleanup(cleaned.trim());
}
895 | 994 |
/**
 * Less aggressive than [fullNameCleanup] to avoid overeager matching.
 *
 * Drops a trailing argument list only when both '(' and ')' are present,
 * strips a leading "<type>." qualifier (tried for currentType,
 * currentTypeShort and currentTypeTiny, each in its original and lower-case
 * spelling), and maps a "moz" prefix to "webkit" via [mozToWebkit].
 *
 * TODO(jacobr): be more principled about this.
 */
String safeNameCleanup(String name) {
  final openParen = name.indexOf('(');
  if (openParen != -1 && name.indexOf(")") != -1) {
    // substring is used rather than name.split("(")[0], which the original
    // author noted as a workaround for a bug.
    name = name.substring(0, openParen);
  }
  name = name.trim();
  // Order matters: each qualifier is tried as written, then lower-cased.
  for (final typeName in [currentType, currentTypeShort, currentTypeTiny]) {
    name = trimPrefix(name, typeName + ".");
    name = trimPrefix(name, typeName.toLowerCase() + ".");
  }
  return mozToWebkit(name.trim());
}
916 | 1015 |
/**
 * Removes every h1, h2, and h3 element found inside [fragment].
 */
void removeHeaders(DocumentFragment fragment) {
  final headers = fragment.queryAll("h1, h2, h3");
  for (Element header in headers) {
    header.remove();
  }
}
922 | 1024 |
| 1025 /** |
| 1026 * Given an [entry] representing a single method or property cleanup the |
| 1027 * values performing some simple normalization and only adding the entry to |
| 1028 * [members] if it has a valid name. |
| 1029 */ |
923 void cleanupEntry(List members, Map entry) { | 1030 void cleanupEntry(List members, Map entry) { |
924 if (entry.containsKey('help')) { | 1031 if (entry.containsKey('help')) { |
925 entry['help'] = trimHtml(entry['help']); | 1032 entry['help'] = trimHtml(entry['help']); |
926 } | 1033 } |
927 String name = fullNameCleanup(entry['name']); | 1034 String name = fullNameCleanup(entry['name']); |
928 entry['name'] = name; | 1035 entry['name'] = name; |
929 if (maybeName(name)) { | 1036 if (maybeName(name)) { |
930 for (String key in entry.getKeys()) { | 1037 for (String key in entry.getKeys()) { |
931 var value = entry[key]; | 1038 var value = entry[key]; |
932 if (value == null) { | 1039 if (value == null) { |
(...skipping 10 matching lines...) Expand all Loading... |
943 | 1050 |
/**
 * Returns [str] with a leading [prefix] removed, or [str] unchanged when it
 * does not begin with [prefix].
 *
 * When [str] is exactly [prefix] the result is the empty string.
 *
 * TODO(jacobr): near-duplicate of trimStart.
 */
String trimPrefix(String str, String prefix) {
  // startsWith only inspects the front of the string, whereas
  // indexOf(prefix) == 0 would search the whole string on a miss.
  return str.startsWith(prefix) ? str.substring(prefix.length) : str;
}
952 | 1059 |
953 void resourceLoaded() { | |
954 if (data != null) run(); | |
955 } | |
956 | |
/**
 * Strips [start] from the front of [str].
 *
 * [str] is returned unchanged when it does not begin with [start] or when
 * nothing would remain after stripping (i.e. [str] is not longer than
 * [start]).
 */
String trimStart(String str, String start) {
  final hasRemainder = str.length > start.length;
  return (hasRemainder && str.startsWith(start))
      ? str.substring(start.length)
      : str;
}
963 | 1066 |
/**
 * Strips [end] from the back of [str].
 *
 * [str] is returned unchanged when it does not finish with [end] or when
 * nothing would remain after stripping (i.e. [str] is not longer than [end]).
 */
String trimEnd(String str, String end) {
  final hasRemainder = str.length > end.length;
  return (hasRemainder && str.endsWith(end))
      ? str.substring(0, str.length - end.length)
      : str;
}
970 | 1073 |
/**
 * Extracts a section with name [key], using [selector] to find the start
 * points for the section in the document.
 *
 * For each element matching [selector], its parent is treated as the section:
 * h1, h2 and IDL_SELECTOR matches inside it are removed, the filtered HTML is
 * stored in dbEntry under [key] (appended if the key already exists), and
 * the section is tagged with [DART_REMOVED].
 */
void extractSection(String selector, String key) {
  for (Element anchor in document.queryAll(selector)) {
    final section = anchor.parent;
    for (Element skip in section.queryAll("h1, h2, $IDL_SELECTOR")) {
      skip.remove();
    }
    final html = filteredHtml(section, section, null, removeHeaders).html;
    if (html.length > 0) {
      // Several matches may contribute to the same key; concatenate them.
      dbEntry[key] = dbEntry.containsKey(key) ? dbEntry[key] + html : html;
    }
    section.classes.add(DART_REMOVED);
  }
}
988 | 1095 |
989 void run() { | 1096 void run() { |
990 // Inject CSS to insure lines don't wrap unless it was intentional. | 1097 // Inject CSS to ensure lines don't wrap unless they were intended to. |
| 1098 // This is needed to make the logic to determine what is a single line |
| 1099 // behave consistently even for very long method names. |
991 document.head.nodes.add(new Element.html(""" | 1100 document.head.nodes.add(new Element.html(""" |
992 <style type="text/css"> | 1101 <style type="text/css"> |
993 body { | 1102 body { |
994 width: 10000px; | 1103 width: 10000px; |
995 } | 1104 } |
996 </style>""")); | 1105 </style>""")); |
997 | 1106 |
998 String title = trimEnd(window.document.title.trim(), " - MDN"); | 1107 String title = trimEnd(window.document.title.trim(), " - MDN"); |
999 dbEntry['title'] = title; | 1108 dbEntry['title'] = title; |
1000 | 1109 |
1001 // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best | 1110 // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best |
1002 // place to do this... | 1111 // place to do this... |
| 1112 // TODO(jacobr): move this to right before we extract HTML. |
1003 | 1113 |
1004 // Remove the "Introduced in HTML <version>" boxes. | 1114 // Remove the "Introduced in HTML <version>" boxes. |
1005 for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) { | 1115 for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) { |
1006 e.remove(); | 1116 e.remove(); |
1007 } | 1117 } |
1008 | 1118 |
1009 // Flatten the list of known DOM types into a faster and case-insensitive map. | 1119 // Flatten the list of known DOM types into a faster and case-insensitive |
| 1120 // map. |
1010 domTypes = {}; | 1121 domTypes = {}; |
1011 for (final domType in domTypesRaw) { | 1122 for (final domType in domTypesRaw) { |
1012 domTypes[domType.toLowerCase()] = domType; | 1123 domTypes[domType.toLowerCase()] = domType; |
1013 } | 1124 } |
1014 | 1125 |
1015 // Fix up links. | 1126 // Fix up links. |
1016 final SHORT_LINK = const RegExp(@'^[\w/]+$'); | 1127 final SHORT_LINK = const RegExp(@'^[\w/]+$'); |
1017 final INNER_LINK = const RegExp(@'[Ee]n/(?:[\w/]+/|)([\w#.]+)(?:\(\))?$'); | 1128 final INNER_LINK = const RegExp(@'[Ee]n/(?:[\w/]+/|)([\w#.]+)(?:\(\))?$'); |
1018 final MEMBER_LINK = const RegExp(@'(\w+)[.#](\w+)'); | 1129 final MEMBER_LINK = const RegExp(@'(\w+)[.#](\w+)'); |
1019 final RELATIVE_LINK = const RegExp(@'^(?:../)*/?[Ee][Nn]/(.+)'); | 1130 final RELATIVE_LINK = const RegExp(@'^(?:../)*/?[Ee][Nn]/(.+)'); |
1020 | 1131 |
1021 // - Make relative links absolute. | 1132 // - Make relative links absolute. |
1022 // - If we can, take links that point to other MDN pages and retarget them | 1133 // - If we can, take links that point to other MDN pages and retarget them |
1023 // to appropriate pages in our docs. | 1134 // to appropriate pages in our docs. |
1024 // TODO(rnystrom): Add rel external to links we didn't fix. | 1135 // TODO(rnystrom): Add rel external to links we didn't fix. |
1025 for (AnchorElement a in document.queryAll('a')) { | 1136 for (AnchorElement a in document.queryAll('a')) { |
1026 // Get the raw attribute because we *don't* want the browser to fully- | 1137 // Get the raw attribute because we *don't* want the browser to fully- |
1027 // qualify the name for us since it has the wrong base address for the page. | 1138 // qualify the name for us since it has the wrong base address for the |
| 1139 // page. |
1028 var href = a.attributes['href']; | 1140 var href = a.attributes['href']; |
1029 | 1141 |
1030 // Ignore busted links. | 1142 // Ignore busted links. |
1031 if (href == null) continue; | 1143 if (href == null) continue; |
1032 | 1144 |
1033 // If we can recognize what it's pointing to, point it to our page instead. | 1145 // If we can recognize what it's pointing to, point it to our page instead. |
1034 tryToLinkToRealType(maybeType) { | 1146 tryToLinkToRealType(maybeType) { |
1035 // See if we know a type with that name. | 1147 // See if we know a type with that name. |
1036 final realType = domTypes[maybeType.toLowerCase()]; | 1148 final realType = domTypes[maybeType.toLowerCase()]; |
1037 if (realType != null) { | 1149 if (realType != null) { |
(...skipping 25 matching lines...) Expand all Loading... |
1063 tryToLinkToRealType(member[1]); | 1175 tryToLinkToRealType(member[1]); |
1064 } else { | 1176 } else { |
1065 tryToLinkToRealType(match[1]); | 1177 tryToLinkToRealType(match[1]); |
1066 } | 1178 } |
1067 } | 1179 } |
1068 | 1180 |
1069 // Put it back into the element. | 1181 // Put it back into the element. |
1070 a.attributes['href'] = href; | 1182 a.attributes['href'] = href; |
1071 } | 1183 } |
1072 | 1184 |
1073 if (title.toLowerCase().indexOf(currentTypeTiny.toLowerCase()) == -1) { | 1185 if (!title.toLowerCase().contains(currentTypeTiny.toLowerCase())) { |
1074 bool foundMatch = false; | 1186 bool foundMatch = false; |
1075 // Test out if the title is really an HTML tag that matches the | 1187 // Test out if the title is really an HTML tag that matches the |
1076 // current class name. | 1188 // current class name. |
1077 for (String tag in [title.split(" ")[0], title.split(".").last()]) { | 1189 for (String tag in [title.split(" ")[0], title.split(".").last()]) { |
1078 try { | 1190 try { |
1079 dom.Element element = dom.document.createElement(tag); | 1191 dom.Element element = dom.document.createElement(tag); |
| 1192 // TODO(jacobr): this is a really ugly way of doing this that will |
| 1193 // stop working at some point soon. |
1080 if (element.typeName == currentType) { | 1194 if (element.typeName == currentType) { |
1081 foundMatch = true; | 1195 foundMatch = true; |
1082 break; | 1196 break; |
1083 } | 1197 } |
1084 } catch(e) {} | 1198 } catch(e) {} |
1085 } | 1199 } |
1086 if (foundMatch == false) { | 1200 if (!foundMatch) { |
1087 dbEntry['skipped'] = true; | 1201 dbEntry['skipped'] = true; |
1088 dbEntry['cause'] = "Suspect title"; | 1202 dbEntry['cause'] = "Suspect title"; |
1089 onEnd(); | 1203 onEnd(); |
1090 return; | 1204 return; |
1091 } | 1205 } |
1092 } | 1206 } |
1093 | 1207 |
1094 Element root = document.query(".pageText"); | 1208 Element root = document.query(".pageText"); |
1095 if (root == null) { | 1209 if (root == null) { |
1096 dbEntry['cause'] = '.pageText not found'; | 1210 dbEntry['cause'] = '.pageText not found'; |
1097 onEnd(); | 1211 onEnd(); |
1098 return; | 1212 return; |
1099 } | 1213 } |
1100 | 1214 |
1101 markRemoved(root.query("#Notes")); | 1215 markRemoved(root.query("#Notes")); |
1102 List members = dbEntry['members']; | 1216 List members = dbEntry['members']; |
1103 | 1217 |
| 1218 // This is a laundry list of CSS selectors for boilerplate content on the |
| 1219 // MDN pages that we should ignore for the purposes of extracting |
| 1220 // documentation. |
1104 markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar")); | 1221 markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar")); |
1105 markRemoved(document.queryAll("#article-nav")); | 1222 markRemoved(document.queryAll("#article-nav")); |
1106 markRemoved(document.queryAll(".hideforedit")); | 1223 markRemoved(document.queryAll(".hideforedit")); |
1107 markRemoved(document.queryAll(".navbox")); | 1224 markRemoved(document.queryAll(".navbox")); |
1108 markRemoved(document.query("#Method_overview")); | 1225 markRemoved(document.query("#Method_overview")); |
1109 markRemoved(document.queryAll("h1, h2")); | 1226 markRemoved(document.queryAll("h1, h2")); |
1110 | 1227 |
1111 scrapeSection(root, "#Methods", currentType, members, 'methods'); | 1228 scrapeSection(root, "#Methods", currentType, members, 'methods'); |
1112 scrapeSection(root, "#Constants, #Error_codes, #State_constants", currentType,
members, 'constants'); | 1229 scrapeSection(root, "#Constants, #Error_codes, #State_constants", |
| 1230 currentType, members, 'constants'); |
1113 // TODO(jacobr): infer tables based on multiple matches rather than | 1231 // TODO(jacobr): infer tables based on multiple matches rather than |
1114 // using a hard coded list of section ids. | 1232 // using a hard coded list of section ids. |
1115 scrapeSection(root, | 1233 scrapeSection(root, |
1116 "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, #DOM_prope
rties, #Event_handlers, #Event_Handlers", | 1234 "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, " + |
| 1235 "#DOM_properties, #Event_handlers, #Event_Handlers", |
1117 currentType, members, 'properties'); | 1236 currentType, members, 'properties'); |
1118 | 1237 |
1119 // Avoid doing this till now to avoid messing up the section scrape. | 1238 // Avoid doing this till now to avoid messing up the section scrape. |
1120 markRemoved(document.queryAll("h3")); | 1239 markRemoved(document.queryAll("h3")); |
1121 | 1240 |
1122 ElementList $examples = root.queryAll("span[id^=example], span[id^=Example]"); | 1241 ElementList examples = root.queryAll("span[id^=example], span[id^=Example]"); |
1123 | 1242 |
1124 extractSection("#See_also", 'seeAlso'); | 1243 extractSection("#See_also", 'seeAlso'); |
1125 extractSection("#Specification, #Specifications", "specification"); | 1244 extractSection("#Specification, #Specifications", "specification"); |
1126 // $("#Methods").parent().remove(); // not safe (e.g. Document) | |
1127 | 1245 |
1128 // TODO(jacobr): actually extract the constructor(s) | 1246 // TODO(jacobr): actually extract the constructor(s) |
1129 extractSection("#Constructor, #Constructors", 'constructor'); | 1247 extractSection("#Constructor, #Constructors", 'constructor'); |
1130 extractSection("#Browser_compatibility, #Compatibility", 'compatibility'); | 1248 extractSection("#Browser_compatibility, #Compatibility", 'compatibility'); |
1131 | 1249 |
| 1250 // Extract examples. |
1132 List<String> exampleHtml = []; | 1251 List<String> exampleHtml = []; |
1133 for (Element e in $examples) { | 1252 for (Element e in examples) { |
1134 e.classes.add(DART_REMOVED); | 1253 e.classes.add(DART_REMOVED); |
1135 } | 1254 } |
1136 for (Element e in $examples) { | 1255 for (Element e in examples) { |
1137 String html = filteredHtml(e, root, null, | 1256 String html = filteredHtml(e, root, null, |
1138 (DocumentFragment fragment) { | 1257 (DocumentFragment fragment) { |
1139 removeHeaders(fragment); | 1258 removeHeaders(fragment); |
1140 if (fragment.text.trim().toLowerCase() == "example") { | 1259 if (fragment.text.trim().toLowerCase() == "example") { |
1141 // Degenerate example. | 1260 // Degenerate example. |
1142 fragment.nodes.clear(); | 1261 fragment.nodes.clear(); |
1143 } | 1262 } |
1144 }).html; | 1263 }).html; |
1145 if (html.length > 0) { | 1264 if (html.length > 0) { |
1146 exampleHtml.add(html); | 1265 exampleHtml.add(html); |
1147 } | 1266 } |
1148 } | 1267 } |
1149 if (exampleHtml.length > 0) { | 1268 if (exampleHtml.length > 0) { |
1150 dbEntry['examples'] = exampleHtml; | 1269 dbEntry['examples'] = exampleHtml; |
1151 } | 1270 } |
1152 | 1271 |
| 1272 // Extract the class summary. |
| 1273 // Basically everything left over after the #Summary or #Description tag is |
| 1274 // safe to include in the summary. |
1153 StringBuffer summary = new StringBuffer(); | 1275 StringBuffer summary = new StringBuffer(); |
1154 | |
1155 for (Element e in root.queryAll("#Summary, #Description")) { | 1276 for (Element e in root.queryAll("#Summary, #Description")) { |
1156 summary.add(filteredHtml(root, e, null, removeHeaders).html); | 1277 summary.add(filteredHtml(root, e, null, removeHeaders).html); |
1157 } | 1278 } |
1158 | 1279 |
1159 if (summary.length == 0) { | 1280 if (summary.length == 0) { |
1160 // Remove the "Gecko DOM Reference text" | 1281 // Remove the "Gecko DOM Reference text" |
1161 Element ref = root.query(".lang.lang-en"); | 1282 Element ref = root.query(".lang.lang-en"); |
1162 if (ref != null) { | 1283 if (ref != null) { |
1163 ref = ref.parent; | 1284 ref = ref.parent; |
1164 String refText = ref.text.trim(); | 1285 String refText = ref.text.trim(); |
1165 if (refText == "Gecko DOM Reference" || | 1286 if (refText == "Gecko DOM Reference" || |
1166 refText == "« Gecko DOM Reference") { | 1287 refText == "« Gecko DOM Reference") { |
1167 ref.remove(); | 1288 ref.remove(); |
1168 } | 1289 } |
1169 } | 1290 } |
1170 // Risky... this might add stuff we shouldn't. | 1291 // Risky... this might add stuff we shouldn't. |
1171 summary.add(filteredHtml(root, root, null, removeHeaders).html); | 1292 summary.add(filteredHtml(root, root, null, removeHeaders).html); |
1172 } | 1293 } |
1173 | 1294 |
1174 if (summary.length > 0) { | 1295 if (summary.length > 0) { |
1175 dbEntry['summary'] = summary.toString(); | 1296 dbEntry['summary'] = summary.toString(); |
1176 } | 1297 } |
1177 | 1298 |
1178 // Inject CSS to aid debugging in the browser. | 1299 // Inject CSS to aid debugging in the browser. |
| 1300 // We could avoid doing this if we know we are not running in a browser.. |
1179 document.head.nodes.add(new Element.html(DEBUG_CSS)); | 1301 document.head.nodes.add(new Element.html(DEBUG_CSS)); |
1180 | 1302 |
1181 onEnd(); | 1303 onEnd(); |
1182 } | 1304 } |
1183 | 1305 |
/**
 * Entry point: registers [documentLoaded] as a handler for the window's load
 * event.
 */
void main() {
  final loadListeners = window.on.load;
  loadListeners.add(documentLoaded);
}
1187 | 1309 |
/**
 * Load handler: fetches the database of expected methods and properties with
 * an XMLHttpRequest to "<window.location>.json", stores the parsed JSON in
 * data, resets dbEntry, and then starts the scrape via run().
 */
void documentLoaded(event) {
  final dataUrl = '${window.location}.json';

  void onDataLoaded(req) {
    data = JSON.parse(req.responseText);
    dbEntry = {'members': [], 'srcUrl': pageUrl};
    run();
  }

  new XMLHttpRequest.getTEMPNAME(dataUrl, onDataLoaded);
}
OLD | NEW |