| Index: utils/apidoc/mdn/extract.dart
|
| diff --git a/utils/apidoc/mdn/extract.dart b/utils/apidoc/mdn/extract.dart
|
| index 17627054da495a2b004b937e84d0593364b0fe29..4f143836163038db7a2e2a164fc49428b15c7c62 100644
|
| --- a/utils/apidoc/mdn/extract.dart
|
| +++ b/utils/apidoc/mdn/extract.dart
|
| @@ -14,8 +14,7 @@ Range _tempRange;
|
| // Hacks because ASYNC measurement is annoying when just writing a script.
|
| ClientRect getClientRect(Node n) {
|
| if (n is Element) {
|
| - Element e = n;
|
| - dom.Element raw = unwrapDomObject(e.dynamic);
|
| + dom.Element raw = unwrapDomObject(n.dynamic);
|
| return LevelDom.wrapClientRect(raw.getBoundingClientRect());
|
| } else {
|
| // Crazy hacks that works for nodes.... create a range and measure it.
|
| @@ -28,11 +27,18 @@ ClientRect getClientRect(Node n) {
|
| }
|
| }
|
|
|
| -final DART_REMOVED = "dart_removed";
|
| +/**
|
| + * CSS class that is added to elements in the DOM to indicate that they should
|
| + * be removed when extracting blocks of documentation. This is helpful when
|
| + * running this script in a web browser as it is easy to visually see what
|
| + * blocks of information were extracted when using CSS such as DEBUG_CSS
|
| + * which highlights elements that should be removed.
|
| + */
|
| +final DART_REMOVED = "dart-removed";
|
|
|
| final DEBUG_CSS = """
|
| <style type="text/css">
|
| - .dart_removed {
|
| + .dart-removed {
|
| background-color: rgba(255, 0, 0, 0.5);
|
| }
|
| </style>""";
|
| @@ -281,7 +287,7 @@ String getAbsoluteUrl(AnchorElement anchor) {
|
| }
|
|
|
| bool inTable(Node n) {
|
| - while(n != null) {
|
| + while (n != null) {
|
| if (n is TableElement) return true;
|
| n = n.parent;
|
| }
|
| @@ -295,7 +301,7 @@ String escapeHTML(str) {
|
| }
|
|
|
| List<Text> getAllTextNodes(Element elem) {
|
| - List<Text> nodes = <Text>[];
|
| + final nodes = <Text>[];
|
| helper(Node n) {
|
| if (n is Text) {
|
| nodes.add(n);
|
| @@ -323,8 +329,8 @@ bool isSkippableType(Node n) {
|
| }
|
| if (n is Text) return true;
|
|
|
| - for (Node child in n.nodes) {
|
| - if (isSkippableType(child) == false) {
|
| + for (final child in n.nodes) {
|
| + if (!isSkippableType(child)) {
|
| return false;
|
| }
|
| }
|
| @@ -342,6 +348,8 @@ void onEnd() {
|
| // workaround bug in JSON parser.
|
| dbJson = dbJson.replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n");
|
|
|
| + // Use postMessage to send the JSON to JavaScript. TODO(jacobr): use a simple
|
| + // isolate based Dart-JS interop solution in the future.
|
| window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$dbJson", "*");
|
| }
|
|
|
| @@ -353,44 +361,81 @@ class SectionParseResult {
|
| }
|
|
|
| String genCleanHtml(Element root) {
|
| - for (Element e in root.queryAll(".$DART_REMOVED")) {
|
| + for (final e in root.queryAll(".$DART_REMOVED")) {
|
| e.classes.remove(DART_REMOVED);
|
| }
|
|
|
| // Ditch inline styles.
|
| - for (Element e in root.queryAll('[style]')) {
|
| + for (final e in root.queryAll('[style]')) {
|
| e.attributes.remove('style');
|
| }
|
|
|
| // These elements are just tags that we should suppress.
|
| - for (Element e in root.queryAll(".lang.lang-en")) {
|
| + for (final e in root.queryAll(".lang.lang-en")) {
|
| e.remove();
|
| }
|
|
|
| + Element parametersHeader;
|
| + Element returnValueHeader;
|
| + for (final e in root.queryAll("h6")) {
|
| + if (e.text == 'Parameters') {
|
| + parametersHeader = e;
|
| + } else if (e.text == 'Return value') {
|
| + returnValueHeader = e;
|
| + }
|
| + }
|
| +
|
| + if (parametersHeader != null) {
|
| + int numEmptyParameters = 0;
|
| + final parameterDescriptions = root.queryAll("dd");
|
| + for (Element parameterDescription in parameterDescriptions) {
|
| + if (parameterDescription.text.trim().length == 0) {
|
| + numEmptyParameters++;
|
| + }
|
| + }
|
| + if (numEmptyParameters > 0 &&
|
| + numEmptyParameters == parameterDescriptions.length) {
|
| + // Remove the parameter list as it adds zero value as all descriptions
|
| + // are empty.
|
| + parametersHeader.remove();
|
| + for (final e in root.queryAll("dl")) {
|
| + e.remove();
|
| + }
|
| + } else if (parameterDescriptions.length == 0 &&
|
| + parametersHeader.nextElementSibling != null &&
|
| + parametersHeader.nextElementSibling.text.trim() == 'None.') {
|
| + // No need to display that the function takes 0 parameters.
|
| + parametersHeader.nextElementSibling.remove();
|
| + parametersHeader.remove();
|
| + }
|
| + }
|
| +
|
| + // Heuristic: if the return value is a single word it is a type name not a
|
| + // useful text description so suppress it.
|
| + if (returnValueHeader != null &&
|
| + returnValueHeader.nextElementSibling != null &&
|
| + returnValueHeader.nextElementSibling.text.trim().split(' ').length < 2) {
|
| + returnValueHeader.nextElementSibling.remove();
|
| + returnValueHeader.remove();
|
| + }
|
| +
|
| bool changed = true;
|
| while (changed) {
|
| changed = false;
|
| - while (root.nodes.length == 1) {
|
| - Node child = root.nodes.first;
|
| - if (child is Element) {
|
| - root = child;
|
| - changed = true;
|
| - } else {
|
| - // Just calling innerHTML on the parent will be sufficient...
|
| - // and insures the output is properly escaped.
|
| - break;
|
| - }
|
| + while (root.nodes.length == 1 && root.nodes.first is Element) {
|
| + root = root.nodes.first;
|
| + changed = true;
|
| }
|
|
|
| // Trim useless nodes from the front.
|
| - while(root.nodes.length > 0 &&
|
| + while (root.nodes.length > 0 &&
|
| isSkippable(root.nodes.first)) {
|
| root.nodes.first.remove();
|
| changed = true;
|
| }
|
|
|
| // Trim useless nodes from the back.
|
| - while(root.nodes.length > 0 &&
|
| + while (root.nodes.length > 0 &&
|
| isSkippable(root.nodes.last())) {
|
| root.nodes.last().remove();
|
| changed = true;
|
| @@ -399,10 +444,6 @@ String genCleanHtml(Element root) {
|
| return JSONFIXUPHACK(root.innerHTML);
|
| }
|
|
|
| -String genPrettyHtml(DocumentFragment fragment) {
|
| - return genCleanHtml(fragment);
|
| -}
|
| -
|
| String genPrettyHtmlFromElement(Element e) {
|
| e = e.clone(true);
|
| return genCleanHtml(e);
|
| @@ -420,7 +461,7 @@ class PostOrderTraversalIterator implements Iterator<Node> {
|
|
|
| Node next() {
|
| if (_next == null) return null;
|
| - Node ret = _next;
|
| + final ret = _next;
|
| if (_next.nextNode != null) {
|
| _next = _leftMostDescendent(_next.nextNode);
|
| } else {
|
| @@ -444,12 +485,19 @@ class PostOrderTraversal implements Iterable<Node> {
|
| Iterator<Node> iterator() => new PostOrderTraversalIterator(_node);
|
| }
|
|
|
| +/**
|
| + * Estimate what content represents the first line of text within the [section]
|
| + * range returning null if there isn't a plausible first line of text that
|
| + * contains the string [prop]. We measure the actual rendered client rectangle
|
| + * for the text and use heuristics defining how many pixels text can vary by
|
| + * and still be viewed as being on the same line.
|
| + */
|
| Range findFirstLine(Range section, String prop) {
|
| - Range firstLine = newRange();
|
| + final firstLine = newRange();
|
| firstLine.setStart(section.startContainer, section.startOffset);
|
|
|
| num maxBottom = null;
|
| - for (Node n in new PostOrderTraversal(section.startContainer)) {
|
| + for (final n in new PostOrderTraversal(section.startContainer)) {
|
| int compareResult = section.comparePoint(n, 0);
|
| if (compareResult == -1) {
|
| // before range so skip.
|
| @@ -462,9 +510,8 @@ Range findFirstLine(Range section, String prop) {
|
| final rect = getClientRect(n);
|
| num bottom = rect.bottom;
|
| if (rect.height > 0 && rect.width > 0) {
|
| - if (maxBottom != null && (
|
| - maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom
|
| - )) {
|
| + if (maxBottom != null &&
|
| + maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) {
|
| break;
|
| } else if (maxBottom == null || maxBottom > bottom) {
|
| maxBottom = bottom;
|
| @@ -474,15 +521,19 @@ Range findFirstLine(Range section, String prop) {
|
| firstLine.setEndAfter(n);
|
| }
|
|
|
| - if (firstLine.toString().indexOf(stripWebkit(prop)) == -1) {
|
| + // If the first line of text in the section does not contain the property
|
| + // name then we're not confident we are able to extract a high accuracy match
|
| + // so we should not return anything.
|
| + if (!firstLine.toString().contains(stripWebkit(prop))) {
|
| return null;
|
| }
|
| return firstLine;
|
| }
|
|
|
| +/** Find child anchor elements that contain the text [prop]. */
|
| AnchorElement findAnchorElement(Element root, String prop) {
|
| for (AnchorElement a in root.queryAll("a")) {
|
| - if (a.text.indexOf(prop) != -1) {
|
| + if (a.text.contains(prop)) {
|
| return a;
|
| }
|
| }
|
| @@ -490,9 +541,9 @@ AnchorElement findAnchorElement(Element root, String prop) {
|
| }
|
|
|
| // First surrounding element with an ID is safe enough.
|
| -Element findTigherRoot(Element elem, Element root) {
|
| +Element findTighterRoot(Element elem, Element root) {
|
| Element candidate = elem;
|
| - while(root != candidate) {
|
| + while (root != candidate) {
|
| candidate = candidate.parent;
|
| if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) {
|
| break;
|
| @@ -501,22 +552,22 @@ Element findTigherRoot(Element elem, Element root) {
|
| return candidate;
|
| }
|
|
|
| -// this is very slow and ugly.. consider rewriting.
|
| +// TODO(jacobr): this is very slow and ugly.. consider rewriting or at least
|
| +// commenting carefully.
|
| SectionParseResult filteredHtml(Element elem, Element root, String prop,
|
| Function fragmentGeneratedCallback) {
|
| // Using a tighter root avoids false positives at the risk of trimming
|
| // text we shouldn't.
|
| - root = findTigherRoot(elem, root);
|
| - Range range = newRange();
|
| + root = findTighterRoot(elem, root);
|
| + final range = newRange();
|
| range.setStartBefore(elem);
|
|
|
| Element current = elem;
|
| while (current != null) {
|
| range.setEndBefore(current);
|
| - if (current.classes.contains(DART_REMOVED)) {
|
| - if (range.toString().trim().length > 0) {
|
| - break;
|
| - }
|
| + if (current.classes.contains(DART_REMOVED) &&
|
| + range.toString().trim().length > 0) {
|
| + break;
|
| }
|
| if (current.firstElementChild != null) {
|
| current = current.firstElementChild;
|
| @@ -547,7 +598,7 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
|
| }
|
| }
|
| }
|
| - DocumentFragment fragment = range.cloneContents();
|
| + final fragment = range.cloneContents();
|
| if (fragmentGeneratedCallback != null) {
|
| fragmentGeneratedCallback(fragment);
|
| }
|
| @@ -557,7 +608,7 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
|
| }
|
|
|
| // Extract idl
|
| - StringBuffer idl = new StringBuffer();
|
| + final idl = new StringBuffer();
|
| if (prop != null && prop.length > 0) {
|
| // Only expect properties to have HTML.
|
| for(Element e in fragment.queryAll(IDL_SELECTOR)) {
|
| @@ -570,43 +621,46 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
|
| for (Element e in fragment.queryAll("pre")) {
|
| // Check if it looks like idl...
|
| String txt = e.text.trim();
|
| - if (likelyIdl.hasMatch(txt) && txt.indexOf("\n") != -1
|
| - && txt.indexOf(")") != -1) {
|
| + if (likelyIdl.hasMatch(txt) && txt.contains("\n") && txt.contains(")")) {
|
| idl.add(e.outerHTML);
|
| e.remove();
|
| }
|
| }
|
| }
|
| - return new SectionParseResult(genPrettyHtml(fragment), url, idl.toString());
|
| + return new SectionParseResult(genCleanHtml(fragment), url, idl.toString());
|
| }
|
|
|
| -Element findBest(Element root, List<Text> allText, String prop, String propType) {
|
| - // Best bet: match an id
|
| - Element cand;
|
| - cand = root.query("#" + prop);
|
| +/**
|
| + * Find the best child element of [root] that appears to be an API definition
|
| + * for [prop]. [allText] is a list of all text nodes under root computed by
|
| + * the caller to improve performance.
|
| + */
|
| +Element findBest(Element root, List<Text> allText, String prop,
|
| + String propType) {
|
| + // Best bet: find a child of root where the id matches the property name.
|
| + Element cand = root.query("#$prop");
|
|
|
| if (cand == null && propType == "methods") {
|
| - cand = root.query("[id=" + prop + "\\(\\)]");
|
| + cand = root.query("[id=$prop\\(\\)]");
|
| + }
|
| + while (cand != null && cand.text.trim().length == 0) {
|
| + // We found the bookmark for the element but sadly it is just an empty
|
| + // placeholder. Find the first real element.
|
| + cand = cand.nextElementSibling;
|
| }
|
| if (cand != null) {
|
| - while (cand != null && cand.text.trim().length == 0) {
|
| - // We found the bookmark for the element but sadly it is just an empty
|
| - // placeholder. Find the first real element.
|
| - cand = cand.nextElementSibling;
|
| - }
|
| - if (cand != null) {
|
| - return cand;
|
| - }
|
| + return cand;
|
| }
|
|
|
| - // If you are at least 70 pixels from the left, something is definitely fishy and we shouldn't even consider this candidate.
|
| + // If we are at least 70 pixels from the left, something is definitely
|
| + // fishy and we shouldn't even consider this candidate as nobody visually
|
| + // formats API docs like that.
|
| num candLeft = 70;
|
|
|
| for (Text text in allText) {
|
| Element proposed = null;
|
|
|
| -// var t = safeNameCleanup(text.text);
|
| -// TODO(jacobr): does it hurt precision to use the full cleanup?
|
| + // TODO(jacobr): does it hurt precision to use the full cleanup?
|
| String t = fullNameCleanup(text.text);
|
| if (t == prop) {
|
| proposed = text.parent;
|
| @@ -623,6 +677,10 @@ Element findBest(Element root, List<Text> allText, String prop, String propType)
|
| return cand;
|
| }
|
|
|
| +/**
|
| + * Checks whether [e] is tagged as obsolete or deprecated using heuristics
|
| + * for what these tags look like in the MDN docs.
|
| + */
|
| bool isObsolete(Element e) {
|
| RegExp obsoleteRegExp = new RegExp(@"(^|\s)obsolete(?=\s|$)");
|
| RegExp deprecatedRegExp = new RegExp(@"(^|\s)deprecated(?=\s|$)");
|
| @@ -636,40 +694,52 @@ bool isObsolete(Element e) {
|
| }
|
|
|
| bool isFirstCharLowerCase(String str) {
|
| - RegExp firstLower = new RegExp("^[a-z]");
|
| - return firstLower.hasMatch(str);
|
| + return const RegExp("^[a-z]").hasMatch(str);
|
| }
|
|
|
| -void scrapeSection(Element root, String sectionSelector,
|
| - String currentType,
|
| - List members,
|
| - String propType) {
|
| +/**
|
| + * Extracts information from a fragment of HTML only searching under the [root]
|
| + * html node. [sectionSelector] specifies the query to use to find candidate
|
| + * sections of the document to consider (there may be more than one).
|
| + * [currentType] specifies the name of the current class. [members] specifies
|
| + * the known class members for this class that we are attempting to find
|
| + * documentation for. [propType] indicates whether we are searching for
|
| + * methods, properties, constants, or constructors.
|
| + */
|
| +void scrapeSection(Element root, String sectionSelector, String currentType,
|
| + List members, String propType) {
|
| Map expectedProps = dartIdl[propType];
|
|
|
| Set<String> alreadyMatchedProperties = new Set<String>();
|
| bool onlyConsiderTables = false;
|
| ElementList allMatches = root.queryAll(sectionSelector);
|
| if (allMatches.length == 0) {
|
| + // If we can't find any matches to the sectionSelector, we fall back to
|
| + // considering all tables in the document. This is dangerous so we only
|
| + // allow the safer table matching extraction rules for this case.
|
| allMatches = root.queryAll(".fullwidth-table");
|
| onlyConsiderTables = true;
|
| }
|
| for (Element matchElement in allMatches) {
|
| - DivElement match = matchElement.parent;
|
| - if (!match.id.startsWith("section") && !(match.id == "pageText")) {
|
| - throw "Enexpected element $match";
|
| + final match = matchElement.parent;
|
| + if (!match.id.startsWith("section") && match.id != "pageText") {
|
| + throw "Unexpected element $match";
|
| }
|
| + // We don't want to later display this text a second time while for example
|
| + // displaying class level summary information as then we would display
|
| + // the same documentation twice.
|
| match.classes.add(DART_REMOVED);
|
|
|
| bool foundProps = false;
|
|
|
| // TODO(jacobr): we should really look for the table tag instead
|
| // add an assert if we are missing something that is a table...
|
| - // TODO(jacobr) ignore tables in tables....
|
| + // TODO(jacobr) ignore tables in tables.
|
| for (Element t in match.queryAll('.standard-table, .fullwidth-table')) {
|
| int helpIndex = -1;
|
| num i = 0;
|
| for (Element r in t.queryAll("th, td.header")) {
|
| - var txt = r.text.trim().split(" ")[0].toLowerCase();
|
| + final txt = r.text.trim().split(" ")[0].toLowerCase();
|
| if (txt == "description") {
|
| helpIndex = i;
|
| break;
|
| @@ -677,22 +747,23 @@ void scrapeSection(Element root, String sectionSelector,
|
| i++;
|
| }
|
|
|
| - List<int> numMatches = new List<int>(i);
|
| + // Figure out which column in the table contains member names by
|
| + // tracking how many member names each column contains.
|
| + final numMatches = new List<int>(i);
|
| for (int j = 0; j < i; j++) {
|
| numMatches[j] = 0;
|
| }
|
|
|
| - // Find the row that seems to have the most names that look like
|
| + // Find the column that seems to have the most names that look like
|
| // expected properties.
|
| for (Element r in t.queryAll("tbody tr")) {
|
| - ElementList $row = r.elements;
|
| - if ($row.length == 0 || $row.first.classes.contains(".header")) {
|
| + ElementList row = r.elements;
|
| + if (row.length == 0 || row.first.classes.contains(".header")) {
|
| continue;
|
| }
|
|
|
| - for (int k = 0; k < numMatches.length && k < $row.length; k++) {
|
| - Element e = $row[k];
|
| - if (expectedProps.containsKey(fullNameCleanup(e.text))) {
|
| + for (int k = 0; k < numMatches.length && k < row.length; k++) {
|
| + if (expectedProps.containsKey(fullNameCleanup(row[k].text))) {
|
| numMatches[k]++;
|
| break;
|
| }
|
| @@ -711,14 +782,14 @@ void scrapeSection(Element root, String sectionSelector,
|
| }
|
|
|
| for (Element r in t.queryAll("tbody tr")) {
|
| - ElementList $row = r.elements;
|
| - if ($row.length > propNameIndex && $row.length > helpIndex ) {
|
| - if ($row.first.classes.contains(".header")) {
|
| + final row = r.elements;
|
| + if (row.length > propNameIndex && row.length > helpIndex) {
|
| + if (row.first.classes.contains(".header")) {
|
| continue;
|
| }
|
| // TODO(jacobr): this code for determining the namestr is needlessly
|
| // messy.
|
| - Element nameRow = $row[propNameIndex];
|
| + final nameRow = row[propNameIndex];
|
| AnchorElement a = nameRow.query("a");
|
| String goodName = '';
|
| if (a != null) {
|
| @@ -728,15 +799,14 @@ void scrapeSection(Element root, String sectionSelector,
|
|
|
| Map entry = new Map<String, String>();
|
|
|
| - // "currentType": $($row[1]).text().trim(), // find("code") ?
|
| - entry["name"] = fullNameCleanup(nameStr.length > 0 ? nameStr : goodName);
|
| + entry["name"] = fullNameCleanup(nameStr.length > 0 ?
|
| + nameStr : goodName);
|
|
|
| final parse = filteredHtml(nameRow, nameRow, entry["name"], null);
|
| String altHelp = parse.html;
|
|
|
| - // "jsSignature": nameStr,
|
| - entry["help"] = (helpIndex == -1 || $row[helpIndex] == null) ? altHelp : genPrettyHtmlFromElement($row[helpIndex]);
|
| - // "altHelp" : altHelp,
|
| + entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ?
|
| + altHelp : genPrettyHtmlFromElement(row[helpIndex]);
|
| if (parse.url != null) {
|
| entry["url"] = parse.url;
|
| }
|
| @@ -759,41 +829,71 @@ void scrapeSection(Element root, String sectionSelector,
|
| if (onlyConsiderTables) {
|
| continue;
|
| }
|
| +
|
| // After this point we have higher risk tests that attempt to perform
|
| - // rudimentary page segmentation.
|
| + // rudimentary page segmentation. This approach is much more error-prone
|
| + // than using tables because the HTML is far less clearly structured.
|
|
|
| - // Search for expected matching names.
|
| - List<Text> allText = getAllTextNodes(match);
|
| + final allText = getAllTextNodes(match);
|
|
|
| - Map<String, Element> pmap = new Map<String, Element>();
|
| - for (String prop in expectedProps.getKeys()) {
|
| + final pmap = new Map<String, Element>();
|
| + for (final prop in expectedProps.getKeys()) {
|
| if (alreadyMatchedProperties.contains(prop)) {
|
| continue;
|
| }
|
| - Element e = findBest(match, allText, prop, propType);
|
| + final e = findBest(match, allText, prop, propType);
|
| if (e != null && !inTable(e)) {
|
| pmap[prop] = e;
|
| }
|
| }
|
|
|
| - for (String prop in pmap.getKeys()) {
|
| - Element e = pmap[prop];
|
| - e.classes.add(DART_REMOVED);
|
| + for (final prop in pmap.getKeys()) {
|
| + pmap[prop].classes.add(DART_REMOVED);
|
| }
|
|
|
| + // The problem is the MDN docs do not place documentation for each method in a
|
| + // nice self contained subtree. Instead you will see something like:
|
| +
|
| + // <h3>drawImage</h3>
|
| + // <p>Draw image is an awesome method</p>
|
| + // some more info on drawImage here
|
| + // <h3>mozDrawWindow</h3>
|
| + // <p>This API cannot currently be used by Web content.
|
| + // It is chrome only.</p>
|
| + // <h3>drawRect</h3>
|
| + // <p>Always call drawRect instead of drawImage</p>
|
| + // some more info on drawRect here...
|
| +
|
| + // The trouble is we will easily detect that the drawImage and drawRect
|
| + // entries are method definitions because we know to search for these
|
| + // method names but we will not detect that mozDrawWindow is a method
|
| + // definition as that method doesn't exist in our IDL. Thus if we are not
|
| + // careful the definition for the drawImage method will contain the
|
| + // definition for the mozDrawWindow method as well which would result in
|
| + // broken docs. We solve this problem by finding all content with similar
|
| + // visual structure to the already found method definitions. It turns out
|
| + // that using the visual position of each element on the page is much
|
| + // more reliable than using the DOM structure
|
| + // (e.g. section_root > div > h3) for the MDN docs because MDN authors
|
| + // carefully check that the documentation for each method comment is
|
| + // visually consistent but take less care to check that each
|
| + // method comment has identical markup structure.
|
| for (String prop in pmap.getKeys()) {
|
| Element e = pmap[prop];
|
| ClientRect r = getClientRect(e);
|
| - // TODO(jacobr): a lot of these queries are identical.
|
| - for (Element cand in match.queryAll(e.tagName)) {
|
| - if (!cand.classes.contains(DART_REMOVED) && !inTable(cand) ) { // XXX use a neg selector.
|
| - ClientRect candRect = getClientRect(cand);
|
| - // TODO(jacobr): this is somewhat loose.
|
| + // TODO(jacobr): a lot of these queries are identical and this code
|
| + // could easily be optimized.
|
| + for (final cand in match.queryAll(e.tagName)) {
|
| + // TODO(jacobr): use a negative selector instead.
|
| + if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) {
|
| + final candRect = getClientRect(cand);
|
| + // Only consider matches that have similar heights and identical left
|
| + // coordinates.
|
| if (candRect.left == r.left &&
|
| (candRect.height - r.height).abs() < 5) {
|
| String propName = fullNameCleanup(cand.text);
|
| - if (isFirstCharLowerCase(propName) && pmap.containsKey(propName) == false && alreadyMatchedProperties.contains(propName) == false) {
|
| - // Don't set here to avoid layouts... cand.classes.add(DART_REMOVED);
|
| + if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName)
|
| + && !alreadyMatchedProperties.contains(propName)) {
|
| pmap[propName] = cand;
|
| }
|
| }
|
| @@ -801,6 +901,9 @@ void scrapeSection(Element root, String sectionSelector,
|
| }
|
| }
|
|
|
| + // We mark these elements in batch to reduce the number of layouts
|
| + // triggered. TODO(jacobr): use new batch based async measurement to make
|
| + // this code flow simpler.
|
| for (String prop in pmap.getKeys()) {
|
| Element e = pmap[prop];
|
| e.classes.add(DART_REMOVED);
|
| @@ -810,7 +913,7 @@ void scrapeSection(Element root, String sectionSelector,
|
| // DART_REMOVED so we don't include them in member descriptions... which
|
| // would suck.
|
| for (Element e in match.queryAll("[id]")) {
|
| - if (e.id.indexOf(matchElement.id) != -1) {
|
| + if (e.id.contains(matchElement.id)) {
|
| e.classes.add(DART_REMOVED);
|
| }
|
| }
|
| @@ -828,7 +931,6 @@ void scrapeSection(Element root, String sectionSelector,
|
| "name" : prop,
|
| "help" : parse.html,
|
| "obsolete" : obsolete
|
| - //"jsSignature" : nameStr
|
| };
|
| if (parse.idl.length > 0) {
|
| entry["idl"] = parse.idl;
|
| @@ -839,20 +941,19 @@ void scrapeSection(Element root, String sectionSelector,
|
| }
|
|
|
| String trimHtml(String html) {
|
| - // TODO(jacobr): impl.
|
| + // TODO(jacobr): implement this. Remove spurious enclosing HTML tags, etc.
|
| return html;
|
| }
|
|
|
| bool maybeName(String name) {
|
| - RegExp nameRegExp = new RegExp("^[a-z][a-z0-9A-Z]+\$");
|
| - if (nameRegExp.hasMatch(name)) return true;
|
| - RegExp constRegExp = new RegExp("^[A-Z][A-Z_]*\$");
|
| - if (constRegExp.hasMatch(name)) return true;
|
| + return const RegExp("^[a-z][a-z0-9A-Z]+\$").hasMatch(name) ||
|
| + const RegExp("^[A-Z][A-Z_]*\$").hasMatch(name);
|
| }
|
|
|
| +// TODO(jacobr): this element is ugly at the moment but will become easier to
|
| +// read once ElementList supports most of the Element functionality.
|
| void markRemoved(var e) {
|
| if (e != null) {
|
| - // TODO( remove)
|
| if (e is Element) {
|
| e.classes.add(DART_REMOVED);
|
| } else {
|
| @@ -863,25 +964,23 @@ void markRemoved(var e) {
|
| }
|
| }
|
|
|
| +// TODO(jacobr): remove this when the dartium JSON parser handles \n correctly.
|
| String JSONFIXUPHACK(String value) {
|
| return value.replaceAll("\n", "ZDARTIUMDOESNTESCAPESLASHNJXXXX");
|
| }
|
|
|
| String mozToWebkit(String name) {
|
| - RegExp regExp = new RegExp("^moz");
|
| - name = name.replaceFirst(regExp, "webkit");
|
| - return name;
|
| + return name.replaceFirst(const RegExp("^moz"), "webkit");
|
| }
|
|
|
| String stripWebkit(String name) {
|
| return trimPrefix(name, "webkit");
|
| }
|
|
|
| +// TODO(jacobr): be more principled about this.
|
| String fullNameCleanup(String name) {
|
| int parenIndex = name.indexOf('(');
|
| if (parenIndex != -1) {
|
| - // TODO(jacobr): workaround bug in:
|
| - // name = name.split("(")[0];
|
| name = name.substring(0, parenIndex);
|
| }
|
| name = name.split(" ")[0];
|
| @@ -893,8 +992,8 @@ String fullNameCleanup(String name) {
|
| return name;
|
| }
|
|
|
| -// Less agressive than the full cleanup to avoid overeager matching of
|
| -// everytyhing
|
| +// Less aggressive than the full name cleanup to avoid overeager matching.
|
| +// TODO(jacobr): be more principled about this.
|
| String safeNameCleanup(String name) {
|
| int parenIndex = name.indexOf('(');
|
| if (parenIndex != -1 && name.indexOf(")") != -1) {
|
| @@ -914,12 +1013,20 @@ String safeNameCleanup(String name) {
|
| return name;
|
| }
|
|
|
| +/**
|
| + * Remove h1, h2, and h3 headers.
|
| + */
|
| void removeHeaders(DocumentFragment fragment) {
|
| for (Element e in fragment.queryAll("h1, h2, h3")) {
|
| e.remove();
|
| }
|
| }
|
|
|
| +/**
|
| + * Given an [entry] representing a single method or property cleanup the
|
| + * values performing some simple normalization and only adding the entry to
|
| + * [members] if it has a valid name.
|
| + */
|
| void cleanupEntry(List members, Map entry) {
|
| if (entry.containsKey('help')) {
|
| entry['help'] = trimHtml(entry['help']);
|
| @@ -950,10 +1057,6 @@ String trimPrefix(String str, String prefix) {
|
| }
|
| }
|
|
|
| -void resourceLoaded() {
|
| - if (data != null) run();
|
| -}
|
| -
|
| String trimStart(String str, String start) {
|
| if (str.startsWith(start) && str.length > start.length) {
|
| return str.substring(start.length);
|
| @@ -968,6 +1071,10 @@ String trimEnd(String str, String end) {
|
| return str;
|
| }
|
|
|
| +/**
|
| + * Extract a section with name [key] using [selector] to find start points for
|
| + * the section in the document.
|
| + */
|
| void extractSection(String selector, String key) {
|
| for (Element e in document.queryAll(selector)) {
|
| e = e.parent;
|
| @@ -987,7 +1094,9 @@ void extractSection(String selector, String key) {
|
| }
|
|
|
| void run() {
|
| - // Inject CSS to insure lines don't wrap unless it was intentional.
|
| + // Inject CSS to ensure lines don't wrap unless they were intended to.
|
| + // This is needed to make the logic to determine what is a single line
|
| + // behave consistently even for very long method names.
|
| document.head.nodes.add(new Element.html("""
|
| <style type="text/css">
|
| body {
|
| @@ -1000,13 +1109,15 @@ void run() {
|
|
|
| // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best
|
| // place to do this...
|
| + // TODO(jacobr): move this to right before we extract HTML.
|
|
|
| // Remove the "Introduced in HTML <version>" boxes.
|
| for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) {
|
| e.remove();
|
| }
|
|
|
| - // Flatten the list of known DOM types into a faster and case-insensitive map.
|
| + // Flatten the list of known DOM types into a faster and case-insensitive
|
| + // map.
|
| domTypes = {};
|
| for (final domType in domTypesRaw) {
|
| domTypes[domType.toLowerCase()] = domType;
|
| @@ -1024,7 +1135,8 @@ void run() {
|
| // TODO(rnystrom): Add rel external to links we didn't fix.
|
| for (AnchorElement a in document.queryAll('a')) {
|
| // Get the raw attribute because we *don't* want the browser to fully-
|
| - // qualify the name for us since it has the wrong base address for the page.
|
| + // qualify the name for us since it has the wrong base address for the
|
| + // page.
|
| var href = a.attributes['href'];
|
|
|
| // Ignore busted links.
|
| @@ -1070,20 +1182,22 @@ void run() {
|
| a.attributes['href'] = href;
|
| }
|
|
|
| - if (title.toLowerCase().indexOf(currentTypeTiny.toLowerCase()) == -1) {
|
| + if (!title.toLowerCase().contains(currentTypeTiny.toLowerCase())) {
|
| bool foundMatch = false;
|
| // Test out if the title is really an HTML tag that matches the
|
| // current class name.
|
| for (String tag in [title.split(" ")[0], title.split(".").last()]) {
|
| try {
|
| dom.Element element = dom.document.createElement(tag);
|
| + // TODO(jacobr): this is a really ugly way of doing this that will
|
| + // stop working at some point soon.
|
| if (element.typeName == currentType) {
|
| foundMatch = true;
|
| break;
|
| }
|
| } catch(e) {}
|
| }
|
| - if (foundMatch == false) {
|
| + if (!foundMatch) {
|
| dbEntry['skipped'] = true;
|
| dbEntry['cause'] = "Suspect title";
|
| onEnd();
|
| @@ -1101,6 +1215,9 @@ void run() {
|
| markRemoved(root.query("#Notes"));
|
| List members = dbEntry['members'];
|
|
|
| + // This is a laundry list of CSS selectors for boilerplate content on the
|
| + // MDN pages that we should ignore for the purposes of extracting
|
| + // documentation.
|
| markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar"));
|
| markRemoved(document.queryAll("#article-nav"));
|
| markRemoved(document.queryAll(".hideforedit"));
|
| @@ -1109,31 +1226,33 @@ void run() {
|
| markRemoved(document.queryAll("h1, h2"));
|
|
|
| scrapeSection(root, "#Methods", currentType, members, 'methods');
|
| - scrapeSection(root, "#Constants, #Error_codes, #State_constants", currentType, members, 'constants');
|
| + scrapeSection(root, "#Constants, #Error_codes, #State_constants",
|
| + currentType, members, 'constants');
|
| // TODO(jacobr): infer tables based on multiple matches rather than
|
| // using a hard coded list of section ids.
|
| scrapeSection(root,
|
| - "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, #DOM_properties, #Event_handlers, #Event_Handlers",
|
| + "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, " +
|
| + "#DOM_properties, #Event_handlers, #Event_Handlers",
|
| currentType, members, 'properties');
|
|
|
| // Avoid doing this till now to avoid messing up the section scrape.
|
| markRemoved(document.queryAll("h3"));
|
|
|
| - ElementList $examples = root.queryAll("span[id^=example], span[id^=Example]");
|
| + ElementList examples = root.queryAll("span[id^=example], span[id^=Example]");
|
|
|
| extractSection("#See_also", 'seeAlso');
|
| extractSection("#Specification, #Specifications", "specification");
|
| - // $("#Methods").parent().remove(); // not safe (e.g. Document)
|
|
|
| // TODO(jacobr): actually extract the constructor(s)
|
| extractSection("#Constructor, #Constructors", 'constructor');
|
| extractSection("#Browser_compatibility, #Compatibility", 'compatibility');
|
|
|
| + // Extract examples.
|
| List<String> exampleHtml = [];
|
| - for (Element e in $examples) {
|
| + for (Element e in examples) {
|
| e.classes.add(DART_REMOVED);
|
| }
|
| - for (Element e in $examples) {
|
| + for (Element e in examples) {
|
| String html = filteredHtml(e, root, null,
|
| (DocumentFragment fragment) {
|
| removeHeaders(fragment);
|
| @@ -1150,8 +1269,10 @@ void run() {
|
| dbEntry['examples'] = exampleHtml;
|
| }
|
|
|
| + // Extract the class summary.
|
| + // Basically everything left over after the #Summary or #Description tag is
|
| + // safe to include in the summary.
|
| StringBuffer summary = new StringBuffer();
|
| -
|
| for (Element e in root.queryAll("#Summary, #Description")) {
|
| summary.add(filteredHtml(root, e, null, removeHeaders).html);
|
| }
|
| @@ -1176,6 +1297,7 @@ void run() {
|
| }
|
|
|
| // Inject CSS to aid debugging in the browser.
|
| + // We could avoid doing this if we know we are not running in a browser.
|
| document.head.nodes.add(new Element.html(DEBUG_CSS));
|
|
|
| onEnd();
|
| @@ -1186,9 +1308,11 @@ void main() {
|
| }
|
|
|
| void documentLoaded(event) {
|
| + // Load the database of expected methods and properties with an
|
| + // XMLHttpRequest.
|
| new XMLHttpRequest.getTEMPNAME('${window.location}.json', (req) {
|
| data = JSON.parse(req.responseText);
|
| dbEntry = {'members': [], 'srcUrl': pageUrl};
|
| - resourceLoaded();
|
| + run();
|
| });
|
| }
|
|
|