Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(395)

Unified Diff: utils/apidoc/mdn/extract.dart

Issue 9315026: Cleanup mdn scripts (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Code review fixes Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « utils/apidoc/mdn/crawl.js ('k') | utils/apidoc/mdn/extract.sh » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: utils/apidoc/mdn/extract.dart
diff --git a/utils/apidoc/mdn/extract.dart b/utils/apidoc/mdn/extract.dart
index 17627054da495a2b004b937e84d0593364b0fe29..4f143836163038db7a2e2a164fc49428b15c7c62 100644
--- a/utils/apidoc/mdn/extract.dart
+++ b/utils/apidoc/mdn/extract.dart
@@ -14,8 +14,7 @@ Range _tempRange;
// Hacks because ASYNC measurement is annoying when just writing a script.
ClientRect getClientRect(Node n) {
if (n is Element) {
- Element e = n;
- dom.Element raw = unwrapDomObject(e.dynamic);
+ dom.Element raw = unwrapDomObject(n.dynamic);
return LevelDom.wrapClientRect(raw.getBoundingClientRect());
} else {
// Crazy hacks that works for nodes.... create a range and measure it.
@@ -28,11 +27,18 @@ ClientRect getClientRect(Node n) {
}
}
-final DART_REMOVED = "dart_removed";
+/**
+ * CSS class that is added to elements in the DOM to indicate that they should
+ * be removed when extracting blocks of documentation. This is helpful when
+ * running this script in a web browser as it is easy to visually see what
+ * blocks of information were extracted when using CSS such as DEBUG_CSS
+ * which highlights elements that should be removed.
+ */
+final DART_REMOVED = "dart-removed";
final DEBUG_CSS = """
<style type="text/css">
- .dart_removed {
+ .dart-removed {
background-color: rgba(255, 0, 0, 0.5);
}
</style>""";
@@ -281,7 +287,7 @@ String getAbsoluteUrl(AnchorElement anchor) {
}
bool inTable(Node n) {
- while(n != null) {
+ while (n != null) {
if (n is TableElement) return true;
n = n.parent;
}
@@ -295,7 +301,7 @@ String escapeHTML(str) {
}
List<Text> getAllTextNodes(Element elem) {
- List<Text> nodes = <Text>[];
+ final nodes = <Text>[];
helper(Node n) {
if (n is Text) {
nodes.add(n);
@@ -323,8 +329,8 @@ bool isSkippableType(Node n) {
}
if (n is Text) return true;
- for (Node child in n.nodes) {
- if (isSkippableType(child) == false) {
+ for (final child in n.nodes) {
+ if (!isSkippableType(child)) {
return false;
}
}
@@ -342,6 +348,8 @@ void onEnd() {
// workaround bug in JSON parser.
dbJson = dbJson.replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n");
+  // Use postMessage to send the JSON to JavaScript. TODO(jacobr): use a simple
+ // isolate based Dart-JS interop solution in the future.
window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$dbJson", "*");
}
@@ -353,44 +361,81 @@ class SectionParseResult {
}
String genCleanHtml(Element root) {
- for (Element e in root.queryAll(".$DART_REMOVED")) {
+ for (final e in root.queryAll(".$DART_REMOVED")) {
e.classes.remove(DART_REMOVED);
}
// Ditch inline styles.
- for (Element e in root.queryAll('[style]')) {
+ for (final e in root.queryAll('[style]')) {
e.attributes.remove('style');
}
// These elements are just tags that we should suppress.
- for (Element e in root.queryAll(".lang.lang-en")) {
+ for (final e in root.queryAll(".lang.lang-en")) {
e.remove();
}
+ Element parametersHeader;
+ Element returnValueHeader;
+ for (final e in root.queryAll("h6")) {
+ if (e.text == 'Parameters') {
+ parametersHeader = e;
+ } else if (e.text == 'Return value') {
+ returnValueHeader = e;
+ }
+ }
+
+ if (parametersHeader != null) {
+ int numEmptyParameters = 0;
+ final parameterDescriptions = root.queryAll("dd");
+ for (Element parameterDescription in parameterDescriptions) {
+ if (parameterDescription.text.trim().length == 0) {
+ numEmptyParameters++;
+ }
+ }
+ if (numEmptyParameters > 0 &&
+ numEmptyParameters == parameterDescriptions.length) {
+ // Remove the parameter list as it adds zero value as all descriptions
+ // are empty.
+ parametersHeader.remove();
+ for (final e in root.queryAll("dl")) {
+ e.remove();
+ }
+ } else if (parameterDescriptions.length == 0 &&
+ parametersHeader.nextElementSibling != null &&
+ parametersHeader.nextElementSibling.text.trim() == 'None.') {
+ // No need to display that the function takes 0 parameters.
+ parametersHeader.nextElementSibling.remove();
+ parametersHeader.remove();
+ }
+ }
+
+ // Heuristic: if the return value is a single word it is a type name not a
+ // useful text description so suppress it.
+ if (returnValueHeader != null &&
+ returnValueHeader.nextElementSibling != null &&
+ returnValueHeader.nextElementSibling.text.trim().split(' ').length < 2) {
+ returnValueHeader.nextElementSibling.remove();
+ returnValueHeader.remove();
+ }
+
bool changed = true;
while (changed) {
changed = false;
- while (root.nodes.length == 1) {
- Node child = root.nodes.first;
- if (child is Element) {
- root = child;
- changed = true;
- } else {
- // Just calling innerHTML on the parent will be sufficient...
- // and insures the output is properly escaped.
- break;
- }
+ while (root.nodes.length == 1 && root.nodes.first is Element) {
+ root = root.nodes.first;
+ changed = true;
}
// Trim useless nodes from the front.
- while(root.nodes.length > 0 &&
+ while (root.nodes.length > 0 &&
isSkippable(root.nodes.first)) {
root.nodes.first.remove();
changed = true;
}
// Trim useless nodes from the back.
- while(root.nodes.length > 0 &&
+ while (root.nodes.length > 0 &&
isSkippable(root.nodes.last())) {
root.nodes.last().remove();
changed = true;
@@ -399,10 +444,6 @@ String genCleanHtml(Element root) {
return JSONFIXUPHACK(root.innerHTML);
}
-String genPrettyHtml(DocumentFragment fragment) {
- return genCleanHtml(fragment);
-}
-
String genPrettyHtmlFromElement(Element e) {
e = e.clone(true);
return genCleanHtml(e);
@@ -420,7 +461,7 @@ class PostOrderTraversalIterator implements Iterator<Node> {
Node next() {
if (_next == null) return null;
- Node ret = _next;
+ final ret = _next;
if (_next.nextNode != null) {
_next = _leftMostDescendent(_next.nextNode);
} else {
@@ -444,12 +485,19 @@ class PostOrderTraversal implements Iterable<Node> {
Iterator<Node> iterator() => new PostOrderTraversalIterator(_node);
}
+/**
+ * Estimate what content represents the first line of text within the [section]
+ * range returning null if there isn't a plausible first line of text that
+ * contains the string [prop]. We measure the actual rendered client rectangle
+ * for the text and use heuristics defining how many pixels text can vary by
+ * and still be viewed as being on the same line.
+ */
Range findFirstLine(Range section, String prop) {
- Range firstLine = newRange();
+ final firstLine = newRange();
firstLine.setStart(section.startContainer, section.startOffset);
num maxBottom = null;
- for (Node n in new PostOrderTraversal(section.startContainer)) {
+ for (final n in new PostOrderTraversal(section.startContainer)) {
int compareResult = section.comparePoint(n, 0);
if (compareResult == -1) {
// before range so skip.
@@ -462,9 +510,8 @@ Range findFirstLine(Range section, String prop) {
final rect = getClientRect(n);
num bottom = rect.bottom;
if (rect.height > 0 && rect.width > 0) {
- if (maxBottom != null && (
- maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom
- )) {
+ if (maxBottom != null &&
+ maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) {
break;
} else if (maxBottom == null || maxBottom > bottom) {
maxBottom = bottom;
@@ -474,15 +521,19 @@ Range findFirstLine(Range section, String prop) {
firstLine.setEndAfter(n);
}
- if (firstLine.toString().indexOf(stripWebkit(prop)) == -1) {
+ // If the first line of text in the section does not contain the property
+ // name then we're not confident we are able to extract a high accuracy match
+ // so we should not return anything.
+ if (!firstLine.toString().contains(stripWebkit(prop))) {
return null;
}
return firstLine;
}
+/** Find child anchor elements that contain the text [prop]. */
AnchorElement findAnchorElement(Element root, String prop) {
for (AnchorElement a in root.queryAll("a")) {
- if (a.text.indexOf(prop) != -1) {
+ if (a.text.contains(prop)) {
return a;
}
}
@@ -490,9 +541,9 @@ AnchorElement findAnchorElement(Element root, String prop) {
}
// First surrounding element with an ID is safe enough.
-Element findTigherRoot(Element elem, Element root) {
+Element findTighterRoot(Element elem, Element root) {
Element candidate = elem;
- while(root != candidate) {
+ while (root != candidate) {
candidate = candidate.parent;
if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) {
break;
@@ -501,22 +552,22 @@ Element findTigherRoot(Element elem, Element root) {
return candidate;
}
-// this is very slow and ugly.. consider rewriting.
+// TODO(jacobr): this is very slow and ugly.. consider rewriting or at least
+// commenting carefully.
SectionParseResult filteredHtml(Element elem, Element root, String prop,
Function fragmentGeneratedCallback) {
// Using a tighter root avoids false positives at the risk of trimming
// text we shouldn't.
- root = findTigherRoot(elem, root);
- Range range = newRange();
+ root = findTighterRoot(elem, root);
+ final range = newRange();
range.setStartBefore(elem);
Element current = elem;
while (current != null) {
range.setEndBefore(current);
- if (current.classes.contains(DART_REMOVED)) {
- if (range.toString().trim().length > 0) {
- break;
- }
+ if (current.classes.contains(DART_REMOVED) &&
+ range.toString().trim().length > 0) {
+ break;
}
if (current.firstElementChild != null) {
current = current.firstElementChild;
@@ -547,7 +598,7 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
}
}
}
- DocumentFragment fragment = range.cloneContents();
+ final fragment = range.cloneContents();
if (fragmentGeneratedCallback != null) {
fragmentGeneratedCallback(fragment);
}
@@ -557,7 +608,7 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
}
// Extract idl
- StringBuffer idl = new StringBuffer();
+ final idl = new StringBuffer();
if (prop != null && prop.length > 0) {
// Only expect properties to have HTML.
for(Element e in fragment.queryAll(IDL_SELECTOR)) {
@@ -570,43 +621,46 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
for (Element e in fragment.queryAll("pre")) {
// Check if it looks like idl...
String txt = e.text.trim();
- if (likelyIdl.hasMatch(txt) && txt.indexOf("\n") != -1
- && txt.indexOf(")") != -1) {
+ if (likelyIdl.hasMatch(txt) && txt.contains("\n") && txt.contains(")")) {
idl.add(e.outerHTML);
e.remove();
}
}
}
- return new SectionParseResult(genPrettyHtml(fragment), url, idl.toString());
+ return new SectionParseResult(genCleanHtml(fragment), url, idl.toString());
}
-Element findBest(Element root, List<Text> allText, String prop, String propType) {
- // Best bet: match an id
- Element cand;
- cand = root.query("#" + prop);
+/**
+ * Find the best child element of [root] that appears to be an API definition
+ * for [prop]. [allText] is a list of all text nodes under root computed by
+ * the caller to improve performance.
+ */
+Element findBest(Element root, List<Text> allText, String prop,
+ String propType) {
+ // Best bet: find a child of root where the id matches the property name.
+ Element cand = root.query("#$prop");
if (cand == null && propType == "methods") {
- cand = root.query("[id=" + prop + "\\(\\)]");
+ cand = root.query("[id=$prop\\(\\)]");
+ }
+ while (cand != null && cand.text.trim().length == 0) {
+ // We found the bookmark for the element but sadly it is just an empty
+ // placeholder. Find the first real element.
+ cand = cand.nextElementSibling;
}
if (cand != null) {
- while (cand != null && cand.text.trim().length == 0) {
- // We found the bookmark for the element but sadly it is just an empty
- // placeholder. Find the first real element.
- cand = cand.nextElementSibling;
- }
- if (cand != null) {
- return cand;
- }
+ return cand;
}
- // If you are at least 70 pixels from the left, something is definitely fishy and we shouldn't even consider this candidate.
+ // If we are at least 70 pixels from the left, something is definitely
+ // fishy and we shouldn't even consider this candidate as nobody visually
+ // formats API docs like that.
num candLeft = 70;
for (Text text in allText) {
Element proposed = null;
-// var t = safeNameCleanup(text.text);
-// TODO(jacobr): does it hurt precision to use the full cleanup?
+ // TODO(jacobr): does it hurt precision to use the full cleanup?
String t = fullNameCleanup(text.text);
if (t == prop) {
proposed = text.parent;
@@ -623,6 +677,10 @@ Element findBest(Element root, List<Text> allText, String prop, String propType)
return cand;
}
+/**
+ * Checks whether [e] is tagged as obsolete or deprecated using heuristics
+ * for what these tags look like in the MDN docs.
+ */
bool isObsolete(Element e) {
RegExp obsoleteRegExp = new RegExp(@"(^|\s)obsolete(?=\s|$)");
RegExp deprecatedRegExp = new RegExp(@"(^|\s)deprecated(?=\s|$)");
@@ -636,40 +694,52 @@ bool isObsolete(Element e) {
}
bool isFirstCharLowerCase(String str) {
- RegExp firstLower = new RegExp("^[a-z]");
- return firstLower.hasMatch(str);
+ return const RegExp("^[a-z]").hasMatch(str);
}
-void scrapeSection(Element root, String sectionSelector,
- String currentType,
- List members,
- String propType) {
+/**
+ * Extracts information from a fragment of HTML only searching under the [root]
+ * html node. [secitonSelector] specifies the query to use to find candidate
+ * sections of the document to consider (there may be more than one).
+ * [currentType] specifies the name of the current class. [members] specifies
+ * the known class members for this class that we are attempting to find
+ * documentation for. [propType] indicates whether we are searching for
+ * methods, properties, constants, or constructors.
+ */
+void scrapeSection(Element root, String sectionSelector, String currentType,
+ List members, String propType) {
Map expectedProps = dartIdl[propType];
Set<String> alreadyMatchedProperties = new Set<String>();
bool onlyConsiderTables = false;
ElementList allMatches = root.queryAll(sectionSelector);
if (allMatches.length == 0) {
+ // If we can't find any matches to the sectionSelector, we fall back to
+ // considering all tables in the document. This is dangerous so we only
+ // allow the safer table matching extraction rules for this case.
allMatches = root.queryAll(".fullwidth-table");
onlyConsiderTables = true;
}
for (Element matchElement in allMatches) {
- DivElement match = matchElement.parent;
- if (!match.id.startsWith("section") && !(match.id == "pageText")) {
- throw "Enexpected element $match";
+ final match = matchElement.parent;
+ if (!match.id.startsWith("section") && match.id != "pageText") {
+ throw "Unexpected element $match";
}
+ // We don't want to later display this text a second time while for example
+ // displaying class level summary information as then we would display
+ // the same documentation twice.
match.classes.add(DART_REMOVED);
bool foundProps = false;
// TODO(jacobr): we should really look for the table tag instead
// add an assert if we are missing something that is a table...
- // TODO(jacobr) ignore tables in tables....
+ // TODO(jacobr) ignore tables in tables.
for (Element t in match.queryAll('.standard-table, .fullwidth-table')) {
int helpIndex = -1;
num i = 0;
for (Element r in t.queryAll("th, td.header")) {
- var txt = r.text.trim().split(" ")[0].toLowerCase();
+ final txt = r.text.trim().split(" ")[0].toLowerCase();
if (txt == "description") {
helpIndex = i;
break;
@@ -677,22 +747,23 @@ void scrapeSection(Element root, String sectionSelector,
i++;
}
- List<int> numMatches = new List<int>(i);
+ // Figure out which column in the table contains member names by
+ // tracking how many member names each column contains.
+ final numMatches = new List<int>(i);
for (int j = 0; j < i; j++) {
numMatches[j] = 0;
}
- // Find the row that seems to have the most names that look like
+ // Find the column that seems to have the most names that look like
// expected properties.
for (Element r in t.queryAll("tbody tr")) {
- ElementList $row = r.elements;
- if ($row.length == 0 || $row.first.classes.contains(".header")) {
+ ElementList row = r.elements;
+ if (row.length == 0 || row.first.classes.contains(".header")) {
continue;
}
- for (int k = 0; k < numMatches.length && k < $row.length; k++) {
- Element e = $row[k];
- if (expectedProps.containsKey(fullNameCleanup(e.text))) {
+ for (int k = 0; k < numMatches.length && k < row.length; k++) {
+ if (expectedProps.containsKey(fullNameCleanup(row[k].text))) {
numMatches[k]++;
break;
}
@@ -711,14 +782,14 @@ void scrapeSection(Element root, String sectionSelector,
}
for (Element r in t.queryAll("tbody tr")) {
- ElementList $row = r.elements;
- if ($row.length > propNameIndex && $row.length > helpIndex ) {
- if ($row.first.classes.contains(".header")) {
+ final row = r.elements;
+ if (row.length > propNameIndex && row.length > helpIndex) {
+ if (row.first.classes.contains(".header")) {
continue;
}
// TODO(jacobr): this code for determining the namestr is needlessly
// messy.
- Element nameRow = $row[propNameIndex];
+ final nameRow = row[propNameIndex];
AnchorElement a = nameRow.query("a");
String goodName = '';
if (a != null) {
@@ -728,15 +799,14 @@ void scrapeSection(Element root, String sectionSelector,
Map entry = new Map<String, String>();
- // "currentType": $($row[1]).text().trim(), // find("code") ?
- entry["name"] = fullNameCleanup(nameStr.length > 0 ? nameStr : goodName);
+ entry["name"] = fullNameCleanup(nameStr.length > 0 ?
+ nameStr : goodName);
final parse = filteredHtml(nameRow, nameRow, entry["name"], null);
String altHelp = parse.html;
- // "jsSignature": nameStr,
- entry["help"] = (helpIndex == -1 || $row[helpIndex] == null) ? altHelp : genPrettyHtmlFromElement($row[helpIndex]);
- // "altHelp" : altHelp,
+ entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ?
+ altHelp : genPrettyHtmlFromElement(row[helpIndex]);
if (parse.url != null) {
entry["url"] = parse.url;
}
@@ -759,41 +829,71 @@ void scrapeSection(Element root, String sectionSelector,
if (onlyConsiderTables) {
continue;
}
+
// After this point we have higher risk tests that attempt to perform
- // rudimentary page segmentation.
+ // rudimentary page segmentation. This approach is much more error-prone
+ // than using tables because the HTML is far less clearly structured.
- // Search for expected matching names.
- List<Text> allText = getAllTextNodes(match);
+ final allText = getAllTextNodes(match);
- Map<String, Element> pmap = new Map<String, Element>();
- for (String prop in expectedProps.getKeys()) {
+ final pmap = new Map<String, Element>();
+ for (final prop in expectedProps.getKeys()) {
if (alreadyMatchedProperties.contains(prop)) {
continue;
}
- Element e = findBest(match, allText, prop, propType);
+ final e = findBest(match, allText, prop, propType);
if (e != null && !inTable(e)) {
pmap[prop] = e;
}
}
- for (String prop in pmap.getKeys()) {
- Element e = pmap[prop];
- e.classes.add(DART_REMOVED);
+ for (final prop in pmap.getKeys()) {
+ pmap[prop].classes.add(DART_REMOVED);
}
+  // The problem is the MDN docs do not place documentation for each method in
+  // a nice self contained subtree. Instead you will see something like:
+
+ // <h3>drawImage</h3>
+ // <p>Draw image is an awesome method</p>
+ // some more info on drawImage here
+ // <h3>mozDrawWindow</h3>
+ // <p>This API cannot currently be used by Web content.
+ // It is chrome only.</p>
+ // <h3>drawRect</h3>
+ // <p>Always call drawRect instead of drawImage</p>
+ // some more info on drawRect here...
+
+ // The trouble is we will easily detect that the drawImage and drawRect
+ // entries are method definitions because we know to search for these
+ // method names but we will not detect that mozDrawWindow is a method
+ // definition as that method doesn't exist in our IDL. Thus if we are not
+ // careful the definition for the drawImage method will contain the
+ // definition for the mozDrawWindow method as well which would result in
+ // broken docs. We solve this problem by finding all content with similar
+ // visual structure to the already found method definitions. It turns out
+ // that using the visual position of each element on the page is much
+ // more reliable than using the DOM structure
+ // (e.g. section_root > div > h3) for the MDN docs because MDN authors
+ // carefully check that the documentation for each method comment is
+ // visually consistent but take less care to check that each
+ // method comment has identical markup structure.
for (String prop in pmap.getKeys()) {
Element e = pmap[prop];
ClientRect r = getClientRect(e);
- // TODO(jacobr): a lot of these queries are identical.
- for (Element cand in match.queryAll(e.tagName)) {
- if (!cand.classes.contains(DART_REMOVED) && !inTable(cand) ) { // XXX use a neg selector.
- ClientRect candRect = getClientRect(cand);
- // TODO(jacobr): this is somewhat loose.
+ // TODO(jacobr): a lot of these queries are identical and this code
+ // could easily be optimized.
+ for (final cand in match.queryAll(e.tagName)) {
+ // TODO(jacobr): use a negative selector instead.
+ if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) {
+ final candRect = getClientRect(cand);
+ // Only consider matches that have similar heights and identical left
+ // coordinates.
if (candRect.left == r.left &&
(candRect.height - r.height).abs() < 5) {
String propName = fullNameCleanup(cand.text);
- if (isFirstCharLowerCase(propName) && pmap.containsKey(propName) == false && alreadyMatchedProperties.contains(propName) == false) {
- // Don't set here to avoid layouts... cand.classes.add(DART_REMOVED);
+ if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName)
+ && !alreadyMatchedProperties.contains(propName)) {
pmap[propName] = cand;
}
}
@@ -801,6 +901,9 @@ void scrapeSection(Element root, String sectionSelector,
}
}
+ // We mark these elements in batch to reduce the number of layouts
+ // triggered. TODO(jacobr): use new batch based async measurement to make
+ // this code flow simpler.
for (String prop in pmap.getKeys()) {
Element e = pmap[prop];
e.classes.add(DART_REMOVED);
@@ -810,7 +913,7 @@ void scrapeSection(Element root, String sectionSelector,
// DART_REMOVED so we don't include them in member descriptions... which
// would suck.
for (Element e in match.queryAll("[id]")) {
- if (e.id.indexOf(matchElement.id) != -1) {
+ if (e.id.contains(matchElement.id)) {
e.classes.add(DART_REMOVED);
}
}
@@ -828,7 +931,6 @@ void scrapeSection(Element root, String sectionSelector,
"name" : prop,
"help" : parse.html,
"obsolete" : obsolete
- //"jsSignature" : nameStr
};
if (parse.idl.length > 0) {
entry["idl"] = parse.idl;
@@ -839,20 +941,19 @@ void scrapeSection(Element root, String sectionSelector,
}
String trimHtml(String html) {
- // TODO(jacobr): impl.
+ // TODO(jacobr): implement this. Remove spurious enclosing HTML tags, etc.
return html;
}
bool maybeName(String name) {
- RegExp nameRegExp = new RegExp("^[a-z][a-z0-9A-Z]+\$");
- if (nameRegExp.hasMatch(name)) return true;
- RegExp constRegExp = new RegExp("^[A-Z][A-Z_]*\$");
- if (constRegExp.hasMatch(name)) return true;
+ return const RegExp("^[a-z][a-z0-9A-Z]+\$").hasMatch(name) ||
+ const RegExp("^[A-Z][A-Z_]*\$").hasMatch(name);
}
+// TODO(jacobr): this method is ugly at the moment but will become easier to
+// read once ElementList supports most of the Element functionality.
void markRemoved(var e) {
if (e != null) {
- // TODO( remove)
if (e is Element) {
e.classes.add(DART_REMOVED);
} else {
@@ -863,25 +964,23 @@ void markRemoved(var e) {
}
}
+// TODO(jacobr): remove this when the dartium JSON parser handles \n correctly.
String JSONFIXUPHACK(String value) {
return value.replaceAll("\n", "ZDARTIUMDOESNTESCAPESLASHNJXXXX");
}
String mozToWebkit(String name) {
- RegExp regExp = new RegExp("^moz");
- name = name.replaceFirst(regExp, "webkit");
- return name;
+ return name.replaceFirst(const RegExp("^moz"), "webkit");
}
String stripWebkit(String name) {
return trimPrefix(name, "webkit");
}
+// TODO(jacobr): be more principled about this.
String fullNameCleanup(String name) {
int parenIndex = name.indexOf('(');
if (parenIndex != -1) {
- // TODO(jacobr): workaround bug in:
- // name = name.split("(")[0];
name = name.substring(0, parenIndex);
}
name = name.split(" ")[0];
@@ -893,8 +992,8 @@ String fullNameCleanup(String name) {
return name;
}
-// Less agressive than the full cleanup to avoid overeager matching of
-// everytyhing
+// Less aggressive than the full name cleanup to avoid overeager matching.
+// TODO(jacobr): be more principled about this.
String safeNameCleanup(String name) {
int parenIndex = name.indexOf('(');
if (parenIndex != -1 && name.indexOf(")") != -1) {
@@ -914,12 +1013,20 @@ String safeNameCleanup(String name) {
return name;
}
+/**
+ * Remove h1, h2, and h3 headers.
+ */
void removeHeaders(DocumentFragment fragment) {
for (Element e in fragment.queryAll("h1, h2, h3")) {
e.remove();
}
}
+/**
+ * Given an [entry] representing a single method or property cleanup the
+ * values performing some simple normalization and only adding the entry to
+ * [members] if it has a valid name.
+ */
void cleanupEntry(List members, Map entry) {
if (entry.containsKey('help')) {
entry['help'] = trimHtml(entry['help']);
@@ -950,10 +1057,6 @@ String trimPrefix(String str, String prefix) {
}
}
-void resourceLoaded() {
- if (data != null) run();
-}
-
String trimStart(String str, String start) {
if (str.startsWith(start) && str.length > start.length) {
return str.substring(start.length);
@@ -968,6 +1071,10 @@ String trimEnd(String str, String end) {
return str;
}
+/**
+ * Extract a section with name [key] using [selector] to find start points for
+ * the section in the document.
+ */
void extractSection(String selector, String key) {
for (Element e in document.queryAll(selector)) {
e = e.parent;
@@ -987,7 +1094,9 @@ void extractSection(String selector, String key) {
}
void run() {
- // Inject CSS to insure lines don't wrap unless it was intentional.
+ // Inject CSS to ensure lines don't wrap unless they were intended to.
+ // This is needed to make the logic to determine what is a single line
+ // behave consistently even for very long method names.
document.head.nodes.add(new Element.html("""
<style type="text/css">
body {
@@ -1000,13 +1109,15 @@ void run() {
// TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best
// place to do this...
+ // TODO(jacobr): move this to right before we extract HTML.
// Remove the "Introduced in HTML <version>" boxes.
for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) {
e.remove();
}
- // Flatten the list of known DOM types into a faster and case-insensitive map.
+ // Flatten the list of known DOM types into a faster and case-insensitive
+ // map.
domTypes = {};
for (final domType in domTypesRaw) {
domTypes[domType.toLowerCase()] = domType;
@@ -1024,7 +1135,8 @@ void run() {
// TODO(rnystrom): Add rel external to links we didn't fix.
for (AnchorElement a in document.queryAll('a')) {
// Get the raw attribute because we *don't* want the browser to fully-
- // qualify the name for us since it has the wrong base address for the page.
+ // qualify the name for us since it has the wrong base address for the
+ // page.
var href = a.attributes['href'];
// Ignore busted links.
@@ -1070,20 +1182,22 @@ void run() {
a.attributes['href'] = href;
}
- if (title.toLowerCase().indexOf(currentTypeTiny.toLowerCase()) == -1) {
+ if (!title.toLowerCase().contains(currentTypeTiny.toLowerCase())) {
bool foundMatch = false;
// Test out if the title is really an HTML tag that matches the
// current class name.
for (String tag in [title.split(" ")[0], title.split(".").last()]) {
try {
dom.Element element = dom.document.createElement(tag);
+ // TODO(jacobr): this is a really ugly way of doing this that will
+ // stop working at some point soon.
if (element.typeName == currentType) {
foundMatch = true;
break;
}
} catch(e) {}
}
- if (foundMatch == false) {
+ if (!foundMatch) {
dbEntry['skipped'] = true;
dbEntry['cause'] = "Suspect title";
onEnd();
@@ -1101,6 +1215,9 @@ void run() {
markRemoved(root.query("#Notes"));
List members = dbEntry['members'];
+ // This is a laundry list of CSS selectors for boilerplate content on the
+ // MDN pages that we should ignore for the purposes of extracting
+ // documentation.
markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar"));
markRemoved(document.queryAll("#article-nav"));
markRemoved(document.queryAll(".hideforedit"));
@@ -1109,31 +1226,33 @@ void run() {
markRemoved(document.queryAll("h1, h2"));
scrapeSection(root, "#Methods", currentType, members, 'methods');
- scrapeSection(root, "#Constants, #Error_codes, #State_constants", currentType, members, 'constants');
+ scrapeSection(root, "#Constants, #Error_codes, #State_constants",
+ currentType, members, 'constants');
// TODO(jacobr): infer tables based on multiple matches rather than
// using a hard coded list of section ids.
scrapeSection(root,
- "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, #DOM_properties, #Event_handlers, #Event_Handlers",
+ "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, " +
+ "#DOM_properties, #Event_handlers, #Event_Handlers",
currentType, members, 'properties');
// Avoid doing this till now to avoid messing up the section scrape.
markRemoved(document.queryAll("h3"));
- ElementList $examples = root.queryAll("span[id^=example], span[id^=Example]");
+ ElementList examples = root.queryAll("span[id^=example], span[id^=Example]");
extractSection("#See_also", 'seeAlso');
extractSection("#Specification, #Specifications", "specification");
- // $("#Methods").parent().remove(); // not safe (e.g. Document)
// TODO(jacobr): actually extract the constructor(s)
extractSection("#Constructor, #Constructors", 'constructor');
extractSection("#Browser_compatibility, #Compatibility", 'compatibility');
+ // Extract examples.
List<String> exampleHtml = [];
- for (Element e in $examples) {
+ for (Element e in examples) {
e.classes.add(DART_REMOVED);
}
- for (Element e in $examples) {
+ for (Element e in examples) {
String html = filteredHtml(e, root, null,
(DocumentFragment fragment) {
removeHeaders(fragment);
@@ -1150,8 +1269,10 @@ void run() {
dbEntry['examples'] = exampleHtml;
}
+ // Extract the class summary.
+ // Basically everything left over after the #Summary or #Description tag is
+ // safe to include in the summary.
StringBuffer summary = new StringBuffer();
-
for (Element e in root.queryAll("#Summary, #Description")) {
summary.add(filteredHtml(root, e, null, removeHeaders).html);
}
@@ -1176,6 +1297,7 @@ void run() {
}
// Inject CSS to aid debugging in the browser.
+ // We could avoid doing this if we know we are not running in a browser..
document.head.nodes.add(new Element.html(DEBUG_CSS));
onEnd();
@@ -1186,9 +1308,11 @@ void main() {
}
void documentLoaded(event) {
+ // Load the database of expected methods and properties with an
+ // XMLHttpRequest.
new XMLHttpRequest.getTEMPNAME('${window.location}.json', (req) {
data = JSON.parse(req.responseText);
dbEntry = {'members': [], 'srcUrl': pageUrl};
- resourceLoaded();
+ run();
});
}
« no previous file with comments | « utils/apidoc/mdn/crawl.js ('k') | utils/apidoc/mdn/extract.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698