Index: utils/apidoc/mdn/extract.dart |
diff --git a/utils/apidoc/mdn/extract.dart b/utils/apidoc/mdn/extract.dart |
index 17627054da495a2b004b937e84d0593364b0fe29..58be76f7220027e04b514240b434763730e96bdb 100644 |
--- a/utils/apidoc/mdn/extract.dart |
+++ b/utils/apidoc/mdn/extract.dart |
@@ -14,8 +14,7 @@ Range _tempRange; |
// Hacks because ASYNC measurement is annoying when just writing a script. |
ClientRect getClientRect(Node n) { |
if (n is Element) { |
- Element e = n; |
- dom.Element raw = unwrapDomObject(e.dynamic); |
+ dom.Element raw = unwrapDomObject(n.dynamic); |
return LevelDom.wrapClientRect(raw.getBoundingClientRect()); |
} else { |
// Crazy hacks that works for nodes.... create a range and measure it. |
@@ -28,11 +27,18 @@ ClientRect getClientRect(Node n) { |
} |
} |
-final DART_REMOVED = "dart_removed"; |
+/** |
+ * CSS class that is added to elements in the DOM to indicate that they should |
+ * be removed when extracting blocks of documentation. This is helpful when |
+ * running this script in a web browser as it is easy to visually see what |
+ * blocks of information were extracted when using CSS such as DEBUG_CSS |
+ * which highlights elements that should be removed. |
+ */ |
+final DART_REMOVED = "dart-removed"; |
final DEBUG_CSS = """ |
<style type="text/css"> |
- .dart_removed { |
+ .dart-removed { |
background-color: rgba(255, 0, 0, 0.5); |
} |
</style>"""; |
@@ -281,7 +287,7 @@ String getAbsoluteUrl(AnchorElement anchor) { |
} |
bool inTable(Node n) { |
- while(n != null) { |
+ while (n != null) { |
if (n is TableElement) return true; |
n = n.parent; |
} |
@@ -295,7 +301,7 @@ String escapeHTML(str) { |
} |
List<Text> getAllTextNodes(Element elem) { |
- List<Text> nodes = <Text>[]; |
+ final nodes = <Text>[]; |
helper(Node n) { |
if (n is Text) { |
nodes.add(n); |
@@ -323,8 +329,8 @@ bool isSkippableType(Node n) { |
} |
if (n is Text) return true; |
- for (Node child in n.nodes) { |
- if (isSkippableType(child) == false) { |
+ for (final child in n.nodes) { |
+ if (!isSkippableType(child)) { |
return false; |
} |
} |
@@ -353,44 +359,81 @@ class SectionParseResult { |
} |
String genCleanHtml(Element root) { |
- for (Element e in root.queryAll(".$DART_REMOVED")) { |
+ for (final e in root.queryAll(".$DART_REMOVED")) { |
e.classes.remove(DART_REMOVED); |
} |
// Ditch inline styles. |
- for (Element e in root.queryAll('[style]')) { |
+ for (final e in root.queryAll('[style]')) { |
e.attributes.remove('style'); |
} |
// These elements are just tags that we should suppress. |
- for (Element e in root.queryAll(".lang.lang-en")) { |
+ for (final e in root.queryAll(".lang.lang-en")) { |
e.remove(); |
} |
+ Element parametersList; |
+ Element returnValue; |
nweiz
2012/02/02 00:16:19
Maybe "parametersHeader" and "returnHeader"? Curre
|
+ for (final e in root.queryAll("h6")) { |
+ if (e.text == 'Parameters') { |
+ parametersList = e; |
+ } else if (e.text == 'Return value') { |
+ returnValue = e; |
+ } |
+ } |
+ |
+ if (parametersList != null) { |
+ int numEmptyParameters = 0; |
+ final parameterDescriptions = root.queryAll("dd"); |
+ for (Element parameterDescription in parameterDescriptions) { |
+ if (parameterDescription.text.trim().length == 0) { |
+ numEmptyParameters++; |
nweiz
2012/02/02 00:16:19
"numEmptyParameters = parameterDescriptions.filter
|
+ } |
+ } |
+ if (numEmptyParameters > 0 && |
+ numEmptyParameters == parameterDescriptions.length) { |
+ // Every parameter description is empty, so the parameter list adds no |
+ // value; remove it. |
+ parametersList.remove(); |
+ for (final e in root.queryAll("dl")) { |
+ e.remove(); |
+ } |
+ } else if (parameterDescriptions.length == 0 && |
+ parametersList.nextElementSibling != null && |
+ parametersList.nextElementSibling.text.trim() == 'None.') { |
+ // No need to display that the function takes 0 parameters. |
+ parametersList.nextElementSibling.remove(); |
+ parametersList.remove(); |
+ } |
+ } |
+ |
+ // Heuristic: if the return value is a single word, it is a type name rather |
+ // than a useful text description, so suppress it. |
+ if (returnValue != null && |
+ returnValue.nextElementSibling != null && |
+ returnValue.nextElementSibling.text.trim().split(' ').length <= 1) { |
+ returnValue.nextElementSibling.remove(); |
+ returnValue.remove(); |
+ } |
+ |
bool changed = true; |
while (changed) { |
changed = false; |
- while (root.nodes.length == 1) { |
- Node child = root.nodes.first; |
- if (child is Element) { |
- root = child; |
- changed = true; |
- } else { |
- // Just calling innerHTML on the parent will be sufficient... |
- // and insures the output is properly escaped. |
- break; |
- } |
+ while (root.nodes.length == 1 && root.nodes.first is Element) { |
+ root = root.nodes.first; |
+ changed = true; |
} |
// Trim useless nodes from the front. |
- while(root.nodes.length > 0 && |
+ while (root.nodes.length > 0 && |
isSkippable(root.nodes.first)) { |
root.nodes.first.remove(); |
changed = true; |
} |
// Trim useless nodes from the back. |
- while(root.nodes.length > 0 && |
+ while (root.nodes.length > 0 && |
isSkippable(root.nodes.last())) { |
root.nodes.last().remove(); |
changed = true; |
@@ -399,10 +442,6 @@ String genCleanHtml(Element root) { |
return JSONFIXUPHACK(root.innerHTML); |
} |
-String genPrettyHtml(DocumentFragment fragment) { |
- return genCleanHtml(fragment); |
-} |
- |
String genPrettyHtmlFromElement(Element e) { |
e = e.clone(true); |
return genCleanHtml(e); |
@@ -420,7 +459,7 @@ class PostOrderTraversalIterator implements Iterator<Node> { |
Node next() { |
if (_next == null) return null; |
- Node ret = _next; |
+ final ret = _next; |
if (_next.nextNode != null) { |
_next = _leftMostDescendent(_next.nextNode); |
} else { |
@@ -445,11 +484,11 @@ class PostOrderTraversal implements Iterable<Node> { |
} |
Range findFirstLine(Range section, String prop) { |
- Range firstLine = newRange(); |
+ final firstLine = newRange(); |
firstLine.setStart(section.startContainer, section.startOffset); |
num maxBottom = null; |
- for (Node n in new PostOrderTraversal(section.startContainer)) { |
+ for (final n in new PostOrderTraversal(section.startContainer)) { |
int compareResult = section.comparePoint(n, 0); |
if (compareResult == -1) { |
// before range so skip. |
@@ -462,9 +501,8 @@ Range findFirstLine(Range section, String prop) { |
final rect = getClientRect(n); |
num bottom = rect.bottom; |
if (rect.height > 0 && rect.width > 0) { |
- if (maxBottom != null && ( |
- maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom |
- )) { |
+ if (maxBottom != null && |
+ maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) { |
break; |
} else if (maxBottom == null || maxBottom > bottom) { |
maxBottom = bottom; |
@@ -474,7 +512,10 @@ Range findFirstLine(Range section, String prop) { |
firstLine.setEndAfter(n); |
} |
- if (firstLine.toString().indexOf(stripWebkit(prop)) == -1) { |
+ // If the first line of text in the section does not contain the property |
+ // name, we cannot be confident of extracting a high-accuracy match, so |
+ // return null instead. |
+ if (!firstLine.toString().contains(stripWebkit(prop))) { |
return null; |
} |
return firstLine; |
@@ -482,7 +523,7 @@ Range findFirstLine(Range section, String prop) { |
AnchorElement findAnchorElement(Element root, String prop) { |
for (AnchorElement a in root.queryAll("a")) { |
- if (a.text.indexOf(prop) != -1) { |
+ if (a.text.contains(prop)) { |
return a; |
} |
} |
@@ -490,9 +531,9 @@ AnchorElement findAnchorElement(Element root, String prop) { |
} |
// First surrounding element with an ID is safe enough. |
-Element findTigherRoot(Element elem, Element root) { |
+Element findTighterRoot(Element elem, Element root) { |
Element candidate = elem; |
- while(root != candidate) { |
+ while (root != candidate) { |
candidate = candidate.parent; |
if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) { |
break; |
@@ -501,22 +542,22 @@ Element findTigherRoot(Element elem, Element root) { |
return candidate; |
} |
-// this is very slow and ugly.. consider rewriting. |
+// TODO(jacobr): this is very slow and ugly.. consider rewriting or at least |
+// commenting carefully. |
SectionParseResult filteredHtml(Element elem, Element root, String prop, |
Function fragmentGeneratedCallback) { |
// Using a tighter root avoids false positives at the risk of trimming |
// text we shouldn't. |
- root = findTigherRoot(elem, root); |
- Range range = newRange(); |
+ root = findTighterRoot(elem, root); |
+ final range = newRange(); |
range.setStartBefore(elem); |
Element current = elem; |
while (current != null) { |
range.setEndBefore(current); |
- if (current.classes.contains(DART_REMOVED)) { |
- if (range.toString().trim().length > 0) { |
- break; |
- } |
+ if (current.classes.contains(DART_REMOVED) && |
+ range.toString().trim().length > 0) { |
+ break; |
} |
if (current.firstElementChild != null) { |
current = current.firstElementChild; |
@@ -547,7 +588,7 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop, |
} |
} |
} |
- DocumentFragment fragment = range.cloneContents(); |
+ final fragment = range.cloneContents(); |
if (fragmentGeneratedCallback != null) { |
fragmentGeneratedCallback(fragment); |
} |
@@ -557,7 +598,7 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop, |
} |
// Extract idl |
- StringBuffer idl = new StringBuffer(); |
+ final idl = new StringBuffer(); |
if (prop != null && prop.length > 0) { |
// Only expect properties to have HTML. |
for(Element e in fragment.queryAll(IDL_SELECTOR)) { |
@@ -570,43 +611,41 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop, |
for (Element e in fragment.queryAll("pre")) { |
// Check if it looks like idl... |
String txt = e.text.trim(); |
- if (likelyIdl.hasMatch(txt) && txt.indexOf("\n") != -1 |
- && txt.indexOf(")") != -1) { |
+ if (likelyIdl.hasMatch(txt) && txt.contains("\n") && txt.contains(")")) { |
idl.add(e.outerHTML); |
e.remove(); |
} |
} |
} |
- return new SectionParseResult(genPrettyHtml(fragment), url, idl.toString()); |
+ return new SectionParseResult(genCleanHtml(fragment), url, idl.toString()); |
} |
-Element findBest(Element root, List<Text> allText, String prop, String propType) { |
+Element findBest(Element root, List<Text> allText, String prop, |
+ String propType) { |
// Best bet: match an id |
Element cand; |
- cand = root.query("#" + prop); |
+ cand = root.query("#$prop"); |
if (cand == null && propType == "methods") { |
- cand = root.query("[id=" + prop + "\\(\\)]"); |
+ cand = root.query("[id=$prop\\(\\)]"); |
+ } |
+ while (cand != null && cand.text.trim().length == 0) { |
+ // We found the bookmark for the element but sadly it is just an empty |
+ // placeholder. Find the first real element. |
+ cand = cand.nextElementSibling; |
} |
if (cand != null) { |
- while (cand != null && cand.text.trim().length == 0) { |
- // We found the bookmark for the element but sadly it is just an empty |
- // placeholder. Find the first real element. |
- cand = cand.nextElementSibling; |
- } |
- if (cand != null) { |
- return cand; |
- } |
+ return cand; |
} |
- // If you are at least 70 pixels from the left, something is definitely fishy and we shouldn't even consider this candidate. |
+ // If you are at least 70 pixels from the left, something is definitely |
+ // fishy and we shouldn't even consider this candidate. |
num candLeft = 70; |
for (Text text in allText) { |
Element proposed = null; |
-// var t = safeNameCleanup(text.text); |
-// TODO(jacobr): does it hurt precision to use the full cleanup? |
+ // TODO(jacobr): does it hurt precision to use the full cleanup? |
String t = fullNameCleanup(text.text); |
if (t == prop) { |
proposed = text.parent; |
@@ -636,14 +675,12 @@ bool isObsolete(Element e) { |
} |
bool isFirstCharLowerCase(String str) { |
- RegExp firstLower = new RegExp("^[a-z]"); |
- return firstLower.hasMatch(str); |
+ return const RegExp("^[a-z]").hasMatch(str); |
} |
-void scrapeSection(Element root, String sectionSelector, |
- String currentType, |
- List members, |
- String propType) { |
+// TODO(jacobr): document this method. |
+void scrapeSection(Element root, String sectionSelector, String currentType, |
+ List members, String propType) { |
Map expectedProps = dartIdl[propType]; |
Set<String> alreadyMatchedProperties = new Set<String>(); |
@@ -655,8 +692,8 @@ void scrapeSection(Element root, String sectionSelector, |
} |
for (Element matchElement in allMatches) { |
DivElement match = matchElement.parent; |
- if (!match.id.startsWith("section") && !(match.id == "pageText")) { |
- throw "Enexpected element $match"; |
+ if (!match.id.startsWith("section") && match.id != "pageText") { |
+ throw "Unexpected element $match"; |
} |
match.classes.add(DART_REMOVED); |
@@ -669,7 +706,7 @@ void scrapeSection(Element root, String sectionSelector, |
int helpIndex = -1; |
num i = 0; |
for (Element r in t.queryAll("th, td.header")) { |
- var txt = r.text.trim().split(" ")[0].toLowerCase(); |
+ final txt = r.text.trim().split(" ")[0].toLowerCase(); |
if (txt == "description") { |
helpIndex = i; |
break; |
@@ -685,14 +722,13 @@ void scrapeSection(Element root, String sectionSelector, |
// Find the row that seems to have the most names that look like |
// expected properties. |
for (Element r in t.queryAll("tbody tr")) { |
- ElementList $row = r.elements; |
- if ($row.length == 0 || $row.first.classes.contains(".header")) { |
+ ElementList row = r.elements; |
+ if (row.length == 0 || row.first.classes.contains(".header")) { |
continue; |
} |
- for (int k = 0; k < numMatches.length && k < $row.length; k++) { |
- Element e = $row[k]; |
- if (expectedProps.containsKey(fullNameCleanup(e.text))) { |
+ for (int k = 0; k < numMatches.length && k < row.length; k++) { |
+ if (expectedProps.containsKey(fullNameCleanup(row[k].text))) { |
numMatches[k]++; |
break; |
} |
@@ -711,14 +747,14 @@ void scrapeSection(Element root, String sectionSelector, |
} |
for (Element r in t.queryAll("tbody tr")) { |
- ElementList $row = r.elements; |
- if ($row.length > propNameIndex && $row.length > helpIndex ) { |
- if ($row.first.classes.contains(".header")) { |
+ ElementList row = r.elements; |
+ if (row.length > propNameIndex && row.length > helpIndex) { |
+ if (row.first.classes.contains(".header")) { |
continue; |
} |
// TODO(jacobr): this code for determining the namestr is needlessly |
// messy. |
- Element nameRow = $row[propNameIndex]; |
+ Element nameRow = row[propNameIndex]; |
AnchorElement a = nameRow.query("a"); |
String goodName = ''; |
if (a != null) { |
@@ -728,15 +764,14 @@ void scrapeSection(Element root, String sectionSelector, |
Map entry = new Map<String, String>(); |
- // "currentType": $($row[1]).text().trim(), // find("code") ? |
- entry["name"] = fullNameCleanup(nameStr.length > 0 ? nameStr : goodName); |
+ entry["name"] = fullNameCleanup(nameStr.length > 0 ? |
+ nameStr : goodName); |
final parse = filteredHtml(nameRow, nameRow, entry["name"], null); |
String altHelp = parse.html; |
- // "jsSignature": nameStr, |
- entry["help"] = (helpIndex == -1 || $row[helpIndex] == null) ? altHelp : genPrettyHtmlFromElement($row[helpIndex]); |
- // "altHelp" : altHelp, |
+ entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ? |
+ altHelp : genPrettyHtmlFromElement(row[helpIndex]); |
if (parse.url != null) { |
entry["url"] = parse.url; |
} |
@@ -777,8 +812,7 @@ void scrapeSection(Element root, String sectionSelector, |
} |
for (String prop in pmap.getKeys()) { |
- Element e = pmap[prop]; |
- e.classes.add(DART_REMOVED); |
+ pmap[prop].classes.add(DART_REMOVED); |
} |
for (String prop in pmap.getKeys()) { |
@@ -786,14 +820,15 @@ void scrapeSection(Element root, String sectionSelector, |
ClientRect r = getClientRect(e); |
// TODO(jacobr): a lot of these queries are identical. |
for (Element cand in match.queryAll(e.tagName)) { |
- if (!cand.classes.contains(DART_REMOVED) && !inTable(cand) ) { // XXX use a neg selector. |
+ // TODO(jacobr): use a negative selector instead. |
+ if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) { |
ClientRect candRect = getClientRect(cand); |
// TODO(jacobr): this is somewhat loose. |
if (candRect.left == r.left && |
(candRect.height - r.height).abs() < 5) { |
String propName = fullNameCleanup(cand.text); |
- if (isFirstCharLowerCase(propName) && pmap.containsKey(propName) == false && alreadyMatchedProperties.contains(propName) == false) { |
- // Don't set here to avoid layouts... cand.classes.add(DART_REMOVED); |
+ if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName) |
+ && !alreadyMatchedProperties.contains(propName)) { |
pmap[propName] = cand; |
} |
} |
@@ -810,7 +845,7 @@ void scrapeSection(Element root, String sectionSelector, |
// DART_REMOVED so we don't include them in member descriptions... which |
// would suck. |
for (Element e in match.queryAll("[id]")) { |
- if (e.id.indexOf(matchElement.id) != -1) { |
+ if (e.id.contains(matchElement.id)) { |
e.classes.add(DART_REMOVED); |
} |
} |
@@ -828,7 +863,6 @@ void scrapeSection(Element root, String sectionSelector, |
"name" : prop, |
"help" : parse.html, |
"obsolete" : obsolete |
- //"jsSignature" : nameStr |
}; |
if (parse.idl.length > 0) { |
entry["idl"] = parse.idl; |
@@ -844,10 +878,8 @@ String trimHtml(String html) { |
} |
bool maybeName(String name) { |
- RegExp nameRegExp = new RegExp("^[a-z][a-z0-9A-Z]+\$"); |
- if (nameRegExp.hasMatch(name)) return true; |
- RegExp constRegExp = new RegExp("^[A-Z][A-Z_]*\$"); |
- if (constRegExp.hasMatch(name)) return true; |
+ return const RegExp("^[a-z][a-z0-9A-Z]+\$").hasMatch(name) || |
+ const RegExp("^[A-Z][A-Z_]*\$").hasMatch(name); |
} |
void markRemoved(var e) { |
@@ -868,9 +900,7 @@ String JSONFIXUPHACK(String value) { |
} |
String mozToWebkit(String name) { |
- RegExp regExp = new RegExp("^moz"); |
- name = name.replaceFirst(regExp, "webkit"); |
- return name; |
+ return name.replaceFirst(const RegExp("^moz"), "webkit"); |
} |
String stripWebkit(String name) { |
@@ -950,10 +980,6 @@ String trimPrefix(String str, String prefix) { |
} |
} |
-void resourceLoaded() { |
- if (data != null) run(); |
-} |
- |
String trimStart(String str, String start) { |
if (str.startsWith(start) && str.length > start.length) { |
return str.substring(start.length); |
@@ -987,7 +1013,7 @@ void extractSection(String selector, String key) { |
} |
void run() { |
- // Inject CSS to insure lines don't wrap unless it was intentional. |
+ // Inject CSS to ensure lines don't wrap unless it was intentional. |
document.head.nodes.add(new Element.html(""" |
<style type="text/css"> |
body { |
@@ -1006,7 +1032,8 @@ void run() { |
e.remove(); |
} |
- // Flatten the list of known DOM types into a faster and case-insensitive map. |
+ // Flatten the list of known DOM types into a faster and case-insensitive |
+ // map. |
domTypes = {}; |
for (final domType in domTypesRaw) { |
domTypes[domType.toLowerCase()] = domType; |
@@ -1024,7 +1051,8 @@ void run() { |
// TODO(rnystrom): Add rel external to links we didn't fix. |
for (AnchorElement a in document.queryAll('a')) { |
// Get the raw attribute because we *don't* want the browser to fully- |
- // qualify the name for us since it has the wrong base address for the page. |
+ // qualify the name for us since it has the wrong base address for the |
+ // page. |
var href = a.attributes['href']; |
// Ignore busted links. |
@@ -1070,7 +1098,7 @@ void run() { |
a.attributes['href'] = href; |
} |
- if (title.toLowerCase().indexOf(currentTypeTiny.toLowerCase()) == -1) { |
+ if (!title.toLowerCase().contains(currentTypeTiny.toLowerCase())) { |
bool foundMatch = false; |
// Test out if the title is really an HTML tag that matches the |
// current class name. |
@@ -1083,7 +1111,7 @@ void run() { |
} |
} catch(e) {} |
} |
- if (foundMatch == false) { |
+ if (!foundMatch) { |
dbEntry['skipped'] = true; |
dbEntry['cause'] = "Suspect title"; |
onEnd(); |
@@ -1109,31 +1137,32 @@ void run() { |
markRemoved(document.queryAll("h1, h2")); |
scrapeSection(root, "#Methods", currentType, members, 'methods'); |
- scrapeSection(root, "#Constants, #Error_codes, #State_constants", currentType, members, 'constants'); |
+ scrapeSection(root, "#Constants, #Error_codes, #State_constants", |
+ currentType, members, 'constants'); |
// TODO(jacobr): infer tables based on multiple matches rather than |
// using a hard coded list of section ids. |
scrapeSection(root, |
- "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, #DOM_properties, #Event_handlers, #Event_Handlers", |
+ "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, " + |
+ "#DOM_properties, #Event_handlers, #Event_Handlers", |
currentType, members, 'properties'); |
// Avoid doing this till now to avoid messing up the section scrape. |
markRemoved(document.queryAll("h3")); |
- ElementList $examples = root.queryAll("span[id^=example], span[id^=Example]"); |
+ ElementList examples = root.queryAll("span[id^=example], span[id^=Example]"); |
extractSection("#See_also", 'seeAlso'); |
extractSection("#Specification, #Specifications", "specification"); |
- // $("#Methods").parent().remove(); // not safe (e.g. Document) |
// TODO(jacobr): actually extract the constructor(s) |
extractSection("#Constructor, #Constructors", 'constructor'); |
extractSection("#Browser_compatibility, #Compatibility", 'compatibility'); |
List<String> exampleHtml = []; |
- for (Element e in $examples) { |
+ for (Element e in examples) { |
e.classes.add(DART_REMOVED); |
} |
- for (Element e in $examples) { |
+ for (Element e in examples) { |
String html = filteredHtml(e, root, null, |
(DocumentFragment fragment) { |
removeHeaders(fragment); |
@@ -1189,6 +1218,6 @@ void documentLoaded(event) { |
new XMLHttpRequest.getTEMPNAME('${window.location}.json', (req) { |
data = JSON.parse(req.responseText); |
dbEntry = {'members': [], 'srcUrl': pageUrl}; |
- resourceLoaded(); |
+ run(); |
}); |
} |