Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(630)

Unified Diff: utils/apidoc/mdn/extract.dart

Issue 9315026: Cleanup mdn scripts (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: utils/apidoc/mdn/extract.dart
diff --git a/utils/apidoc/mdn/extract.dart b/utils/apidoc/mdn/extract.dart
index 17627054da495a2b004b937e84d0593364b0fe29..a037e2fa0da34e4c5b2062f47981f3f1d90184f1 100644
--- a/utils/apidoc/mdn/extract.dart
+++ b/utils/apidoc/mdn/extract.dart
@@ -14,8 +14,7 @@ Range _tempRange;
// Hacks because ASYNC measurement is annoying when just writing a script.
ClientRect getClientRect(Node n) {
if (n is Element) {
- Element e = n;
- dom.Element raw = unwrapDomObject(e.dynamic);
+ dom.Element raw = unwrapDomObject(n.dynamic);
return LevelDom.wrapClientRect(raw.getBoundingClientRect());
} else {
// Crazy hacks that works for nodes.... create a range and measure it.
@@ -28,11 +27,18 @@ ClientRect getClientRect(Node n) {
}
}
-final DART_REMOVED = "dart_removed";
+/**
+ * CSS class that is added to elements in the DOM to indicate that they should
+ * be removed when extracting blocks of documentation. This is helpful when
+ * running this script in a web browser as it is easy to visually see what
+ * blocks of information were extracted when using CSS such as DEBUG_CSS
+ * which highlights elements that should be removed.
+ */
+final DART_REMOVED = "dart-removed";
final DEBUG_CSS = """
<style type="text/css">
- .dart_removed {
+ .dart-removed {
background-color: rgba(255, 0, 0, 0.5);
}
</style>""";
@@ -281,7 +287,7 @@ String getAbsoluteUrl(AnchorElement anchor) {
}
bool inTable(Node n) {
- while(n != null) {
+ while (n != null) {
if (n is TableElement) return true;
n = n.parent;
}
@@ -295,7 +301,7 @@ String escapeHTML(str) {
}
List<Text> getAllTextNodes(Element elem) {
- List<Text> nodes = <Text>[];
+ final nodes = <Text>[];
Bob Nystrom 2012/02/01 18:41:13 :D
Jacob 2012/02/01 21:12:02 Done.
helper(Node n) {
if (n is Text) {
nodes.add(n);
@@ -323,8 +329,8 @@ bool isSkippableType(Node n) {
}
if (n is Text) return true;
- for (Node child in n.nodes) {
- if (isSkippableType(child) == false) {
+ for (final child in n.nodes) {
+ if (!isSkippableType(child)) {
return false;
}
}
@@ -353,44 +359,81 @@ class SectionParseResult {
}
String genCleanHtml(Element root) {
- for (Element e in root.queryAll(".$DART_REMOVED")) {
+ for (final e in root.queryAll(".$DART_REMOVED")) {
e.classes.remove(DART_REMOVED);
}
// Ditch inline styles.
- for (Element e in root.queryAll('[style]')) {
+ for (final e in root.queryAll('[style]')) {
e.attributes.remove('style');
}
// These elements are just tags that we should suppress.
- for (Element e in root.queryAll(".lang.lang-en")) {
+ for (final e in root.queryAll(".lang.lang-en")) {
e.remove();
}
+ Element parametersList;
+ Element returnValue;
+ for (final e in root.queryAll("h6")) {
+ if (e.text == 'Parameters') {
+ parametersList = e;
+ } else if (e.text == 'Return value') {
+ returnValue = e;
+ }
+ }
+
+ if (parametersList != null) {
+ int numEmptyParameters = 0;
+ final parameterDescriptions = root.queryAll("dd");
+ for (Element parameterDescription in parameterDescriptions) {
+ if (parameterDescription.text.trim().length == 0) {
+ numEmptyParameters++;
+ }
+ }
+ if (numEmptyParameters > 0 &&
+ numEmptyParameters == parameterDescriptions.length) {
+ // Remove the parameter list as it adds zero value as all descriptions
+ // are empty.
+ parametersList.remove();
+ for (final e in root.queryAll("dl")) {
+ e.remove();
+ }
+ } else if (parameterDescriptions.length == 0 &&
+ parametersList.nextElementSibling != null &&
+ parametersList.nextElementSibling.text.trim() == 'None.') {
+ // No need to display that the function takes 0 parameters.
+ parametersList.nextElementSibling.remove();
+ parametersList.remove();
+ }
+ }
+
+ // Heuristic: if the return value is a single word it is a type name not a
+ // useful text description so suppress it.
Bob Nystrom 2012/02/01 18:41:13 Comments like this are very helpful.
Jacob 2012/02/01 21:12:02 Done.
+ if (returnValue != null &&
+ returnValue.nextElementSibling != null &&
+ returnValue.nextElementSibling.text.trim().split(' ').length <= 1) {
+ returnValue.nextElementSibling.remove();
+ returnValue.remove();
+ }
+
bool changed = true;
while (changed) {
changed = false;
- while (root.nodes.length == 1) {
- Node child = root.nodes.first;
- if (child is Element) {
- root = child;
- changed = true;
- } else {
- // Just calling innerHTML on the parent will be sufficient...
- // and insures the output is properly escaped.
- break;
- }
+ while (root.nodes.length == 1 && root.nodes.first is Element) {
+ root = root.nodes.first;
+ changed = true;
}
// Trim useless nodes from the front.
- while(root.nodes.length > 0 &&
+ while (root.nodes.length > 0 &&
isSkippable(root.nodes.first)) {
root.nodes.first.remove();
changed = true;
}
// Trim useless nodes from the back.
- while(root.nodes.length > 0 &&
+ while (root.nodes.length > 0 &&
isSkippable(root.nodes.last())) {
root.nodes.last().remove();
changed = true;
@@ -399,10 +442,6 @@ String genCleanHtml(Element root) {
return JSONFIXUPHACK(root.innerHTML);
}
-String genPrettyHtml(DocumentFragment fragment) {
- return genCleanHtml(fragment);
-}
-
String genPrettyHtmlFromElement(Element e) {
e = e.clone(true);
return genCleanHtml(e);
@@ -420,7 +459,7 @@ class PostOrderTraversalIterator implements Iterator<Node> {
Node next() {
if (_next == null) return null;
- Node ret = _next;
+ final ret = _next;
if (_next.nextNode != null) {
_next = _leftMostDescendent(_next.nextNode);
} else {
@@ -445,11 +484,11 @@ class PostOrderTraversal implements Iterable<Node> {
}
Range findFirstLine(Range section, String prop) {
- Range firstLine = newRange();
+ final firstLine = newRange();
firstLine.setStart(section.startContainer, section.startOffset);
num maxBottom = null;
- for (Node n in new PostOrderTraversal(section.startContainer)) {
+ for (final n in new PostOrderTraversal(section.startContainer)) {
int compareResult = section.comparePoint(n, 0);
if (compareResult == -1) {
// before range so skip.
@@ -462,9 +501,8 @@ Range findFirstLine(Range section, String prop) {
final rect = getClientRect(n);
num bottom = rect.bottom;
if (rect.height > 0 && rect.width > 0) {
- if (maxBottom != null && (
- maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom
- )) {
+ if (maxBottom != null &&
+ maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom) {
break;
} else if (maxBottom == null || maxBottom > bottom) {
maxBottom = bottom;
@@ -474,7 +512,10 @@ Range findFirstLine(Range section, String prop) {
firstLine.setEndAfter(n);
}
- if (firstLine.toString().indexOf(stripWebkit(prop)) == -1) {
+ // If the first line of text in the section does not contain the property
+ // name then we're not confident we are able to extract a high accuracy match
+ // so we should not return anything.
+ if (!firstLine.toString().contains(stripWebkit(prop))) {
return null;
}
return firstLine;
@@ -482,7 +523,7 @@ Range findFirstLine(Range section, String prop) {
AnchorElement findAnchorElement(Element root, String prop) {
for (AnchorElement a in root.queryAll("a")) {
- if (a.text.indexOf(prop) != -1) {
+ if (a.text.contains(prop)) {
return a;
}
}
@@ -490,9 +531,9 @@ AnchorElement findAnchorElement(Element root, String prop) {
}
// First surrounding element with an ID is safe enough.
-Element findTigherRoot(Element elem, Element root) {
+Element findTighterRoot(Element elem, Element root) {
Element candidate = elem;
- while(root != candidate) {
+ while (root != candidate) {
candidate = candidate.parent;
if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) {
break;
@@ -501,22 +542,22 @@ Element findTigherRoot(Element elem, Element root) {
return candidate;
}
-// this is very slow and ugly.. consider rewriting.
+// TODO(jacobr): this is very slow and ugly.. consider rewriting or at least
+// commenting carefully.
SectionParseResult filteredHtml(Element elem, Element root, String prop,
Function fragmentGeneratedCallback) {
// Using a tighter root avoids false positives at the risk of trimming
// text we shouldn't.
- root = findTigherRoot(elem, root);
- Range range = newRange();
+ root = findTighterRoot(elem, root);
+ final range = newRange();
range.setStartBefore(elem);
Element current = elem;
while (current != null) {
range.setEndBefore(current);
- if (current.classes.contains(DART_REMOVED)) {
- if (range.toString().trim().length > 0) {
- break;
- }
+ if (current.classes.contains(DART_REMOVED) &&
+ range.toString().trim().length > 0) {
+ break;
}
if (current.firstElementChild != null) {
current = current.firstElementChild;
@@ -547,7 +588,7 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
}
}
}
- DocumentFragment fragment = range.cloneContents();
+ final fragment = range.cloneContents();
if (fragmentGeneratedCallback != null) {
fragmentGeneratedCallback(fragment);
}
@@ -557,7 +598,7 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
}
// Extract idl
- StringBuffer idl = new StringBuffer();
+ final idl = new StringBuffer();
if (prop != null && prop.length > 0) {
// Only expect properties to have HTML.
for(Element e in fragment.queryAll(IDL_SELECTOR)) {
@@ -570,43 +611,42 @@ SectionParseResult filteredHtml(Element elem, Element root, String prop,
for (Element e in fragment.queryAll("pre")) {
// Check if it looks like idl...
String txt = e.text.trim();
- if (likelyIdl.hasMatch(txt) && txt.indexOf("\n") != -1
- && txt.indexOf(")") != -1) {
+ if (likelyIdl.hasMatch(txt) && txt.contains("\n")
+ && txt.contains(")")) {
Bob Nystrom 2012/02/01 18:41:13 This will probably fit on one line now.
Jacob 2012/02/01 21:12:02 Done.
idl.add(e.outerHTML);
e.remove();
}
}
}
- return new SectionParseResult(genPrettyHtml(fragment), url, idl.toString());
+ return new SectionParseResult(genCleanHtml(fragment), url, idl.toString());
}
-Element findBest(Element root, List<Text> allText, String prop, String propType) {
+Element findBest(Element root, List<Text> allText, String prop,
+ String propType) {
// Best bet: match an id
Element cand;
- cand = root.query("#" + prop);
+ cand = root.query("#$prop");
if (cand == null && propType == "methods") {
- cand = root.query("[id=" + prop + "\\(\\)]");
+ cand = root.query("[id=$prop\\(\\)]");
+ }
+ while (cand != null && cand.text.trim().length == 0) {
+ // We found the bookmark for the element but sadly it is just an empty
+ // placeholder. Find the first real element.
+ cand = cand.nextElementSibling;
}
if (cand != null) {
- while (cand != null && cand.text.trim().length == 0) {
- // We found the bookmark for the element but sadly it is just an empty
- // placeholder. Find the first real element.
- cand = cand.nextElementSibling;
- }
- if (cand != null) {
- return cand;
- }
+ return cand;
}
- // If you are at least 70 pixels from the left, something is definitely fishy and we shouldn't even consider this candidate.
+ // If you are at least 70 pixels from the left, something is definitely
+ // fishy and we shouldn't even consider this candidate.
num candLeft = 70;
for (Text text in allText) {
Element proposed = null;
-// var t = safeNameCleanup(text.text);
-// TODO(jacobr): does it hurt precision to use the full cleanup?
+ // TODO(jacobr): does it hurt precision to use the full cleanup?
String t = fullNameCleanup(text.text);
if (t == prop) {
proposed = text.parent;
@@ -636,14 +676,12 @@ bool isObsolete(Element e) {
}
bool isFirstCharLowerCase(String str) {
- RegExp firstLower = new RegExp("^[a-z]");
- return firstLower.hasMatch(str);
+ return const RegExp("^[a-z]").hasMatch(str);
}
-void scrapeSection(Element root, String sectionSelector,
- String currentType,
- List members,
- String propType) {
+// TODO(jacobr): document this method.
+void scrapeSection(Element root, String sectionSelector, String currentType,
+ List members, String propType) {
Map expectedProps = dartIdl[propType];
Set<String> alreadyMatchedProperties = new Set<String>();
@@ -655,8 +693,8 @@ void scrapeSection(Element root, String sectionSelector,
}
for (Element matchElement in allMatches) {
DivElement match = matchElement.parent;
- if (!match.id.startsWith("section") && !(match.id == "pageText")) {
- throw "Enexpected element $match";
+ if (!match.id.startsWith("section") && match.id != "pageText") {
+ throw "Unexpected element $match";
}
match.classes.add(DART_REMOVED);
@@ -669,7 +707,7 @@ void scrapeSection(Element root, String sectionSelector,
int helpIndex = -1;
num i = 0;
for (Element r in t.queryAll("th, td.header")) {
- var txt = r.text.trim().split(" ")[0].toLowerCase();
+ final txt = r.text.trim().split(" ")[0].toLowerCase();
if (txt == "description") {
helpIndex = i;
break;
@@ -685,14 +723,13 @@ void scrapeSection(Element root, String sectionSelector,
// Find the row that seems to have the most names that look like
// expected properties.
for (Element r in t.queryAll("tbody tr")) {
- ElementList $row = r.elements;
- if ($row.length == 0 || $row.first.classes.contains(".header")) {
+ ElementList row = r.elements;
+ if (row.length == 0 || row.first.classes.contains(".header")) {
continue;
}
- for (int k = 0; k < numMatches.length && k < $row.length; k++) {
- Element e = $row[k];
- if (expectedProps.containsKey(fullNameCleanup(e.text))) {
+ for (int k = 0; k < numMatches.length && k < row.length; k++) {
+ if (expectedProps.containsKey(fullNameCleanup(row[k].text))) {
numMatches[k]++;
break;
}
@@ -711,14 +748,14 @@ void scrapeSection(Element root, String sectionSelector,
}
for (Element r in t.queryAll("tbody tr")) {
- ElementList $row = r.elements;
- if ($row.length > propNameIndex && $row.length > helpIndex ) {
- if ($row.first.classes.contains(".header")) {
+ ElementList row = r.elements;
+ if (row.length > propNameIndex && row.length > helpIndex) {
+ if (row.first.classes.contains(".header")) {
continue;
}
// TODO(jacobr): this code for determining the namestr is needlessly
// messy.
- Element nameRow = $row[propNameIndex];
+ Element nameRow = row[propNameIndex];
AnchorElement a = nameRow.query("a");
String goodName = '';
if (a != null) {
@@ -728,15 +765,14 @@ void scrapeSection(Element root, String sectionSelector,
Map entry = new Map<String, String>();
- // "currentType": $($row[1]).text().trim(), // find("code") ?
- entry["name"] = fullNameCleanup(nameStr.length > 0 ? nameStr : goodName);
+ entry["name"] = fullNameCleanup(nameStr.length > 0 ?
+ nameStr : goodName);
final parse = filteredHtml(nameRow, nameRow, entry["name"], null);
String altHelp = parse.html;
- // "jsSignature": nameStr,
- entry["help"] = (helpIndex == -1 || $row[helpIndex] == null) ? altHelp : genPrettyHtmlFromElement($row[helpIndex]);
- // "altHelp" : altHelp,
+ entry["help"] = (helpIndex == -1 || row[helpIndex] == null) ?
+ altHelp : genPrettyHtmlFromElement(row[helpIndex]);
if (parse.url != null) {
entry["url"] = parse.url;
}
@@ -777,8 +813,7 @@ void scrapeSection(Element root, String sectionSelector,
}
for (String prop in pmap.getKeys()) {
- Element e = pmap[prop];
- e.classes.add(DART_REMOVED);
+ pmap[prop].classes.add(DART_REMOVED);
}
for (String prop in pmap.getKeys()) {
@@ -786,14 +821,15 @@ void scrapeSection(Element root, String sectionSelector,
ClientRect r = getClientRect(e);
// TODO(jacobr): a lot of these queries are identical.
for (Element cand in match.queryAll(e.tagName)) {
- if (!cand.classes.contains(DART_REMOVED) && !inTable(cand) ) { // XXX use a neg selector.
+ // TODO(jacobr): use a negative selector instead.
+ if (!cand.classes.contains(DART_REMOVED) && !inTable(cand)) {
ClientRect candRect = getClientRect(cand);
// TODO(jacobr): this is somewhat loose.
if (candRect.left == r.left &&
(candRect.height - r.height).abs() < 5) {
String propName = fullNameCleanup(cand.text);
- if (isFirstCharLowerCase(propName) && pmap.containsKey(propName) == false && alreadyMatchedProperties.contains(propName) == false) {
- // Don't set here to avoid layouts... cand.classes.add(DART_REMOVED);
+ if (isFirstCharLowerCase(propName) && !pmap.containsKey(propName)
+ && !alreadyMatchedProperties.contains(propName)) {
pmap[propName] = cand;
}
}
@@ -810,7 +846,7 @@ void scrapeSection(Element root, String sectionSelector,
// DART_REMOVED so we don't include them in member descriptions... which
// would suck.
for (Element e in match.queryAll("[id]")) {
- if (e.id.indexOf(matchElement.id) != -1) {
+ if (e.id.contains(matchElement.id)) {
e.classes.add(DART_REMOVED);
}
}
@@ -828,7 +864,6 @@ void scrapeSection(Element root, String sectionSelector,
"name" : prop,
"help" : parse.html,
"obsolete" : obsolete
- //"jsSignature" : nameStr
};
if (parse.idl.length > 0) {
entry["idl"] = parse.idl;
@@ -844,10 +879,8 @@ String trimHtml(String html) {
}
bool maybeName(String name) {
Bob Nystrom 2012/02/01 18:41:13 Doesn't seem to be used?
Jacob 2012/02/01 21:12:02 It is used. You must be just searching this diff
- RegExp nameRegExp = new RegExp("^[a-z][a-z0-9A-Z]+\$");
- if (nameRegExp.hasMatch(name)) return true;
- RegExp constRegExp = new RegExp("^[A-Z][A-Z_]*\$");
- if (constRegExp.hasMatch(name)) return true;
+ return const RegExp("^[a-z][a-z0-9A-Z]+\$").hasMatch(name) ||
+ const RegExp("^[A-Z][A-Z_]*\$").hasMatch(name);
}
void markRemoved(var e) {
@@ -868,9 +901,7 @@ String JSONFIXUPHACK(String value) {
}
String mozToWebkit(String name) {
- RegExp regExp = new RegExp("^moz");
- name = name.replaceFirst(regExp, "webkit");
- return name;
+ return name.replaceFirst(const RegExp("^moz"), "webkit");
}
String stripWebkit(String name) {
@@ -950,10 +981,6 @@ String trimPrefix(String str, String prefix) {
}
}
-void resourceLoaded() {
- if (data != null) run();
-}
-
String trimStart(String str, String start) {
if (str.startsWith(start) && str.length > start.length) {
return str.substring(start.length);
@@ -987,7 +1014,7 @@ void extractSection(String selector, String key) {
}
void run() {
- // Inject CSS to insure lines don't wrap unless it was intentional.
+ // Inject CSS to ensure lines don't wrap unless it was intentional.
document.head.nodes.add(new Element.html("""
<style type="text/css">
body {
@@ -1006,7 +1033,8 @@ void run() {
e.remove();
}
- // Flatten the list of known DOM types into a faster and case-insensitive map.
+ // Flatten the list of known DOM types into a faster and case-insensitive
+ // map.
domTypes = {};
for (final domType in domTypesRaw) {
domTypes[domType.toLowerCase()] = domType;
@@ -1024,7 +1052,8 @@ void run() {
// TODO(rnystrom): Add rel external to links we didn't fix.
for (AnchorElement a in document.queryAll('a')) {
// Get the raw attribute because we *don't* want the browser to fully-
- // qualify the name for us since it has the wrong base address for the page.
+ // qualify the name for us since it has the wrong base address for the
+ // page.
var href = a.attributes['href'];
// Ignore busted links.
@@ -1070,7 +1099,7 @@ void run() {
a.attributes['href'] = href;
}
- if (title.toLowerCase().indexOf(currentTypeTiny.toLowerCase()) == -1) {
+ if (!title.toLowerCase().contains(currentTypeTiny.toLowerCase())) {
bool foundMatch = false;
// Test out if the title is really an HTML tag that matches the
// current class name.
@@ -1083,7 +1112,7 @@ void run() {
}
} catch(e) {}
}
- if (foundMatch == false) {
+ if (!foundMatch ){
Bob Nystrom 2012/02/01 18:41:13 Remove space before ")".
Jacob 2012/02/01 21:12:02 Removed the space before ")".
dbEntry['skipped'] = true;
dbEntry['cause'] = "Suspect title";
onEnd();
@@ -1109,31 +1138,32 @@ void run() {
markRemoved(document.queryAll("h1, h2"));
scrapeSection(root, "#Methods", currentType, members, 'methods');
- scrapeSection(root, "#Constants, #Error_codes, #State_constants", currentType, members, 'constants');
+ scrapeSection(root, "#Constants, #Error_codes, #State_constants",
+ currentType, members, 'constants');
// TODO(jacobr): infer tables based on multiple matches rather than
// using a hard coded list of section ids.
scrapeSection(root,
- "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, #DOM_properties, #Event_handlers, #Event_Handlers",
+ "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, " +
+ "#DOM_properties, #Event_handlers, #Event_Handlers",
currentType, members, 'properties');
// Avoid doing this till now to avoid messing up the section scrape.
markRemoved(document.queryAll("h3"));
- ElementList $examples = root.queryAll("span[id^=example], span[id^=Example]");
+ ElementList examples = root.queryAll("span[id^=example], span[id^=Example]");
extractSection("#See_also", 'seeAlso');
extractSection("#Specification, #Specifications", "specification");
- // $("#Methods").parent().remove(); // not safe (e.g. Document)
// TODO(jacobr): actually extract the constructor(s)
extractSection("#Constructor, #Constructors", 'constructor');
extractSection("#Browser_compatibility, #Compatibility", 'compatibility');
List<String> exampleHtml = [];
- for (Element e in $examples) {
+ for (Element e in examples) {
e.classes.add(DART_REMOVED);
}
- for (Element e in $examples) {
+ for (Element e in examples) {
String html = filteredHtml(e, root, null,
(DocumentFragment fragment) {
removeHeaders(fragment);
@@ -1189,6 +1219,6 @@ void documentLoaded(event) {
new XMLHttpRequest.getTEMPNAME('${window.location}.json', (req) {
data = JSON.parse(req.responseText);
dbEntry = {'members': [], 'srcUrl': pageUrl};
- resourceLoaded();
+ run();
});
}

Powered by Google App Engine
This is Rietveld 408576698