Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1099)

Unified Diff: utils/apidoc/mdn/extractRunner.js

Issue 9225039: Integrate MDN content into API documentation. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Remove temp code. Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: utils/apidoc/mdn/extractRunner.js
diff --git a/utils/apidoc/mdn/extractRunner.js b/utils/apidoc/mdn/extractRunner.js
new file mode 100644
index 0000000000000000000000000000000000000000..1004fdec440c68419557153ab36f8a02441098d0
--- /dev/null
+++ b/utils/apidoc/mdn/extractRunner.js
@@ -0,0 +1,179 @@
+var fs = require('fs');
nweiz 2012/02/01 00:10:39 It's not clear why this is in JS either. The reaso
Jacob 2012/02/01 07:48:26 Same reason as other js script. This should be re
+var util = require('util');
+var exec = require('child_process').exec;
+var path = require('path');
+
// Module-level state and inputs shared by the rest of the script.
//   db          - results keyed by type; each value is an array filled in by
//                 parseFile's exec callback and finally written to
//                 output/database.json by processNextTask.
//   metadata    - not referenced anywhere else in the visible diff.
//   USE_VM      - selects which extractor script tag parseFile injects
//                 (extract.dart when true, the compiled extract.dart.js otherwise).
//   errorFiles  - names of files whose extraction output could not be parsed.
// The three JSON inputs are read synchronously at load time, so a missing or
// malformed file throws before any task is queued.
// NOTE(review): the empty catch around mkdirSync swallows every error, not only
// the directory-already-exists case that its comment describes.
+var db = {};
+var metadata = {};
+var USE_VM = false;
+
+// Warning: START_DART_MESSAGE must match the value hardcoded in extract.dart
+// TODO(jacobr): figure out a cleaner way to parse this data.
+var START_DART_MESSAGE = "START_DART_MESSAGE_UNIQUE_IDENTIFIER";
+var END_DART_MESSAGE = "END_DART_MESSAGE_UNIQUE_IDENTIFIER";
+
+var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8').toString());
+var cacheData = JSON.parse(fs.readFileSync('output/crawl/cache.json', 'utf8').toString());
+var dartIdl = JSON.parse(fs.readFileSync('data/dartIdl.json', 'utf8').toString());
nweiz 2012/02/01 00:10:39 Line lengths.
Jacob 2012/02/01 07:48:26 Done.
+
+try {
+ fs.mkdirSync('output/extract');
+} catch (e) {
+ // It doesn't matter if the directories already exist.
+}
+
+var errorFiles = [];
+// TODO(jacobr): blacklist these types as we can't get good docs for them.
+// ["Performance"]
+
// parseFile: prepares one crawled page for extraction, runs DumpRenderTree on
// it, and stores the JSON message the page logs into db[type][searchResultIndex].
//   type              - type name; used as the key into dartIdl and db.
//   onDone            - zero-argument callback, called on an early skip
//                       (unreadable file or 404 page) and at the end of the
//                       exec callback. It is not called on the throw path.
//   entry             - written as searchResult into the sidecar .json file.
//   file              - file name, read from output/crawl/ and rewritten under
//                       output/extract/.
//   searchResultIndex - slot in db[type] that receives the parsed result.
// Throws a string (not an Error) when the page has neither a closing body nor a
// closing html tag and is not a 404 page.
// Steps: read the page, drop the leading DOCTYPE of a cached page, retag
// existing text/javascript scripts as text/ignored, splice a message-listener
// script plus the extractor script in front of the closing tag, write the page
// and its .json arguments, then exec DumpRenderTree and cut the text between
// START_DART_MESSAGE and END_DART_MESSAGE out of stdout.
// NOTE(review): the comment "Remove all easy to remove script tags" below is
// stale; the statement it precedes inserts scripts, it removes nothing.
// NOTE(review): msg is declared with var twice (the second is only a
// reassignment) and child is assigned but never read in the visible code.
// NOTE(review): when stdout lacks START_DART_MESSAGE, indexOf returns -1 and
// the slice starts at START_DART_MESSAGE.length - 1; presumably JSON.parse then
// fails and the file is recorded in errorFiles - confirm.
// NOTE(review): the DumpRenderTree path is specific to OS X and the command is
// built by string concatenation, so the file path is passed to the shell as is.
+function parseFile(type, onDone, entry, file, searchResultIndex) {
nweiz 2012/02/01 00:10:39 Why is this a separate function? It's only called
Jacob 2012/02/01 07:48:26 Seems like a reasonable function name to me.
nweiz 2012/02/01 21:23:02 The name is reasonable, but "// parse the HTML fil
Jacob 2012/02/02 05:26:38 here's a different take on why this is a good func
nweiz 2012/02/02 19:54:34 This function also includes reading the input file
+ var inputFile;
+ try {
+ inputFile = fs.readFileSync("output/crawl/" + file, 'utf8').toString();
+ } catch (e) {
+ console.warn("Couldn't read: " + file);
+ onDone();
+ return;
+ }
+
+ var inputFileRaw = inputFile;
+ // Cached pages have multiple DOCTYPE tags. Strip off the first one so that
+ // we have valid HTML.
+ if (inputFile.indexOf("<!DOCTYPE") == 0) {
+ inputFile = inputFile.substr(1);
nweiz 2012/02/01 00:10:39 It would be much clearer to just do "var matchInde
Jacob 2012/02/01 07:48:26 Can't do that now that a toLowerCase is also neede
+ var matchIndex = inputFile.indexOf("<!DOCTYPE");
+ if (matchIndex == -1) {
+ // not a cached page.
+ inputFile = inputFileRaw;
+ } else {
+ inputFile = inputFile.substr(matchIndex);
+ }
+ }
+
+ // Disable all existing javascript in the input file to speedup parsing and
nweiz 2012/02/01 00:10:39 Grammar nit: "speed up"
Jacob 2012/02/01 07:48:26 speedup seems like it is valid. http://en.wikipedi
nweiz 2012/02/01 21:23:02 "Speedup" is a noun; "speed up" is the verb form.
Jacob 2012/02/02 05:26:38 Done.
+ // avoid conflicts between our JS and the JS in the file.
+ inputFile = inputFile.replace(/<script type="text\/javascript"/g,
+ '<script type="text/ignored"');
+
+ var endBodyIndex = inputFile.lastIndexOf("</body>");
+ if (endBodyIndex == -1) {
+ // Some files are missing a closing body tag.
+ endBodyIndex = inputFile.lastIndexOf("</html>");
+ }
+ if (endBodyIndex == -1) {
+ if (inputFile.indexOf("Error 404 (Not Found)") != -1) {
+ console.warn("Skipping 404 file");
nweiz 2012/02/01 00:10:39 List the filename.
Jacob 2012/02/01 07:48:26 Done.
+ onDone();
+ return;
+ }
+ throw "Unexpected file format for " + file;
nweiz 2012/02/01 00:10:39 Why are we throwing here instead of warning?
Jacob 2012/02/01 07:48:26 because that indicates a serious bug
+ }
+
+ // Remove all easy to remove script tags to speed page load.
nweiz 2012/02/01 00:10:39 That's not what this code is doing.
Jacob 2012/02/01 07:48:26 yep. i used to and then stopped. removed comment.
+ inputFile = inputFile.substring(0, endBodyIndex) +
+ '<script type="text/javascript">\n' +
+ ' if (window.layoutTestController) {\n' +
nweiz 2012/02/01 00:10:39 Why are we feature-detecting here? Are we planning
Jacob 2012/02/01 07:48:26 added comment explaining this. // We feature
+ ' var controller = window.layoutTestController;\n' +
+ ' controller.dumpAsText();\n' +
+ ' controller.waitUntilDone();\n' +
+ ' }\n' +
+ 'window.addEventListener("message", receiveMessage, false);\n' +
+ 'function receiveMessage(event) {\n' +
+ ' if (event.data.indexOf("' + START_DART_MESSAGE + '") != 0) return;\n' +
+ ' console.log(event.data + "' + END_DART_MESSAGE + '");\n' +
+ ' if (window.layoutTestController) {\n' +
+ ' document.documentElement.textContent = "";\n' +
+ ' window.layoutTestController.notifyDone();\n' +
+ ' }\n' +
+ '}\n' +
+ '</script>\n' +
+ (USE_VM ?
+ '<script type="application/dart" src="../../extract.dart"></script>' :
+ '<script type="text/javascript" src="../../output/extract.dart.js"></script>') +
nweiz 2012/02/01 00:10:39 Line length
Jacob 2012/02/01 07:48:26 Done.
+ '\n' + inputFile.substring(endBodyIndex);
+
+ console.log("Processing: " + file);
+ var dumpFileName = "output/extract/" + file;
nweiz 2012/02/01 00:10:39 Style nit: unnecessary variable.
Jacob 2012/02/01 07:48:26 Done.
+ var absoluteDumpFileName = path.resolve(dumpFileName);
+ fs.writeFileSync(absoluteDumpFileName, inputFile, 'utf8');
+ var parseArgs = {
+ type: type,
+ searchResult: entry,
+ dartIdl: dartIdl[type]
+ };
+ fs.writeFileSync(absoluteDumpFileName + ".json", JSON.stringify(parseArgs),
+ 'utf8');
+
+ var cmd = '../../../client/tests/drt/DumpRenderTree.app/Contents/MacOS/' +
+ 'DumpRenderTree ' + absoluteDumpFileName;
nweiz 2012/02/01 00:10:39 TODO: Make this run on platforms other than OS X.
Jacob 2012/02/01 07:48:26 Done.
+ console.log(cmd);
+ var child = exec(cmd,
nweiz 2012/02/01 00:10:39 Unused variable.
Jacob 2012/02/01 07:48:26 Done.
+ function (error, stdout, stderr) {
+ var msgIndex = stdout.indexOf(START_DART_MESSAGE);
+ var msg = stdout.substring(msgIndex + START_DART_MESSAGE.length);
+ var msg = msg.substring(0, msg.indexOf(END_DART_MESSAGE));
nweiz 2012/02/01 00:10:39 Shouldn't have "var". Actually, this stuff should
Jacob 2012/02/01 07:48:26 Done.
+ console.log('all: ' + stdout);
+ console.log('stderr: ' + stderr);
+ if (error !== null) {
+ console.log('exec error: ' + error);
+ }
+
+ if (!(type in db)) {
+ db[type] = [];
+ }
+ try {
+ db[type][searchResultIndex] = JSON.parse(msg);
+ } catch(e) {
+ console.warn("error parsing result for " + type + " file= "+ file);
+ errorFiles.push(file);
+ fs.writeFileSync("output/errors.json",
+ JSON.stringify(errorFiles, null, ' '), 'utf8');
nweiz 2012/02/01 00:10:39 Why is this written again for every error?
Jacob 2012/02/01 07:48:26 So that if you press control-c you always have an
nweiz 2012/02/01 21:23:02 I see. Useful inline comment?
Jacob 2012/02/02 05:26:38 Done.
+ }
+ onDone();
+ });
+}
// Work-queue state.
//   tasks        - zero-argument closures produced by createTask, one per
//                  crawled entry.
//   numProcesses - how many tasks are kept running at once.
//   numPending   - count of worker slots still active. It starts at
//                  numProcesses because the startup loop calls
//                  processNextTask() that many times and each call begins by
//                  decrementing this counter.
+var tasks = [];
+
+var numProcesses = 8;
+// Have numProcesses extraction tasks running simultaneously to improve
+// performance. If your machine is slow, you may need to dial back the
+// parallelism.
nweiz 2012/02/01 00:10:39 This comment should probably be attached to numPro
Jacob 2012/02/01 07:48:26 Done. Also moved to the top of the file.
+var numPending = numProcesses;
+
// processNextTask: completion callback and scheduler for one worker slot.
// Each call first retires the slot (numPending--). If tasks remain, the slot
// is taken back (numPending++) and the next task is started, so numPending
// only falls when a slot finds the queue empty. When the queue is empty and
// numPending has reached zero, db is written to output/database.json.
// Tasks are taken with pop(), i.e. from the end of the array, so they run in
// the reverse of the order in which they were pushed.
// NOTE(review): the success message is logged without consulting errorFiles.
+function processNextTask() {
nweiz 2012/02/01 00:10:39 If you're trying to do stuff in parallel, this wou
Jacob 2012/02/01 07:48:26 I strongly disagree because the bottleneck is in t
+ numPending--;
+ if (tasks.length > 0) {
+ numPending++;
+ var task = tasks.pop();
+ task();
+ } else {
+ if (numPending <= 0) {
+ console.log("Successfully completed all tasks");
+ fs.writeFileSync("output/database.json",
+ JSON.stringify(db, null, ' '), 'utf8');
+ }
+ }
+}
+
// createTask: returns a zero-argument closure that runs parseFile for one
// (type, entry, index) triple, passing processNextTask as the completion
// callback. The file name is built as type + index + ".html".
// Being a separate function gives every closure its own type/entry/index
// bindings; a closure created directly inside the var-based loops that call
// this would see the loop variables' final values instead.
+function createTask(type, entry, index) {
nweiz 2012/02/01 00:10:39 This also doesn't seem worth a function.
Jacob 2012/02/01 07:48:26 I disagree
nweiz 2012/02/01 21:23:02 Why? I don't think it adds any clarity over just p
Jacob 2012/02/02 05:26:38 Keep in mind this is JavaScript not Dart so you'd
nweiz 2012/02/02 19:54:34 Good point, I had forgotten about Javascript's cra
+ return function () {
+ var file = type + index + '.html';
+ parseFile(type, processNextTask, entry, file, index);
+ };
+}
+
// Driver: queue one task per crawled entry of every type listed in domTypes,
// then start numProcesses worker slots.
// The loose comparison entries != null is false for both null and undefined,
// so a type with no key in cacheData takes the warning branch.
// Each initial processNextTask() call starts one task (if any are queued);
// later tasks are started from parseFile's completion callback.
+for (var i = 0; i < domTypes.length; i++) {
+ var type = domTypes[i];
+ var entries = cacheData[type];
+ if (entries != null) {
nweiz 2012/02/01 00:10:39 Style nit: if (!entries)
Jacob 2012/02/01 07:48:26 I disagree. I prefer != null as it more clearly sp
nweiz 2012/02/01 21:23:02 I'm not sure I like the idea of writing Javascript
Jacob 2012/02/02 05:26:38 writing JavaScript as if it were dart is the right
+ for (var j = 0; j < entries.length; j++) {
+ tasks.push(createTask(type, entries[j], j));
+ }
+ } else {
+ console.warn("No crawled files for " + type);
+ }
+}
+
+for (var p = 0; p < numProcesses; p++) {
+ processNextTask();
+}

Powered by Google App Engine
This is Rietveld 408576698