Index: utils/apidoc/mdn/extractRunner.js |
diff --git a/utils/apidoc/mdn/extractRunner.js b/utils/apidoc/mdn/extractRunner.js |
new file mode 100644 |
index 0000000000000000000000000000000000000000..1004fdec440c68419557153ab36f8a02441098d0 |
--- /dev/null |
+++ b/utils/apidoc/mdn/extractRunner.js |
@@ -0,0 +1,179 @@ |
+var fs = require('fs'); |
+var util = require('util'); |
+var exec = require('child_process').exec; |
+var path = require('path'); |
+ |
+var db = {}; |
+var metadata = {}; |
+var USE_VM = false; |
+ |
+// Warning: START_DART_MESSAGE must match the value hardcoded in extract.dart |
+// TODO(jacobr): figure out a cleaner way to parse this data. |
+var START_DART_MESSAGE = "START_DART_MESSAGE_UNIQUE_IDENTIFIER"; |
+var END_DART_MESSAGE = "END_DART_MESSAGE_UNIQUE_IDENTIFIER"; |
+ |
+var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8').toString()); |
+var cacheData = JSON.parse(fs.readFileSync('output/crawl/cache.json', 'utf8').toString()); |
+var dartIdl = JSON.parse(fs.readFileSync('data/dartIdl.json', 'utf8').toString()); |
+ |
+try { |
+ fs.mkdirSync('output/extract'); |
+} catch (e) { |
+ // It doesn't matter if the directories already exist. |
+} |
+ |
+var errorFiles = []; |
+// TODO(jacobr): blacklist these types as we can't get good docs for them. |
+// ["Performance"] |
+ |
+function parseFile(type, onDone, entry, file, searchResultIndex) { |
+ var inputFile; |
+ try { |
+ inputFile = fs.readFileSync("output/crawl/" + file, 'utf8').toString(); |
+ } catch (e) { |
+ console.warn("Couldn't read: " + file); |
+ onDone(); |
+ return; |
+ } |
+ |
+ var inputFileRaw = inputFile; |
+ // Cached pages have multiple DOCTYPE tags. Strip off the first one so that |
+ // we have valid HTML. |
+ if (inputFile.indexOf("<!DOCTYPE") == 0) { |
+ inputFile = inputFile.substr(1); |
+ var matchIndex = inputFile.indexOf("<!DOCTYPE"); |
+ if (matchIndex == -1) { |
+ // not a cached page. |
+ inputFile = inputFileRaw; |
+ } else { |
+ inputFile = inputFile.substr(matchIndex); |
+ } |
+ } |
+ |
+ // Disable all existing javascript in the input file to speedup parsing and |
+ // avoid conflicts between our JS and the JS in the file. |
+ inputFile = inputFile.replace(/<script type="text\/javascript"/g, |
+ '<script type="text/ignored"'); |
+ |
+ var endBodyIndex = inputFile.lastIndexOf("</body>"); |
+ if (endBodyIndex == -1) { |
+ // Some files are missing a closing body tag. |
+ endBodyIndex = inputFile.lastIndexOf("</html>"); |
+ } |
+ if (endBodyIndex == -1) { |
+ if (inputFile.indexOf("Error 404 (Not Found)") != -1) { |
+ console.warn("Skipping 404 file"); |
+ onDone(); |
+ return; |
+ } |
+ throw "Unexpected file format for " + file; |
+ } |
+ |
+ // Remove all easy to remove script tags to speed page load. |
+ inputFile = inputFile.substring(0, endBodyIndex) + |
+ '<script type="text/javascript">\n' + |
+ ' if (window.layoutTestController) {\n' + |
+ ' var controller = window.layoutTestController;\n' + |
+ ' controller.dumpAsText();\n' + |
+ ' controller.waitUntilDone();\n' + |
+ ' }\n' + |
+ 'window.addEventListener("message", receiveMessage, false);\n' + |
+ 'function receiveMessage(event) {\n' + |
+ ' if (event.data.indexOf("' + START_DART_MESSAGE + '") != 0) return;\n' + |
+ ' console.log(event.data + "' + END_DART_MESSAGE + '");\n' + |
+ ' if (window.layoutTestController) {\n' + |
+ ' document.documentElement.textContent = "";\n' + |
+ ' window.layoutTestController.notifyDone();\n' + |
+ ' }\n' + |
+ '}\n' + |
+ '</script>\n' + |
+ (USE_VM ? |
+ '<script type="application/dart" src="../../extract.dart"></script>' : |
+ '<script type="text/javascript" src="../../output/extract.dart.js"></script>') + |
+ '\n' + inputFile.substring(endBodyIndex); |
+ |
+ console.log("Processing: " + file); |
+ var dumpFileName = "output/extract/" + file; |
+ var absoluteDumpFileName = path.resolve(dumpFileName); |
+ fs.writeFileSync(absoluteDumpFileName, inputFile, 'utf8'); |
+ var parseArgs = { |
+ type: type, |
+ searchResult: entry, |
+ dartIdl: dartIdl[type] |
+ }; |
+ fs.writeFileSync(absoluteDumpFileName + ".json", JSON.stringify(parseArgs), |
+ 'utf8'); |
+ |
+ var cmd = '../../../client/tests/drt/DumpRenderTree.app/Contents/MacOS/' + |
+ 'DumpRenderTree ' + absoluteDumpFileName; |
+ console.log(cmd); |
+ var child = exec(cmd, |
+ function (error, stdout, stderr) { |
+ var msgIndex = stdout.indexOf(START_DART_MESSAGE); |
+ var msg = stdout.substring(msgIndex + START_DART_MESSAGE.length); |
+ var msg = msg.substring(0, msg.indexOf(END_DART_MESSAGE)); |
+ console.log('all: ' + stdout); |
+ console.log('stderr: ' + stderr); |
+ if (error !== null) { |
+ console.log('exec error: ' + error); |
+ } |
+ |
+ if (!(type in db)) { |
+ db[type] = []; |
+ } |
+ try { |
+ db[type][searchResultIndex] = JSON.parse(msg); |
+ } catch(e) { |
+ console.warn("error parsing result for " + type + " file= "+ file); |
+ errorFiles.push(file); |
+ fs.writeFileSync("output/errors.json", |
+ JSON.stringify(errorFiles, null, ' '), 'utf8'); |
+ } |
+ onDone(); |
+ }); |
+} |
+var tasks = []; |
+ |
+var numProcesses = 8; |
+// Have numProcesses extraction tasks running simultaneously to improve |
+// performance. If your machine is slow, you may need to dial back the |
+// parallelism. |
+var numPending = numProcesses; |
+ |
+function processNextTask() { |
+ numPending--; |
+ if (tasks.length > 0) { |
+ numPending++; |
+ var task = tasks.pop(); |
+ task(); |
+ } else { |
+ if (numPending <= 0) { |
+ console.log("Successfully completed all tasks"); |
+ fs.writeFileSync("output/database.json", |
+ JSON.stringify(db, null, ' '), 'utf8'); |
+ } |
+ } |
+} |
+ |
+function createTask(type, entry, index) { |
+ return function () { |
+ var file = type + index + '.html'; |
+ parseFile(type, processNextTask, entry, file, index); |
+ }; |
+} |
+ |
+for (var i = 0; i < domTypes.length; i++) { |
+ var type = domTypes[i]; |
+ var entries = cacheData[type]; |
+ if (entries != null) { |
+ for (var j = 0; j < entries.length; j++) { |
+ tasks.push(createTask(type, entries[j], j)); |
+ } |
+ } else { |
+ console.warn("No crawled files for " + type); |
+ } |
+} |
+ |
+for (var p = 0; p < numProcesses; p++) { |
+ processNextTask(); |
+} |