OLD | NEW |
(Empty) | |
var fs = require('fs');
var util = require('util');
var exec = require('child_process').exec;
var path = require('path');

// Extraction results keyed by DOM type name. Each value is an array indexed
// by the position of the search result the data was extracted from.
var db = {};
// NOTE(review): not referenced anywhere in the visible script -- confirm
// whether it is still needed before removing.
var metadata = {};
// When true the generated pages load extract.dart directly; otherwise they
// load the compiled output/extract.dart.js.
var USE_VM = false;

// Warning: START_DART_MESSAGE must match the value hardcoded in extract.dart
// TODO(jacobr): figure out a cleaner way to parse this data.
var START_DART_MESSAGE = "START_DART_MESSAGE_UNIQUE_IDENTIFIER";
var END_DART_MESSAGE = "END_DART_MESSAGE_UNIQUE_IDENTIFIER";

// readFileSync already returns a string when an encoding is supplied, so no
// further conversion is needed before parsing.
var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8'));
var cacheData = JSON.parse(fs.readFileSync('output/crawl/cache.json', 'utf8'));
var dartIdl = JSON.parse(fs.readFileSync('data/dartIdl.json', 'utf8'));

try {
  fs.mkdirSync('output/extract');
} catch (e) {
  // It doesn't matter if the directory already exists. Any other failure
  // (permissions, missing parent, ...) would make every later write into
  // output/extract fail, so surface it here instead of hiding it.
  if (e.code !== 'EEXIST') {
    throw e;
  }
}

// Names of crawled files whose extraction output could not be parsed.
var errorFiles = [];
// TODO(jacobr): blacklist these types as we can't get good docs for them.
// ["Performance"]
/**
 * Prepares one crawled HTML page for extraction, runs it through
 * DumpRenderTree and records the JSON reported back by the extraction script.
 *
 * The page is read from output/crawl/<file>, rewritten with a small harness
 * script plus the extractor script, and written to output/extract/<file>.
 * The arguments for the extractor are written next to it as <file>.json.
 * The parsed result is stored in db[type][searchResultIndex]; if the output
 * cannot be parsed the file name is appended to errorFiles and
 * output/errors.json is rewritten.
 *
 * @param {string} type DOM type name the page documents.
 * @param {function()} onDone Called exactly once when this file is finished
 *     (including when it is skipped because it is unreadable or a 404 page).
 * @param {*} entry Crawl cache entry, passed through to the extractor as
 *     `searchResult`.
 * @param {string} file File name relative to output/crawl/.
 * @param {number} searchResultIndex Slot in db[type] for the result.
 * @throws {Error} If the page has neither </body> nor </html> and is not a
 *     404 page. onDone is NOT called in that case.
 */
function parseFile(type, onDone, entry, file, searchResultIndex) {
  var inputFile;
  try {
    inputFile = fs.readFileSync("output/crawl/" + file, 'utf8').toString();
  } catch (e) {
    console.warn("Couldn't read: " + file);
    onDone();
    return;
  }

  // Cached pages have multiple DOCTYPE tags. Strip everything before the
  // second one so that we have valid HTML. Pages with a single DOCTYPE are
  // not cached pages and are left untouched.
  if (inputFile.indexOf("<!DOCTYPE") === 0) {
    var secondDoctypeIndex = inputFile.indexOf("<!DOCTYPE", 1);
    if (secondDoctypeIndex !== -1) {
      inputFile = inputFile.substr(secondDoctypeIndex);
    }
  }

  // Disable all existing javascript in the input file to speedup parsing and
  // avoid conflicts between our JS and the JS in the file.
  inputFile = inputFile.replace(/<script type="text\/javascript"/g,
      '<script type="text/ignored"');

  var endBodyIndex = inputFile.lastIndexOf("</body>");
  if (endBodyIndex === -1) {
    // Some files are missing a closing body tag.
    endBodyIndex = inputFile.lastIndexOf("</html>");
  }
  if (endBodyIndex === -1) {
    if (inputFile.indexOf("Error 404 (Not Found)") !== -1) {
      console.warn("Skipping 404 file");
      onDone();
      return;
    }
    // Throw a real Error (not a bare string) so a stack trace is available.
    throw new Error("Unexpected file format for " + file);
  }

  // Inject the harness just before the closing tag. It listens for the
  // window "message" event whose data starts with START_DART_MESSAGE, echoes
  // it to the console followed by END_DART_MESSAGE, and tells the layout test
  // controller (when present) that the page is done.
  inputFile = inputFile.substring(0, endBodyIndex) +
      '<script type="text/javascript">\n' +
      ' if (window.layoutTestController) {\n' +
      ' var controller = window.layoutTestController;\n' +
      ' controller.dumpAsText();\n' +
      ' controller.waitUntilDone();\n' +
      ' }\n' +
      'window.addEventListener("message", receiveMessage, false);\n' +
      'function receiveMessage(event) {\n' +
      ' if (event.data.indexOf("' + START_DART_MESSAGE + '") != 0) return;\n' +
      ' console.log(event.data + "' + END_DART_MESSAGE + '");\n' +
      ' if (window.layoutTestController) {\n' +
      ' document.documentElement.textContent = "";\n' +
      ' window.layoutTestController.notifyDone();\n' +
      ' }\n' +
      '}\n' +
      '</script>\n' +
      (USE_VM ?
          '<script type="application/dart" src="../../extract.dart"></script>' :
          '<script type="text/javascript" src="../../output/extract.dart.js"></script>') +
      '\n' + inputFile.substring(endBodyIndex);

  console.log("Processing: " + file);
  var dumpFileName = "output/extract/" + file;
  var absoluteDumpFileName = path.resolve(dumpFileName);
  fs.writeFileSync(absoluteDumpFileName, inputFile, 'utf8');
  var parseArgs = {
    type: type,
    searchResult: entry,
    dartIdl: dartIdl[type]
  };
  fs.writeFileSync(absoluteDumpFileName + ".json", JSON.stringify(parseArgs),
      'utf8');

  // NOTE(review): the path is interpolated into a shell command unquoted, so
  // a working directory containing spaces will break this invocation.
  var cmd = '../../../client/tests/drt/DumpRenderTree.app/Contents/MacOS/' +
      'DumpRenderTree ' + absoluteDumpFileName;
  console.log(cmd);
  exec(cmd,
      function (error, stdout, stderr) {
        console.log('all: ' + stdout);
        console.log('stderr: ' + stderr);
        if (error !== null) {
          console.log('exec error: ' + error);
        }

        if (!(type in db)) {
          db[type] = [];
        }
        try {
          // The payload sits between the start marker and the first end
          // marker that follows it. A missing marker is treated the same as
          // unparseable output.
          var startIndex = stdout.indexOf(START_DART_MESSAGE);
          if (startIndex === -1) {
            throw new Error("start marker not found");
          }
          var payloadStart = startIndex + START_DART_MESSAGE.length;
          var payloadEnd = stdout.indexOf(END_DART_MESSAGE, payloadStart);
          if (payloadEnd === -1) {
            throw new Error("end marker not found");
          }
          db[type][searchResultIndex] =
              JSON.parse(stdout.substring(payloadStart, payloadEnd));
        } catch (e) {
          console.warn("error parsing result for " + type + " file= "+ file);
          errorFiles.push(file);
          fs.writeFileSync("output/errors.json",
              JSON.stringify(errorFiles, null, ' '), 'utf8');
        }
        onDone();
      });
}
// Queue of extraction closures that have not been started yet.
var tasks = [];

// Have numProcesses extraction tasks running simultaneously to improve
// performance. If your machine is slow, you may need to dial back the
// parallelism.
var numProcesses = 8;

// Number of worker slots that have not run out of work yet; starts out equal
// to the size of the pool.
var numPending = numProcesses;
| 142 |
/**
 * Worker-slot callback: starts the next queued task if there is one,
 * otherwise retires the slot. When the last slot retires, the collected
 * results in `db` are written to output/database.json.
 */
function processNextTask() {
  if (tasks.length > 0) {
    // The slot is handed straight to the next task, so the number of pending
    // slots is unchanged.
    var nextTask = tasks.pop();
    nextTask();
    return;
  }

  // Nothing left to start: this slot is finished.
  numPending -= 1;
  if (numPending > 0) {
    return;
  }

  console.log("Successfully completed all tasks");
  fs.writeFileSync("output/database.json",
      JSON.stringify(db, null, ' '), 'utf8');
}
| 157 |
/**
 * Builds a deferred extraction job for one crawled search result.
 *
 * @param {string} type DOM type name.
 * @param {*} entry Crawl cache entry for this search result.
 * @param {number} index Position of the entry within the type's results; also
 *     used to derive the crawled file name (<type><index>.html).
 * @return {function()} Closure that runs parseFile for the entry and hands
 *     control back to processNextTask when it finishes.
 */
function createTask(type, entry, index) {
  function runExtraction() {
    parseFile(type, processNextTask, entry, type + index + '.html', index);
  }
  return runExtraction;
}
| 164 |
// Queue one extraction task per crawled search result of every DOM type
// listed in domTypes; types without crawl data are reported and skipped.
for (var typeIndex = 0; typeIndex < domTypes.length; typeIndex++) {
  var typeName = domTypes[typeIndex];
  var crawledEntries = cacheData[typeName];
  if (crawledEntries == null) {
    console.warn("No crawled files for " + typeName);
    continue;
  }
  for (var resultIndex = 0; resultIndex < crawledEntries.length; resultIndex++) {
    tasks.push(createTask(typeName, crawledEntries[resultIndex], resultIndex));
  }
}

// Start the worker pool: one processNextTask call per slot.
var startedSlots = 0;
while (startedSlots < numProcesses) {
  processNextTask();
  startedSlots++;
}
OLD | NEW |