| OLD | NEW |
| 1 var fs = require('fs'); | 1 var fs = require('fs'); |
| 2 var util = require('util'); | 2 var util = require('util'); |
| 3 var exec = require('child_process').exec; | 3 var exec = require('child_process').exec; |
| 4 var path = require('path'); | 4 var path = require('path'); |
| 5 | 5 |
| 6 // We have numProcesses extraction tasks running simultaneously to improve |
| 7 // performance. If your machine is slow, you may need to dial back the |
| 8 // parallelism. |
| 9 var numProcesses = 8; |
| 10 |
| 6 var db = {}; | 11 var db = {}; |
| 7 var metadata = {}; | 12 var metadata = {}; |
| 8 var USE_VM = false; | 13 var USE_VM = false; |
| 9 | 14 |
| 10 // Warning: START_DART_MESSAGE must match the value hardcoded in extract.dart | 15 // Warning: START_DART_MESSAGE must match the value hardcoded in extract.dart |
| 11 // TODO(jacobr): figure out a cleaner way to parse this data. | 16 // TODO(jacobr): figure out a cleaner way to parse this data. |
| 12 var START_DART_MESSAGE = "START_DART_MESSAGE_UNIQUE_IDENTIFIER"; | 17 var START_DART_MESSAGE = "START_DART_MESSAGE_UNIQUE_IDENTIFIER"; |
| 13 var END_DART_MESSAGE = "END_DART_MESSAGE_UNIQUE_IDENTIFIER"; | 18 var END_DART_MESSAGE = "END_DART_MESSAGE_UNIQUE_IDENTIFIER"; |
| 14 | 19 |
| 15 var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8').toString()); | 20 var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', |
| 16 var cacheData = JSON.parse(fs.readFileSync('output/crawl/cache.json', 'utf8').toString()); | 21 'utf8').toString()); |
| 17 var dartIdl = JSON.parse(fs.readFileSync('data/dartIdl.json', 'utf8').toString()); | 22 var cacheData = JSON.parse(fs.readFileSync('output/crawl/cache.json', |
| 23 'utf8').toString()); |
| 24 var dartIdl = JSON.parse(fs.readFileSync('data/dartIdl.json', |
| 25 'utf8').toString()); |
| 18 | 26 |
| 19 try { | 27 try { |
| 20 fs.mkdirSync('output/extract'); | 28 fs.mkdirSync('output/extract'); |
| 21 } catch (e) { | 29 } catch (e) { |
| 22 // It doesn't matter if the directories already exist. | 30 // It doesn't matter if the directories already exist. |
| 23 } | 31 } |
| 24 | 32 |
| 25 var errorFiles = []; | 33 var errorFiles = []; |
| 26 // TODO(jacobr): blacklist these types as we can't get good docs for them. | 34 // TODO(jacobr): blacklist these types as we can't get good docs for them. |
| 27 // ["Performance"] | 35 // ["Performance"] |
| 28 | 36 |
| 29 function parseFile(type, onDone, entry, file, searchResultIndex) { | 37 function parseFile(type, onDone, entry, file, searchResultIndex) { |
| 30 var inputFile; | 38 var inputFile; |
| 31 try { | 39 try { |
| 32 inputFile = fs.readFileSync("output/crawl/" + file, 'utf8').toString(); | 40 inputFile = fs.readFileSync("output/crawl/" + file, 'utf8').toString(); |
| 33 } catch (e) { | 41 } catch (e) { |
| 34 console.warn("Couldn't read: " + file); | 42 console.warn("Couldn't read: " + file); |
| 35 onDone(); | 43 onDone(); |
| 36 return; | 44 return; |
| 37 } | 45 } |
| 38 | 46 |
| 39 var inputFileRaw = inputFile; | 47 var inputFileRaw = inputFile; |
| 40 // Cached pages have multiple DOCTYPE tags. Strip off the first one so that | 48 // Cached pages have multiple DOCTYPE tags. Strip off the first one so that |
| 41 // we have valid HTML. | 49 // we have valid HTML. |
| 42 if (inputFile.indexOf("<!DOCTYPE") == 0) { | 50 // TODO(jacobr): use a regular expression instead of indexOf. |
| 43 inputFile = inputFile.substr(1); | 51 if (inputFile.toLowerCase().indexOf("<!doctype") == 0) { |
| 44 var matchIndex = inputFile.indexOf("<!DOCTYPE"); | 52 var matchIndex = inputFile.toLowerCase().indexOf("<!doctype", 1); |
| 45 if (matchIndex == -1) { | 53 if (matchIndex == -1) { |
| 46 // not a cached page. | 54 // not a cached page. |
| 47 inputFile = inputFileRaw; | 55 inputFile = inputFileRaw; |
| 48 } else { | 56 } else { |
| 49 inputFile = inputFile.substr(matchIndex); | 57 inputFile = inputFile.substr(matchIndex); |
| 50 } | 58 } |
| 51 } | 59 } |
| 52 | 60 |
| 53 // Disable all existing javascript in the input file to speedup parsing and | 61 // Disable all existing javascript in the input file to speed up parsing and |
| 54 // avoid conflicts between our JS and the JS in the file. | 62 // avoid conflicts between our JS and the JS in the file. |
| 55 inputFile = inputFile.replace(/<script type="text\/javascript"/g, | 63 inputFile = inputFile.replace(/<script type="text\/javascript"/g, |
| 56 '<script type="text/ignored"'); | 64 '<script type="text/ignored"'); |
| 57 | 65 |
| 58 var endBodyIndex = inputFile.lastIndexOf("</body>"); | 66 var endBodyIndex = inputFile.lastIndexOf("</body>"); |
| 59 if (endBodyIndex == -1) { | 67 if (endBodyIndex == -1) { |
| 60 // Some files are missing a closing body tag. | 68 // Some files are missing a closing body tag. |
| 61 endBodyIndex = inputFile.lastIndexOf("</html>"); | 69 endBodyIndex = inputFile.lastIndexOf("</html>"); |
| 62 } | 70 } |
| 63 if (endBodyIndex == -1) { | 71 if (endBodyIndex == -1) { |
| 64 if (inputFile.indexOf("Error 404 (Not Found)") != -1) { | 72 if (inputFile.indexOf("Error 404 (Not Found)") != -1) { |
| 65 console.warn("Skipping 404 file"); | 73 console.warn("Skipping 404 file: " + file); |
| 66 onDone(); | 74 onDone(); |
| 67 return; | 75 return; |
| 68 } | 76 } |
| 69 throw "Unexpected file format for " + file; | 77 throw "Unexpected file format for " + file; |
| 70 } | 78 } |
| 71 | 79 |
| 72 // Remove all easy to remove script tags to speed page load. | |
| 73 inputFile = inputFile.substring(0, endBodyIndex) + | 80 inputFile = inputFile.substring(0, endBodyIndex) + |
| 74 '<script type="text/javascript">\n' + | 81 '<script type="text/javascript">\n' + |
| 75 ' if (window.layoutTestController) {\n' + | 82 ' if (window.layoutTestController) {\n' + |
| 76 ' var controller = window.layoutTestController;\n' + | 83 ' var controller = window.layoutTestController;\n' + |
| 77 ' controller.dumpAsText();\n' + | 84 ' controller.dumpAsText();\n' + |
| 78 ' controller.waitUntilDone();\n' + | 85 ' controller.waitUntilDone();\n' + |
| 79 ' }\n' + | 86 ' }\n' + |
| 80 'window.addEventListener("message", receiveMessage, false);\n' + | 87 'window.addEventListener("message", receiveMessage, false);\n' + |
| 81 'function receiveMessage(event) {\n' + | 88 'function receiveMessage(event) {\n' + |
| 82 ' if (event.data.indexOf("' + START_DART_MESSAGE + '") != 0) return;\n' + | 89 ' if (event.data.indexOf("' + START_DART_MESSAGE + '") != 0) return;\n' + |
| 83 ' console.log(event.data + "' + END_DART_MESSAGE + '");\n' + | 90 ' console.log(event.data + "' + END_DART_MESSAGE + '");\n' + |
| 91 // We feature detect whether the browser supports layoutTestController |
| 92 // so we only clear the document content when running in the test shell |
| 93 // and not when debugging using a normal browser. |
| 84 ' if (window.layoutTestController) {\n' + | 94 ' if (window.layoutTestController) {\n' + |
| 85 ' document.documentElement.textContent = "";\n' + | 95 ' document.documentElement.textContent = "";\n' + |
| 86 ' window.layoutTestController.notifyDone();\n' + | 96 ' window.layoutTestController.notifyDone();\n' + |
| 87 ' }\n' + | 97 ' }\n' + |
| 88 '}\n' + | 98 '}\n' + |
| 89 '</script>\n' + | 99 '</script>\n' + |
| 90 (USE_VM ? | 100 (USE_VM ? |
| 91 '<script type="application/dart" src="../../extract.dart"></script>' : | 101 '<script type="application/dart" src="../../extract.dart"></script>' : |
| 92 '<script type="text/javascript" src="../../output/extract.dart.js"></script>') + | 102 '<script type="text/javascript" src="../../output/extract.dart.js">' + |
| 103 '</script>') + |
| 93 '\n' + inputFile.substring(endBodyIndex); | 104 '\n' + inputFile.substring(endBodyIndex); |
| 94 | 105 |
| 95 console.log("Processing: " + file); | 106 console.log("Processing: " + file); |
| 96 var dumpFileName = "output/extract/" + file; | 107 var absoluteDumpFileName = path.resolve("output/extract/" + file); |
| 97 var absoluteDumpFileName = path.resolve(dumpFileName); | |
| 98 fs.writeFileSync(absoluteDumpFileName, inputFile, 'utf8'); | 108 fs.writeFileSync(absoluteDumpFileName, inputFile, 'utf8'); |
| 99 var parseArgs = { | 109 var parseArgs = { |
| 100 type: type, | 110 type: type, |
| 101 searchResult: entry, | 111 searchResult: entry, |
| 102 dartIdl: dartIdl[type] | 112 dartIdl: dartIdl[type] |
| 103 }; | 113 }; |
| 104 fs.writeFileSync(absoluteDumpFileName + ".json", JSON.stringify(parseArgs), | 114 fs.writeFileSync(absoluteDumpFileName + ".json", JSON.stringify(parseArgs), |
| 105 'utf8'); | 115 'utf8'); |
| 106 | 116 |
| 117 // TODO(jacobr): Make this run on platforms other than OS X. |
| 107 var cmd = '../../../client/tests/drt/DumpRenderTree.app/Contents/MacOS/' + | 118 var cmd = '../../../client/tests/drt/DumpRenderTree.app/Contents/MacOS/' + |
| 108 'DumpRenderTree ' + absoluteDumpFileName; | 119 'DumpRenderTree ' + absoluteDumpFileName; |
| 109 console.log(cmd); | 120 console.log(cmd); |
| 110 var child = exec(cmd, | 121 exec(cmd, |
| 111 function (error, stdout, stderr) { | 122 function (error, stdout, stderr) { |
| 112 var msgIndex = stdout.indexOf(START_DART_MESSAGE); | 123 var msgIndex = stdout.indexOf(START_DART_MESSAGE); |
| 113 var msg = stdout.substring(msgIndex + START_DART_MESSAGE.length); | |
| 114 var msg = msg.substring(0, msg.indexOf(END_DART_MESSAGE)); | |
| 115 console.log('all: ' + stdout); | 124 console.log('all: ' + stdout); |
| 116 console.log('stderr: ' + stderr); | 125 console.log('stderr: ' + stderr); |
| 117 if (error !== null) { | 126 if (error !== null) { |
| 118 console.log('exec error: ' + error); | 127 console.log('exec error: ' + error); |
| 119 } | 128 } |
| 120 | 129 |
| 130 // TODO(jacobr): use a regexp. |
| 131 var msg = stdout.substring(msgIndex + START_DART_MESSAGE.length); |
| 132 msg = msg.substring(0, msg.indexOf(END_DART_MESSAGE)); |
| 121 if (!(type in db)) { | 133 if (!(type in db)) { |
| 122 db[type] = []; | 134 db[type] = []; |
| 123 } | 135 } |
| 124 try { | 136 try { |
| 125 db[type][searchResultIndex] = JSON.parse(msg); | 137 db[type][searchResultIndex] = JSON.parse(msg); |
| 126 } catch(e) { | 138 } catch(e) { |
| 139 // Write the errors file every time there is an error so that if the |
| 140 // user aborts the script, the error file is valid. |
| 127 console.warn("error parsing result for " + type + " file= "+ file); | 141 console.warn("error parsing result for " + type + " file= "+ file); |
| 128 errorFiles.push(file); | 142 errorFiles.push(file); |
| 129 fs.writeFileSync("output/errors.json", | 143 fs.writeFileSync("output/errors.json", |
| 130 JSON.stringify(errorFiles, null, ' '), 'utf8'); | 144 JSON.stringify(errorFiles, null, ' '), 'utf8'); |
| 131 } | 145 } |
| 132 onDone(); | 146 onDone(); |
| 133 }); | 147 }); |
| 134 } | 148 } |
| 149 |
| 135 var tasks = []; | 150 var tasks = []; |
| 136 | 151 |
| 137 var numProcesses = 8; | |
| 138 // Have numProcesses extraction tasks running simultaneously to improve | |
| 139 // performance. If your machine is slow, you may need to dial back the | |
| 140 // parallelism. | |
| 141 var numPending = numProcesses; | 152 var numPending = numProcesses; |
| 142 | 153 |
| 143 function processNextTask() { | 154 function processNextTask() { |
| 144 numPending--; | 155 numPending--; |
| 145 if (tasks.length > 0) { | 156 if (tasks.length > 0) { |
| 146 numPending++; | 157 numPending++; |
| 147 var task = tasks.pop(); | 158 var task = tasks.pop(); |
| 148 task(); | 159 task(); |
| 149 } else { | 160 } else { |
| 150 if (numPending <= 0) { | 161 if (numPending <= 0) { |
| (...skipping 19 matching lines...) |
| 170 tasks.push(createTask(type, entries[j], j)); | 181 tasks.push(createTask(type, entries[j], j)); |
| 171 } | 182 } |
| 172 } else { | 183 } else { |
| 173 console.warn("No crawled files for " + type); | 184 console.warn("No crawled files for " + type); |
| 174 } | 185 } |
| 175 } | 186 } |
| 176 | 187 |
| 177 for (var p = 0; p < numProcesses; p++) { | 188 for (var p = 0; p < numProcesses; p++) { |
| 178 processNextTask(); | 189 processNextTask(); |
| 179 } | 190 } |
| OLD | NEW |