OLD | NEW |
1 var fs = require('fs'); | 1 var fs = require('fs'); |
2 var util = require('util'); | 2 var util = require('util'); |
3 var exec = require('child_process').exec; | 3 var exec = require('child_process').exec; |
4 var path = require('path'); | 4 var path = require('path'); |
5 | 5 |
// We have numProcesses extraction tasks running simultaneously to improve
// performance. If your machine is slow, you may need to dial back the
// parallelism.
var numProcesses = 8;

// Extraction results: db[type] is an array indexed by search result index,
// filled in by parseFile as each DumpRenderTree run completes.
var db = {};
var metadata = {};
// When true, pages load extract.dart directly (application/dart); otherwise
// they load the compiled output/extract.dart.js.
var USE_VM = false;

// Warning: START_DART_MESSAGE must match the value hardcoded in extract.dart
// TODO(jacobr): figure out a cleaner way to parse this data.
var START_DART_MESSAGE = "START_DART_MESSAGE_UNIQUE_IDENTIFIER";
var END_DART_MESSAGE = "END_DART_MESSAGE_UNIQUE_IDENTIFIER";

// readFileSync with an explicit encoding already returns a string, so no
// extra toString() call is needed before parsing.
var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8'));
var cacheData = JSON.parse(fs.readFileSync('output/crawl/cache.json', 'utf8'));
var dartIdl = JSON.parse(fs.readFileSync('data/dartIdl.json', 'utf8'));

try {
  fs.mkdirSync('output/extract');
} catch (e) {
  // It doesn't matter if the directory already exists, but any other failure
  // (missing parent directory, permissions, ...) would make every later write
  // fail, so surface it immediately instead of hiding it.
  if (e.code !== 'EEXIST') {
    throw e;
  }
}

// Names of crawled files whose extraction output could not be parsed; the
// list is written to output/errors.json each time it grows.
var errorFiles = [];
// TODO(jacobr): blacklist these types as we can't get good docs for them.
// ["Performance"]
// Returns the text found between START_DART_MESSAGE and the first
// END_DART_MESSAGE that follows it in stdout, or null when either marker is
// missing.
// TODO(jacobr): use a regexp.
function extractDartMessage(stdout) {
  var startIndex = stdout.indexOf(START_DART_MESSAGE);
  if (startIndex == -1) {
    return null;
  }
  var bodyIndex = startIndex + START_DART_MESSAGE.length;
  var endIndex = stdout.indexOf(END_DART_MESSAGE, bodyIndex);
  if (endIndex == -1) {
    return null;
  }
  return stdout.substring(bodyIndex, endIndex);
}

// Prepares one crawled page for extraction and runs DumpRenderTree on it.
//
// Reads output/crawl/<file>, disables the page's own scripts, injects the
// message-relay script plus the extractor script before the closing body (or
// html) tag, and writes the result to output/extract/<file> together with a
// <file>.json holding the arguments for the extractor. When DumpRenderTree
// finishes, the JSON emitted between the START/END markers is stored in
// db[type][searchResultIndex].
//
//   type              - key into db and dartIdl.
//   onDone            - called with no arguments exactly once when this file
//                       has been handled (successfully or not).
//   entry             - stored as searchResult in the arguments JSON.
//   file              - file name relative to output/crawl and output/extract.
//   searchResultIndex - slot in db[type] that receives the parsed result.
//
// Throws an Error if the page has neither a closing body nor html tag and is
// not a recognized 404 page; onDone is not called in that case.
function parseFile(type, onDone, entry, file, searchResultIndex) {
  var inputFile;
  try {
    inputFile = fs.readFileSync("output/crawl/" + file, 'utf8');
  } catch (e) {
    console.warn("Couldn't read: " + file);
    onDone();
    return;
  }

  // Cached pages have multiple DOCTYPE tags. Strip off everything before the
  // second one so that we have valid HTML. A page with a single DOCTYPE is not
  // a cached page and is left untouched. The regular expression is created per
  // call because a global regexp keeps lastIndex state between exec calls.
  var doctypePattern = /<!doctype/gi;
  var firstDoctype = doctypePattern.exec(inputFile);
  if (firstDoctype !== null && firstDoctype.index == 0) {
    var secondDoctype = doctypePattern.exec(inputFile);
    if (secondDoctype !== null) {
      inputFile = inputFile.substr(secondDoctype.index);
    }
  }

  // Disable all existing javascript in the input file to speed up parsing and
  // avoid conflicts between our JS and the JS in the file.
  inputFile = inputFile.replace(/<script type="text\/javascript"/g,
      '<script type="text/ignored"');

  var endBodyIndex = inputFile.lastIndexOf("</body>");
  if (endBodyIndex == -1) {
    // Some files are missing a closing body tag.
    endBodyIndex = inputFile.lastIndexOf("</html>");
  }
  if (endBodyIndex == -1) {
    if (inputFile.indexOf("Error 404 (Not Found)") != -1) {
      console.warn("Skipping 404 file: " + file);
      onDone();
      return;
    }
    throw new Error("Unexpected file format for " + file);
  }

  inputFile = inputFile.substring(0, endBodyIndex) +
    '<script type="text/javascript">\n' +
    ' if (window.layoutTestController) {\n' +
    ' var controller = window.layoutTestController;\n' +
    ' controller.dumpAsText();\n' +
    ' controller.waitUntilDone();\n' +
    ' }\n' +
    'window.addEventListener("message", receiveMessage, false);\n' +
    'function receiveMessage(event) {\n' +
    ' if (event.data.indexOf("' + START_DART_MESSAGE + '") != 0) return;\n' +
    ' console.log(event.data + "' + END_DART_MESSAGE + '");\n' +
    // We feature detect whether the browser supports layoutTestController
    // so we only clear the document content when running in the test shell
    // and not when debugging using a normal browser.
    ' if (window.layoutTestController) {\n' +
    ' document.documentElement.textContent = "";\n' +
    ' window.layoutTestController.notifyDone();\n' +
    ' }\n' +
    '}\n' +
    '</script>\n' +
    (USE_VM ?
      '<script type="application/dart" src="../../extract.dart"></script>' :
      '<script type="text/javascript" src="../../output/extract.dart.js">' +
      '</script>') +
    '\n' + inputFile.substring(endBodyIndex);

  console.log("Processing: " + file);
  var absoluteDumpFileName = path.resolve("output/extract/" + file);
  fs.writeFileSync(absoluteDumpFileName, inputFile, 'utf8');
  var parseArgs = {
    type: type,
    searchResult: entry,
    dartIdl: dartIdl[type]
  };
  fs.writeFileSync(absoluteDumpFileName + ".json", JSON.stringify(parseArgs),
      'utf8');

  // TODO(jacobr): Make this run on platforms other than OS X.
  // NOTE(review): the path is interpolated unquoted into a shell command, so
  // a file name containing spaces or shell metacharacters will break (or
  // alter) the command. Consider child_process.execFile with an argument
  // array.
  var cmd = '../../../client/tests/drt/DumpRenderTree.app/Contents/MacOS/' +
      'DumpRenderTree ' + absoluteDumpFileName;
  console.log(cmd);
  exec(cmd,
    function (error, stdout, stderr) {
      console.log('all: ' + stdout);
      console.log('stderr: ' + stderr);
      if (error !== null) {
        console.log('exec error: ' + error);
      }

      if (!(type in db)) {
        db[type] = [];
      }

      var parsed = false;
      var msg = extractDartMessage(stdout);
      if (msg !== null) {
        try {
          db[type][searchResultIndex] = JSON.parse(msg);
          parsed = true;
        } catch (e) {
          // Malformed JSON between the markers; reported below together with
          // the missing-marker case.
        }
      }
      if (!parsed) {
        // Write the errors file every time there is an error so that if the
        // user aborts the script, the error file is valid.
        console.warn("error parsing result for " + type + " file= "+ file);
        errorFiles.push(file);
        fs.writeFileSync("output/errors.json",
            JSON.stringify(errorFiles, null, ' '), 'utf8');
      }
      onDone();
    });
}
| 149 |
135 var tasks = []; | 150 var tasks = []; |
136 | 151 |
137 var numProcesses = 8; | |
138 // Have numProcesses extraction tasks running simultaneously to improve | |
139 // performance. If your machine is slow, you may need to dial back the | |
140 // parallelism. | |
141 var numPending = numProcesses; | 152 var numPending = numProcesses; |
142 | 153 |
143 function processNextTask() { | 154 function processNextTask() { |
144 numPending--; | 155 numPending--; |
145 if (tasks.length > 0) { | 156 if (tasks.length > 0) { |
146 numPending++; | 157 numPending++; |
147 var task = tasks.pop(); | 158 var task = tasks.pop(); |
148 task(); | 159 task(); |
149 } else { | 160 } else { |
150 if (numPending <= 0) { | 161 if (numPending <= 0) { |
(...skipping 19 matching lines...) Expand all Loading... |
170 tasks.push(createTask(type, entries[j], j)); | 181 tasks.push(createTask(type, entries[j], j)); |
171 } | 182 } |
172 } else { | 183 } else { |
173 console.warn("No crawled files for " + type); | 184 console.warn("No crawled files for " + type); |
174 } | 185 } |
175 } | 186 } |
176 | 187 |
177 for (var p = 0; p < numProcesses; p++) { | 188 for (var p = 0; p < numProcesses; p++) { |
178 processNextTask(); | 189 processNextTask(); |
179 } | 190 } |
OLD | NEW |