OLD | NEW |
---|---|
1 var fs = require('fs'); | 1 var fs = require('fs'); |
2 var util = require('util'); | 2 var util = require('util'); |
3 var exec = require('child_process').exec; | 3 var exec = require('child_process').exec; |
4 var path = require('path'); | 4 var path = require('path'); |
5 | 5 |
6 // We have numProcesses extraction tasks running simultaneously to improve | |
7 // performance. If your machine is slow, you may need to dial back the | |
8 // parallelism. | |
9 var numProcesses = 8; | |
10 | |
6 var db = {}; | 11 var db = {}; |
7 var metadata = {}; | 12 var metadata = {}; |
8 var USE_VM = false; | 13 var USE_VM = false; |
9 | 14 |
10 // Warning: START_DART_MESSAGE must match the value hardcoded in extract.dart | 15 // Warning: START_DART_MESSAGE must match the value hardcoded in extract.dart |
11 // TODO(jacobr): figure out a cleaner way to parse this data. | 16 // TODO(jacobr): figure out a cleaner way to parse this data. |
12 var START_DART_MESSAGE = "START_DART_MESSAGE_UNIQUE_IDENTIFIER"; | 17 var START_DART_MESSAGE = "START_DART_MESSAGE_UNIQUE_IDENTIFIER"; |
13 var END_DART_MESSAGE = "END_DART_MESSAGE_UNIQUE_IDENTIFIER"; | 18 var END_DART_MESSAGE = "END_DART_MESSAGE_UNIQUE_IDENTIFIER"; |
14 | 19 |
15 var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8').toString()); | 20 var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', |
16 var cacheData = JSON.parse(fs.readFileSync('output/crawl/cache.json', 'utf8').toString()); | 21 'utf8').toString()); |
17 var dartIdl = JSON.parse(fs.readFileSync('data/dartIdl.json', 'utf8').toString()); | 22 var cacheData = JSON.parse(fs.readFileSync('output/crawl/cache.json', |
23 'utf8').toString()); | |
24 var dartIdl = JSON.parse(fs.readFileSync('data/dartIdl.json', | |
25 'utf8').toString()); | |
18 | 26 |
19 try { | 27 try { |
20 fs.mkdirSync('output/extract'); | 28 fs.mkdirSync('output/extract'); |
21 } catch (e) { | 29 } catch (e) { |
22 // It doesn't matter if the directories already exist. | 30 // It doesn't matter if the directories already exist. |
23 } | 31 } |
24 | 32 |
25 var errorFiles = []; | 33 var errorFiles = []; |
26 // TODO(jacobr): blacklist these types as we can't get good docs for them. | 34 // TODO(jacobr): blacklist these types as we can't get good docs for them. |
27 // ["Performance"] | 35 // ["Performance"] |
28 | 36 |
29 function parseFile(type, onDone, entry, file, searchResultIndex) { | 37 function parseFile(type, onDone, entry, file, searchResultIndex) { |
30 var inputFile; | 38 var inputFile; |
31 try { | 39 try { |
32 inputFile = fs.readFileSync("output/crawl/" + file, 'utf8').toString(); | 40 inputFile = fs.readFileSync("output/crawl/" + file, 'utf8').toString(); |
33 } catch (e) { | 41 } catch (e) { |
34 console.warn("Couldn't read: " + file); | 42 console.warn("Couldn't read: " + file); |
35 onDone(); | 43 onDone(); |
36 return; | 44 return; |
37 } | 45 } |
38 | 46 |
39 var inputFileRaw = inputFile; | 47 var inputFileRaw = inputFile; |
40 // Cached pages have multiple DOCTYPE tags. Strip off the first one so that | 48 // Cached pages have multiple DOCTYPE tags. Strip off the first one so that |
41 // we have valid HTML. | 49 // we have valid HTML. |
42 if (inputFile.indexOf("<!DOCTYPE") == 0) { | 50 // TODO(jacobr): use a regular expression instead of indexOf. |
51 if (inputFile.toLowerCase().indexOf("<!doctype") == 0) { | |
43 inputFile = inputFile.substr(1); | 52 inputFile = inputFile.substr(1); |
44 var matchIndex = inputFile.indexOf("<!DOCTYPE"); | 53 var matchIndex = inputFile.toLowerCase().indexOf("<!doctype"); |
nweiz
2012/02/02 00:16:19
I think you could still make this 'inputFile.toLowerCase()… [remainder of review comment truncated in this view]
| |
45 if (matchIndex == -1) { | 54 if (matchIndex == -1) { |
46 // not a cached page. | 55 // not a cached page. |
47 inputFile = inputFileRaw; | 56 inputFile = inputFileRaw; |
48 } else { | 57 } else { |
49 inputFile = inputFile.substr(matchIndex); | 58 inputFile = inputFile.substr(matchIndex); |
50 } | 59 } |
51 } | 60 } |
52 | 61 |
53 // Disable all existing javascript in the input file to speedup parsing and | 62 // Disable all existing javascript in the input file to speedup parsing and |
54 // avoid conflicts between our JS and the JS in the file. | 63 // avoid conflicts between our JS and the JS in the file. |
55 inputFile = inputFile.replace(/<script type="text\/javascript"/g, | 64 inputFile = inputFile.replace(/<script type="text\/javascript"/g, |
56 '<script type="text/ignored"'); | 65 '<script type="text/ignored"'); |
57 | 66 |
58 var endBodyIndex = inputFile.lastIndexOf("</body>"); | 67 var endBodyIndex = inputFile.lastIndexOf("</body>"); |
59 if (endBodyIndex == -1) { | 68 if (endBodyIndex == -1) { |
60 // Some files are missing a closing body tag. | 69 // Some files are missing a closing body tag. |
61 endBodyIndex = inputFile.lastIndexOf("</html>"); | 70 endBodyIndex = inputFile.lastIndexOf("</html>"); |
62 } | 71 } |
63 if (endBodyIndex == -1) { | 72 if (endBodyIndex == -1) { |
64 if (inputFile.indexOf("Error 404 (Not Found)") != -1) { | 73 if (inputFile.indexOf("Error 404 (Not Found)") != -1) { |
65 console.warn("Skipping 404 file"); | 74 console.warn("Skipping 404 file: " + file); |
66 onDone(); | 75 onDone(); |
67 return; | 76 return; |
68 } | 77 } |
69 throw "Unexpected file format for " + file; | 78 throw "Unexpected file format for " + file; |
70 } | 79 } |
71 | 80 |
72 // Remove all easy to remove script tags to speed page load. | |
73 inputFile = inputFile.substring(0, endBodyIndex) + | 81 inputFile = inputFile.substring(0, endBodyIndex) + |
74 '<script type="text/javascript">\n' + | 82 '<script type="text/javascript">\n' + |
75 ' if (window.layoutTestController) {\n' + | 83 ' if (window.layoutTestController) {\n' + |
76 ' var controller = window.layoutTestController;\n' + | 84 ' var controller = window.layoutTestController;\n' + |
77 ' controller.dumpAsText();\n' + | 85 ' controller.dumpAsText();\n' + |
78 ' controller.waitUntilDone();\n' + | 86 ' controller.waitUntilDone();\n' + |
79 ' }\n' + | 87 ' }\n' + |
80 'window.addEventListener("message", receiveMessage, false);\n' + | 88 'window.addEventListener("message", receiveMessage, false);\n' + |
81 'function receiveMessage(event) {\n' + | 89 'function receiveMessage(event) {\n' + |
82 ' if (event.data.indexOf("' + START_DART_MESSAGE + '") != 0) return;\n' + | 90 ' if (event.data.indexOf("' + START_DART_MESSAGE + '") != 0) return;\n' + |
83 ' console.log(event.data + "' + END_DART_MESSAGE + '");\n' + | 91 ' console.log(event.data + "' + END_DART_MESSAGE + '");\n' + |
92 // We feature detect whether the browser supports layoutTestController | |
93 // so we only clear the document content when running in the test shell | |
94 // and not when debugging using a normal browser. | |
84 ' if (window.layoutTestController) {\n' + | 95 ' if (window.layoutTestController) {\n' + |
85 ' document.documentElement.textContent = "";\n' + | 96 ' document.documentElement.textContent = "";\n' + |
86 ' window.layoutTestController.notifyDone();\n' + | 97 ' window.layoutTestController.notifyDone();\n' + |
87 ' }\n' + | 98 ' }\n' + |
88 '}\n' + | 99 '}\n' + |
89 '</script>\n' + | 100 '</script>\n' + |
90 (USE_VM ? | 101 (USE_VM ? |
91 '<script type="application/dart" src="../../extract.dart"></script>' : | 102 '<script type="application/dart" src="../../extract.dart"></script>' : |
92 '<script type="text/javascript" src="../../output/extract.dart.js"></script>') + | 103 '<script type="text/javascript" src="../../output/extract.dart.js">' + |
104 '</script>') + | |
93 '\n' + inputFile.substring(endBodyIndex); | 105 '\n' + inputFile.substring(endBodyIndex); |
94 | 106 |
95 console.log("Processing: " + file); | 107 console.log("Processing: " + file); |
96 var dumpFileName = "output/extract/" + file; | 108 var absoluteDumpFileName = path.resolve("output/extract/" + file); |
97 var absoluteDumpFileName = path.resolve(dumpFileName); | |
98 fs.writeFileSync(absoluteDumpFileName, inputFile, 'utf8'); | 109 fs.writeFileSync(absoluteDumpFileName, inputFile, 'utf8'); |
99 var parseArgs = { | 110 var parseArgs = { |
100 type: type, | 111 type: type, |
101 searchResult: entry, | 112 searchResult: entry, |
102 dartIdl: dartIdl[type] | 113 dartIdl: dartIdl[type] |
103 }; | 114 }; |
104 fs.writeFileSync(absoluteDumpFileName + ".json", JSON.stringify(parseArgs), | 115 fs.writeFileSync(absoluteDumpFileName + ".json", JSON.stringify(parseArgs), |
105 'utf8'); | 116 'utf8'); |
106 | 117 |
118 // TODO(jacobr): Make this run on platforms other than OS X. | |
107 var cmd = '../../../client/tests/drt/DumpRenderTree.app/Contents/MacOS/' + | 119 var cmd = '../../../client/tests/drt/DumpRenderTree.app/Contents/MacOS/' + |
108 'DumpRenderTree ' + absoluteDumpFileName; | 120 'DumpRenderTree ' + absoluteDumpFileName; |
109 console.log(cmd); | 121 console.log(cmd); |
110 var child = exec(cmd, | 122 exec(cmd, |
111 function (error, stdout, stderr) { | 123 function (error, stdout, stderr) { |
112 var msgIndex = stdout.indexOf(START_DART_MESSAGE); | 124 var msgIndex = stdout.indexOf(START_DART_MESSAGE); |
113 var msg = stdout.substring(msgIndex + START_DART_MESSAGE.length); | |
114 var msg = msg.substring(0, msg.indexOf(END_DART_MESSAGE)); | |
115 console.log('all: ' + stdout); | 125 console.log('all: ' + stdout); |
116 console.log('stderr: ' + stderr); | 126 console.log('stderr: ' + stderr); |
117 if (error !== null) { | 127 if (error !== null) { |
118 console.log('exec error: ' + error); | 128 console.log('exec error: ' + error); |
119 } | 129 } |
120 | 130 |
131 // TODO(jacobr): use a regexp. | |
132 var msg = stdout.substring(msgIndex + START_DART_MESSAGE.length); | |
133 msg = msg.substring(0, msg.indexOf(END_DART_MESSAGE)); | |
121 if (!(type in db)) { | 134 if (!(type in db)) { |
122 db[type] = []; | 135 db[type] = []; |
123 } | 136 } |
124 try { | 137 try { |
125 db[type][searchResultIndex] = JSON.parse(msg); | 138 db[type][searchResultIndex] = JSON.parse(msg); |
126 } catch(e) { | 139 } catch(e) { |
127 console.warn("error parsing result for " + type + " file= "+ file); | 140 console.warn("error parsing result for " + type + " file= "+ file); |
128 errorFiles.push(file); | 141 errorFiles.push(file); |
129 fs.writeFileSync("output/errors.json", | 142 fs.writeFileSync("output/errors.json", |
130 JSON.stringify(errorFiles, null, ' '), 'utf8'); | 143 JSON.stringify(errorFiles, null, ' '), 'utf8'); |
131 } | 144 } |
132 onDone(); | 145 onDone(); |
133 }); | 146 }); |
134 } | 147 } |
135 var tasks = []; | 148 var tasks = []; |
136 | 149 |
137 var numProcesses = 8; | |
138 // Have numProcesses extraction tasks running simultaneously to improve | |
139 // performance. If your machine is slow, you may need to dial back the | |
140 // parallelism. | |
141 var numPending = numProcesses; | 150 var numPending = numProcesses; |
142 | 151 |
143 function processNextTask() { | 152 function processNextTask() { |
144 numPending--; | 153 numPending--; |
145 if (tasks.length > 0) { | 154 if (tasks.length > 0) { |
146 numPending++; | 155 numPending++; |
147 var task = tasks.pop(); | 156 var task = tasks.pop(); |
148 task(); | 157 task(); |
149 } else { | 158 } else { |
150 if (numPending <= 0) { | 159 if (numPending <= 0) { |
(...skipping 19 matching lines...) Expand all Loading... | |
170 tasks.push(createTask(type, entries[j], j)); | 179 tasks.push(createTask(type, entries[j], j)); |
171 } | 180 } |
172 } else { | 181 } else { |
173 console.warn("No crawled files for " + type); | 182 console.warn("No crawled files for " + type); |
174 } | 183 } |
175 } | 184 } |
176 | 185 |
177 for (var p = 0; p < numProcesses; p++) { | 186 for (var p = 0; p < numProcesses; p++) { |
178 processNextTask(); | 187 processNextTask(); |
179 } | 188 } |
OLD | NEW |