OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 2180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2191 # at 19 or higher is dependent on the specific event. | 2191 # at 19 or higher is dependent on the specific event. |
2192 EVENT_NAME = 0 | 2192 EVENT_NAME = 0 |
2193 TYPE = 1 | 2193 TYPE = 1 |
2194 PID = 9 | 2194 PID = 9 |
2195 TID = 10 | 2195 TID = 10 |
2196 PROCESSOR_ID = 11 | 2196 PROCESSOR_ID = 11 |
2197 TIMESTAMP = 16 | 2197 TIMESTAMP = 16 |
2198 NULL_GUID = '{00000000-0000-0000-0000-000000000000}' | 2198 NULL_GUID = '{00000000-0000-0000-0000-000000000000}' |
2199 USER_DATA = 19 | 2199 USER_DATA = 19 |
2200 | 2200 |
| 2201 def __init__(self, logname): |
| 2202 """Starts the log collection. |
| 2203 |
| 2204 Requires administrative access. logman.exe is synchronous so no need for a |
| 2205 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID |
| 2206 instead. The GUID constant name is SystemTraceControlGuid. Lovely. |
| 2207 |
| 2208 One can get the list of potentially interesting providers with: |
| 2209 "logman query providers | findstr /i file" |
| 2210 """ |
| 2211 super(LogmanTrace.Tracer, self).__init__(logname) |
| 2212 self._script = os.path.join(BASE_DIR, 'trace_child_process.py') |
| 2213 cmd_start = [ |
| 2214 'logman.exe', |
| 2215 'start', |
| 2216 'NT Kernel Logger', |
| 2217 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}', |
| 2218 # splitio,fileiocompletion,syscall,file,cswitch,img |
| 2219 '(process,fileio,thread)', |
| 2220 '-o', self._logname + '.etl', |
| 2221 '-ets', # Send directly to kernel |
| 2222 # Values extracted out of thin air. |
| 2223 # Event Trace Session buffer size in kb. |
| 2224 '-bs', '10240', |
| 2225 # Number of Event Trace Session buffers. |
| 2226 '-nb', '16', '256', |
| 2227 ] |
| 2228 logging.debug('Running: %s' % cmd_start) |
| 2229 try: |
| 2230 subprocess.check_call( |
| 2231 cmd_start, |
| 2232 stdin=subprocess.PIPE, |
| 2233 stdout=subprocess.PIPE, |
| 2234 stderr=subprocess.STDOUT) |
| 2235 except subprocess.CalledProcessError, e: |
| 2236 if e.returncode == -2147024891: |
| 2237 print >> sys.stderr, 'Please restart with an elevated admin prompt' |
| 2238 elif e.returncode == -2144337737: |
| 2239 print >> sys.stderr, ( |
| 2240 'A kernel trace was already running, stop it and try again') |
| 2241 raise |
| 2242 |
2201 def trace(self, cmd, cwd, tracename, output): | 2243 def trace(self, cmd, cwd, tracename, output): |
2202 """Uses logman.exe to start and stop the NT Kernel Logger while the | |
2203 executable to be traced is run. | |
2204 """ | |
2205 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) | 2244 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
2206 with self._lock: | 2245 with self._lock: |
2207 if not self._initialized: | 2246 if not self._initialized: |
2208 raise TracingFailure( | 2247 raise TracingFailure( |
2209 'Called Tracer.trace() on an unitialized object', | 2248 'Called Tracer.trace() on an unitialized object', |
2210 None, None, None, tracename) | 2249 None, None, None, tracename) |
2211 assert tracename not in (i['trace'] for i in self._traces) | 2250 assert tracename not in (i['trace'] for i in self._traces) |
2212 | 2251 |
2213 # Use "logman -?" for help. | 2252 # Use "logman -?" for help. |
2214 | 2253 |
2215 stdout = stderr = None | 2254 stdout = stderr = None |
2216 if output: | 2255 if output: |
2217 stdout = subprocess.PIPE | 2256 stdout = subprocess.PIPE |
2218 stderr = subprocess.STDOUT | 2257 stderr = subprocess.STDOUT |
2219 | 2258 |
2220 # 1. Start the log collection. | 2259 # Run the child process. |
2221 self._start_log(self._logname + '.etl') | |
2222 | |
2223 # 2. Run the child process. | |
2224 logging.debug('Running: %s' % cmd) | 2260 logging.debug('Running: %s' % cmd) |
2225 try: | 2261 # Use trace_child_process.py so we have a clear pid owner. Since |
2226 # Use trace_child_process.py so we have a clear pid owner. Since | 2262 # trace_inputs.py can be used as a library and could trace mulitple |
2227 # trace_inputs.py can be used as a library and could trace mulitple | 2263 # processes simultaneously, it makes it more complex if the executable to |
2228 # processes simultaneously, it makes it more complex if the executable | 2264 # be traced is executed directly here. It also solves issues related to |
2229 # to be traced is executed directly here. It also solves issues related | 2265 # logman.exe that needs to be executed to control the kernel trace. |
2230 # to logman.exe that needs to be executed to control the kernel trace. | 2266 child_cmd = [ |
2231 child_cmd = [ | 2267 sys.executable, |
2232 sys.executable, | 2268 self._script, |
2233 os.path.join(BASE_DIR, 'trace_child_process.py'), | 2269 tracename, |
2234 ] | 2270 ] |
2235 child = subprocess.Popen( | 2271 child = subprocess.Popen( |
2236 child_cmd + cmd, | 2272 child_cmd + cmd, |
2237 cwd=cwd, | 2273 cwd=cwd, |
2238 stdin=subprocess.PIPE, | 2274 stdin=subprocess.PIPE, |
2239 stdout=stdout, | 2275 stdout=stdout, |
2240 stderr=stderr) | 2276 stderr=stderr) |
2241 logging.debug('Started child pid: %d' % child.pid) | 2277 logging.debug('Started child pid: %d' % child.pid) |
2242 out = child.communicate()[0] | 2278 out = child.communicate()[0] |
2243 finally: | 2279 # This doesn't mean all the grand-children are done. Sadly, we don't have |
2244 # 3. Stop the log collection. | 2280 # a good way to determine that. |
2245 self._stop_log() | |
2246 | 2281 |
2247 with self._lock: | 2282 with self._lock: |
2248 assert tracename not in (i['trace'] for i in self._traces) | 2283 assert tracename not in (i['trace'] for i in self._traces) |
2249 self._traces.append({ | 2284 self._traces.append({ |
2250 'command': cmd, | 2285 'command': cmd, |
2251 'cwd': cwd, | 2286 'cwd': cwd, |
2252 'pid': child.pid, | 2287 'pid': child.pid, |
2253 'trace': tracename, | 2288 'trace': tracename, |
2254 'output': out, | 2289 'output': out, |
2255 }) | 2290 }) |
2256 | 2291 |
2257 return child.returncode, out | 2292 return child.returncode, out |
2258 | 2293 |
2259 @classmethod | 2294 def close(self, _timeout=None): |
2260 def _start_log(cls, etl): | 2295 """Stops the kernel log collection and converts the traces to text |
2261 """Starts the log collection. | 2296 representation. |
| 2297 """ |
| 2298 with self._lock: |
| 2299 if not self._initialized: |
| 2300 raise TracingFailure( |
| 2301 'Called Tracer.close() on an unitialized object', |
| 2302 None, None, None) |
| 2303 # Save metadata, add 'format' key.. |
| 2304 data = { |
| 2305 'format': 'csv', |
| 2306 'traces': self._traces, |
| 2307 } |
| 2308 write_json(self._logname, data, False) |
2262 | 2309 |
2263 One can get the list of potentially interesting providers with: | 2310 cmd_stop = [ |
2264 "logman query providers | findstr /i file" | 2311 'logman.exe', |
2265 """ | 2312 'stop', |
2266 cmd_start = [ | 2313 'NT Kernel Logger', |
2267 'logman.exe', | 2314 '-ets', # Sends the command directly to the kernel. |
2268 'start', | 2315 ] |
2269 'NT Kernel Logger', | 2316 logging.debug('Running: %s' % cmd_stop) |
2270 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}', | |
2271 # splitio,fileiocompletion,syscall,file,cswitch,img | |
2272 '(process,fileio,thread)', | |
2273 '-o', etl, | |
2274 '-ets', # Send directly to kernel | |
2275 # Values extracted out of thin air. | |
2276 '-bs', '1024', | |
2277 '-nb', '200', '512', | |
2278 ] | |
2279 logging.debug('Running: %s' % cmd_start) | |
2280 try: | |
2281 subprocess.check_call( | 2317 subprocess.check_call( |
2282 cmd_start, | 2318 cmd_stop, |
2283 stdin=subprocess.PIPE, | 2319 stdin=subprocess.PIPE, |
2284 stdout=subprocess.PIPE, | 2320 stdout=subprocess.PIPE, |
2285 stderr=subprocess.STDOUT) | 2321 stderr=subprocess.STDOUT) |
2286 except subprocess.CalledProcessError, e: | 2322 self._initialized = False |
2287 if e.returncode == -2147024891: | |
2288 print >> sys.stderr, 'Please restart with an elevated admin prompt' | |
2289 elif e.returncode == -2144337737: | |
2290 print >> sys.stderr, ( | |
2291 'A kernel trace was already running, stop it and try again') | |
2292 raise | |
2293 | |
2294 @staticmethod | |
2295 def _stop_log(): | |
2296 """Stops the kernel log collection.""" | |
2297 cmd_stop = [ | |
2298 'logman.exe', | |
2299 'stop', | |
2300 'NT Kernel Logger', | |
2301 '-ets', # Sends the command directly to the kernel. | |
2302 ] | |
2303 logging.debug('Running: %s' % cmd_stop) | |
2304 subprocess.check_call( | |
2305 cmd_stop, | |
2306 stdin=subprocess.PIPE, | |
2307 stdout=subprocess.PIPE, | |
2308 stderr=subprocess.STDOUT) | |
2309 | 2323 |
2310 def post_process_log(self): | 2324 def post_process_log(self): |
2311 """Converts the .etl file into .csv then into .json.""" | 2325 """Converts the .etl file into .csv then into .json.""" |
2312 super(LogmanTrace.Tracer, self).post_process_log() | 2326 super(LogmanTrace.Tracer, self).post_process_log() |
2313 logformat = 'csv' | 2327 logformat = 'csv' |
2314 self._convert_log(logformat) | 2328 self._convert_log(logformat) |
2315 | 2329 |
2316 if logformat == 'csv_utf16': | 2330 if logformat == 'csv_utf16': |
2317 def load_file(): | 2331 def load_file(): |
2318 def utf_8_encoder(unicode_csv_data): | 2332 def utf_8_encoder(unicode_csv_data): |
(...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2480 def parse_log(cls, logname, blacklist): | 2494 def parse_log(cls, logname, blacklist): |
2481 logging.info('parse_log(%s, %s)' % (logname, blacklist)) | 2495 logging.info('parse_log(%s, %s)' % (logname, blacklist)) |
2482 | 2496 |
2483 def blacklist_more(filepath): | 2497 def blacklist_more(filepath): |
2484 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. | 2498 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
2485 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) | 2499 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) |
2486 | 2500 |
2487 data = read_json(logname) | 2501 data = read_json(logname) |
2488 lines = read_json(logname + '.json') | 2502 lines = read_json(logname + '.json') |
2489 out = [] | 2503 out = [] |
2490 for index, item in enumerate(data['traces']): | 2504 for item in data['traces']: |
2491 print >> sys.stderr, ('%d out of %d' % (index, len(data['traces']))) | |
2492 context = cls.Context(blacklist_more, item['pid']) | 2505 context = cls.Context(blacklist_more, item['pid']) |
2493 for line in lines: | 2506 for line in lines: |
2494 context.on_line(line) | 2507 context.on_line(line) |
2495 out.append( | 2508 out.append( |
2496 { | 2509 { |
2497 'results': context.to_results(), | 2510 'results': context.to_results(), |
2498 'trace': item['trace'], | 2511 'trace': item['trace'], |
2499 'output': item['output'], | 2512 'output': item['output'], |
2500 }) | 2513 }) |
2501 return out | 2514 return out |
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2776 for fn in dir(sys.modules[__name__]) | 2789 for fn in dir(sys.modules[__name__]) |
2777 if fn.startswith('CMD'))) | 2790 if fn.startswith('CMD'))) |
2778 | 2791 |
2779 command = get_command_handler(argv[0] if argv else None) | 2792 command = get_command_handler(argv[0] if argv else None) |
2780 parser = gen_parser(command) | 2793 parser = gen_parser(command) |
2781 return command(parser, argv[1:]) | 2794 return command(parser, argv[1:]) |
2782 | 2795 |
2783 | 2796 |
2784 if __name__ == '__main__': | 2797 if __name__ == '__main__': |
2785 sys.exit(main(sys.argv[1:])) | 2798 sys.exit(main(sys.argv[1:])) |
OLD | NEW |