OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 11 matching lines...) Expand all Loading... |
22 import csv | 22 import csv |
23 import getpass | 23 import getpass |
24 import glob | 24 import glob |
25 import json | 25 import json |
26 import logging | 26 import logging |
27 import optparse | 27 import optparse |
28 import os | 28 import os |
29 import re | 29 import re |
30 import subprocess | 30 import subprocess |
31 import sys | 31 import sys |
| 32 import tempfile |
32 import threading | 33 import threading |
| 34 import time |
33 import weakref | 35 import weakref |
34 | 36 |
35 ## OS-specific imports | 37 ## OS-specific imports |
36 | 38 |
37 if sys.platform == 'win32': | 39 if sys.platform == 'win32': |
38 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p | 40 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p |
39 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611 | 41 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611 |
40 from ctypes.wintypes import GetLastError # pylint: disable=E0611 | 42 from ctypes.wintypes import GetLastError # pylint: disable=E0611 |
41 elif sys.platform == 'darwin': | 43 elif sys.platform == 'darwin': |
42 import Carbon.File # pylint: disable=F0401 | 44 import Carbon.File # pylint: disable=F0401 |
(...skipping 1095 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1138 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') | 1140 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') |
1139 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$') | 1141 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$') |
1140 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$') | 1142 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$') |
1141 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') | 1143 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
1142 | 1144 |
1143 O_DIRECTORY = 0x100000 | 1145 O_DIRECTORY = 0x100000 |
1144 | 1146 |
1145 class Process(ApiBase.Context.Process): | 1147 class Process(ApiBase.Context.Process): |
1146 pass | 1148 pass |
1147 | 1149 |
1148 def __init__(self, blacklist): | 1150 def __init__(self, blacklist, tracer_pid, initial_cwd): |
| 1151 logging.info( |
| 1152 '%s(%d, %s)' % (self.__class__.__name__, tracer_pid, initial_cwd)) |
1149 super(Dtrace.Context, self).__init__(blacklist) | 1153 super(Dtrace.Context, self).__init__(blacklist) |
1150 # Process ID of the trace_child_process.py wrapper script instance. | 1154 # Process ID of the trace_child_process.py wrapper script instance. |
1151 self._tracer_pid = None | 1155 self._tracer_pid = tracer_pid |
| 1156 self._initial_cwd = initial_cwd |
1152 # First process to be started by self._tracer_pid. | 1157 # First process to be started by self._tracer_pid. |
1153 self._initial_pid = None | 1158 self._initial_pid = None |
1154 self._initial_cwd = None | |
1155 self._line_number = 0 | 1159 self._line_number = 0 |
1156 | 1160 |
1157 def on_line(self, line): | 1161 def on_line(self, line): |
1158 self._line_number += 1 | 1162 self._line_number += 1 |
1159 match = self.RE_HEADER.match(line) | 1163 match = self.RE_HEADER.match(line) |
1160 if not match: | 1164 if not match: |
1161 raise TracingFailure( | 1165 raise TracingFailure( |
1162 'Found malformed line: %s' % line, | 1166 'Found malformed line: %s' % line, |
1163 None, | 1167 None, |
1164 self._line_number, | 1168 self._line_number, |
(...skipping 30 matching lines...) Expand all Loading... |
1195 if sorted(self.processes) != sorted(p.pid for p in process.all): | 1199 if sorted(self.processes) != sorted(p.pid for p in process.all): |
1196 raise TracingFailure( | 1200 raise TracingFailure( |
1197 'Found internal inconsitency in process lifetime detection', | 1201 'Found internal inconsitency in process lifetime detection', |
1198 None, | 1202 None, |
1199 None, | 1203 None, |
1200 None, | 1204 None, |
1201 sorted(self.processes), | 1205 sorted(self.processes), |
1202 sorted(p.pid for p in process.all)) | 1206 sorted(p.pid for p in process.all)) |
1203 return Results(process) | 1207 return Results(process) |
1204 | 1208 |
1205 def handle_dtrace_BEGIN(self, pid, args): | 1209 def handle_dtrace_BEGIN(self, _pid, args): |
1206 if self._tracer_pid or self._initial_pid: | 1210 if not self.RE_DTRACE_BEGIN.match(args): |
1207 raise TracingFailure( | 1211 raise TracingFailure( |
1208 'Found internal inconsitency in dtrace_BEGIN log line', | 1212 'Found internal inconsitency in dtrace_BEGIN log line', |
1209 None, None, None) | 1213 None, None, None) |
1210 self._tracer_pid = pid | |
1211 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) | |
1212 | 1214 |
1213 def handle_proc_start(self, pid, args): | 1215 def handle_proc_start(self, pid, args): |
1214 """Transfers cwd. | 1216 """Transfers cwd. |
1215 | 1217 |
1216 The dtrace script already takes care of only tracing the processes that | 1218 The dtrace script already takes care of only tracing the processes that |
1217 are child of the traced processes so there is no need to verify the | 1219 are child of the traced processes so there is no need to verify the |
1218 process hierarchy. | 1220 process hierarchy. |
1219 """ | 1221 """ |
1220 if pid in self.processes: | 1222 if pid in self.processes: |
1221 raise TracingFailure( | 1223 raise TracingFailure( |
1222 'Found internal inconsitency in proc_start: %d started two times' % | 1224 'Found internal inconsitency in proc_start: %d started two times' % |
1223 pid, | 1225 pid, |
1224 None, None, None) | 1226 None, None, None) |
1225 match = self.RE_PROC_START.match(args) | 1227 match = self.RE_PROC_START.match(args) |
1226 if not match: | 1228 if not match: |
1227 raise TracingFailure( | 1229 raise TracingFailure( |
1228 'Failed to parse arguments: %s' % args, | 1230 'Failed to parse arguments: %s' % args, |
1229 None, None, None) | 1231 None, None, None) |
1230 ppid = int(match.group(1)) | 1232 ppid = int(match.group(1)) |
1231 if (ppid == self._tracer_pid) == bool(self._initial_pid): | 1233 if ppid == self._tracer_pid and not self._initial_pid: |
1232 raise TracingFailure( | |
1233 ( 'Parent process is _tracer_pid(%d) but _initial_pid(%d) is ' | |
1234 'already set') % (self._tracer_pid, self._initial_pid), | |
1235 None, None, None) | |
1236 if not self._initial_pid: | |
1237 self._initial_pid = pid | 1234 self._initial_pid = pid |
1238 ppid = None | 1235 ppid = None |
1239 cwd = self._initial_cwd | 1236 cwd = self._initial_cwd |
1240 else: | 1237 elif ppid in self.processes: |
1241 parent = self.processes[ppid] | 1238 parent = self.processes[ppid] |
1242 cwd = parent.cwd | 1239 cwd = parent.cwd |
| 1240 else: |
| 1241 # Another process tree, ignore. |
| 1242 return |
1243 proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) | 1243 proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) |
1244 proc.cwd = cwd | 1244 proc.cwd = cwd |
1245 logging.debug('New child: %s -> %d' % (ppid, pid)) | 1245 logging.debug('New child: %s -> %d cwd:%s' % (ppid, pid, unicode(cwd))) |
1246 | 1246 |
1247 def handle_proc_exit(self, pid, _args): | 1247 def handle_proc_exit(self, pid, _args): |
1248 """Removes cwd.""" | 1248 """Removes cwd.""" |
1249 if pid != self._tracer_pid: | 1249 if pid in self.processes: |
1250 # self._tracer_pid is not traced itself. | 1250 # self._tracer_pid is not traced itself and other traces run neither. |
1251 self.processes[pid].cwd = None | 1251 self.processes[pid].cwd = None |
1252 | 1252 |
1253 def handle_execve(self, pid, args): | 1253 def handle_execve(self, pid, args): |
1254 """Sets the process' executable. | 1254 """Sets the process' executable. |
1255 | 1255 |
1256 TODO(maruel): Read command line arguments. See | 1256 TODO(maruel): Read command line arguments. See |
1257 https://discussions.apple.com/thread/1980539 for an example. | 1257 https://discussions.apple.com/thread/1980539 for an example. |
1258 https://gist.github.com/1242279 | 1258 https://gist.github.com/1242279 |
1259 | 1259 |
1260 Will have to put the answer at http://stackoverflow.com/questions/7556249. | 1260 Will have to put the answer at http://stackoverflow.com/questions/7556249. |
1261 :) | 1261 :) |
1262 """ | 1262 """ |
| 1263 if not pid in self.processes: |
| 1264 # Another process tree, ignore. |
| 1265 return |
1263 match = self.RE_EXECVE.match(args) | 1266 match = self.RE_EXECVE.match(args) |
1264 if not match: | 1267 if not match: |
1265 raise TracingFailure( | 1268 raise TracingFailure( |
1266 'Failed to parse arguments: %s' % args, | 1269 'Failed to parse arguments: %s' % args, |
1267 None, None, None) | 1270 None, None, None) |
1268 proc = self.processes[pid] | 1271 proc = self.processes[pid] |
1269 proc.executable = match.group(1) | 1272 proc.executable = match.group(1) |
1270 proc.command = process_quoted_arguments(match.group(3)) | 1273 proc.command = process_quoted_arguments(match.group(3)) |
1271 if int(match.group(2)) != len(proc.command): | 1274 if int(match.group(2)) != len(proc.command): |
1272 raise TracingFailure( | 1275 raise TracingFailure( |
1273 'Failed to parse execve() arguments: %s' % args, | 1276 'Failed to parse execve() arguments: %s' % args, |
1274 None, None, None) | 1277 None, None, None) |
1275 | 1278 |
1276 def handle_chdir(self, pid, args): | 1279 def handle_chdir(self, pid, args): |
1277 """Updates cwd.""" | 1280 """Updates cwd.""" |
1278 if not self._tracer_pid or pid not in self.processes: | 1281 if pid not in self.processes: |
1279 raise TracingFailure( | 1282 # Another process tree, ignore. |
1280 'Found inconsistency in the log, is it an old log?', | 1283 return |
1281 None, None, None) | |
1282 cwd = self.RE_CHDIR.match(args).group(1) | 1284 cwd = self.RE_CHDIR.match(args).group(1) |
1283 if not cwd.startswith('/'): | 1285 if not cwd.startswith('/'): |
1284 cwd2 = os.path.join(self.processes[pid].cwd, cwd) | 1286 cwd2 = os.path.join(self.processes[pid].cwd, cwd) |
1285 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) | 1287 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) |
1286 else: | 1288 else: |
1287 logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) | 1289 logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) |
1288 cwd2 = cwd | 1290 cwd2 = cwd |
1289 self.processes[pid].cwd = cwd2 | 1291 self.processes[pid].cwd = cwd2 |
1290 | 1292 |
1291 def handle_open_nocancel(self, pid, args): | 1293 def handle_open_nocancel(self, pid, args): |
1292 """Redirects to handle_open().""" | 1294 """Redirects to handle_open().""" |
1293 return self.handle_open(pid, args) | 1295 return self.handle_open(pid, args) |
1294 | 1296 |
1295 def handle_open(self, pid, args): | 1297 def handle_open(self, pid, args): |
| 1298 if pid not in self.processes: |
| 1299 # Another process tree, ignore. |
| 1300 return |
1296 match = self.RE_OPEN.match(args) | 1301 match = self.RE_OPEN.match(args) |
1297 if not match: | 1302 if not match: |
1298 raise TracingFailure( | 1303 raise TracingFailure( |
1299 'Failed to parse arguments: %s' % args, | 1304 'Failed to parse arguments: %s' % args, |
1300 None, None, None) | 1305 None, None, None) |
1301 flag = int(match.group(2), 16) | 1306 flag = int(match.group(2), 16) |
1302 if self.O_DIRECTORY & flag == self.O_DIRECTORY: | 1307 if self.O_DIRECTORY & flag == self.O_DIRECTORY: |
1303 # Ignore directories. | 1308 # Ignore directories. |
1304 return | 1309 return |
1305 self._handle_file(pid, match.group(1)) | 1310 self._handle_file(pid, match.group(1)) |
1306 | 1311 |
1307 def handle_rename(self, pid, args): | 1312 def handle_rename(self, pid, args): |
| 1313 if pid not in self.processes: |
| 1314 # Another process tree, ignore. |
| 1315 return |
1308 match = self.RE_RENAME.match(args) | 1316 match = self.RE_RENAME.match(args) |
1309 if not match: | 1317 if not match: |
1310 raise TracingFailure( | 1318 raise TracingFailure( |
1311 'Failed to parse arguments: %s' % args, | 1319 'Failed to parse arguments: %s' % args, |
1312 None, None, None) | 1320 None, None, None) |
1313 self._handle_file(pid, match.group(1)) | 1321 self._handle_file(pid, match.group(1)) |
1314 self._handle_file(pid, match.group(2)) | 1322 self._handle_file(pid, match.group(2)) |
1315 | 1323 |
1316 def _handle_file(self, pid, filepath): | 1324 def _handle_file(self, pid, filepath): |
1317 if not filepath.startswith('/'): | 1325 if not filepath.startswith('/'): |
1318 filepath = os.path.join(self.processes[pid].cwd, filepath) | 1326 filepath = os.path.join(self.processes[pid].cwd, filepath) |
1319 # We can get '..' in the path. | 1327 # We can get '..' in the path. |
1320 filepath = os.path.normpath(filepath) | 1328 filepath = os.path.normpath(filepath) |
1321 # Sadly, still need to filter out directories here; | 1329 # Sadly, still need to filter out directories here; |
1322 # saw open_nocancel(".", 0, 0) = 0 lines. | 1330 # saw open_nocancel(".", 0, 0) = 0 lines. |
1323 if os.path.isdir(filepath): | 1331 if os.path.isdir(filepath): |
1324 return | 1332 return |
1325 self.processes[pid].add_file(filepath) | 1333 self.processes[pid].add_file(filepath) |
1326 | 1334 |
| 1335 def handle_ftruncate(self, pid, args): |
| 1336 """Just used as a signal to kill dtrace, ignoring.""" |
| 1337 pass |
| 1338 |
1327 @staticmethod | 1339 @staticmethod |
1328 def _handle_ignored(pid, args): | 1340 def _handle_ignored(pid, args): |
1329 """Is called for all the event traces that are not handled.""" | 1341 """Is called for all the event traces that are not handled.""" |
1330 raise NotImplementedError('Please implement me') | 1342 raise NotImplementedError('Please implement me') |
1331 | 1343 |
1332 class Tracer(ApiBase.Tracer): | 1344 class Tracer(ApiBase.Tracer): |
1333 # pylint: disable=C0301 | 1345 # pylint: disable=C0301 |
1334 # | 1346 # |
1335 # To understand the following code, you'll want to take a look at: | 1347 # To understand the following code, you'll want to take a look at: |
1336 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref
.html | 1348 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref
.html |
(...skipping 28 matching lines...) Expand all Loading... |
1365 # $ grep syscall probes.txt | less | 1377 # $ grep syscall probes.txt | less |
1366 # or use dtrace directly: | 1378 # or use dtrace directly: |
1367 # $ sudo dtrace -l -P syscall | less | 1379 # $ sudo dtrace -l -P syscall | less |
1368 # | 1380 # |
1369 # trackedpid is an associative array where its value can be 0, 1 or 2. | 1381 # trackedpid is an associative array where its value can be 0, 1 or 2. |
1370 # 0 is for untracked processes and is the default value for items not | 1382 # 0 is for untracked processes and is the default value for items not |
1371 # in the associative array. | 1383 # in the associative array. |
1372 # 1 is for tracked processes. | 1384 # 1 is for tracked processes. |
1373 # 2 is for trace_child_process.py only. It is not tracked itself but | 1385 # 2 is for trace_child_process.py only. It is not tracked itself but |
1374 # all its decendants are. | 1386 # all its decendants are. |
| 1387 # |
| 1388 # The script will kill itself only once waiting_to_die == 1 and |
| 1389 # current_processes == 0, so that both getlogin() was called and that |
| 1390 # all traced processes exited. |
1375 D_CODE = """ | 1391 D_CODE = """ |
1376 dtrace:::BEGIN { | 1392 dtrace:::BEGIN { |
1377 /* Since the child process is already started, initialize | 1393 waiting_to_die = 0; |
1378 current_processes to 1. */ | 1394 current_processes = 0; |
1379 current_processes = 1; | |
1380 logindex = 0; | 1395 logindex = 0; |
1381 trackedpid[TRACER_PID] = 2; | |
1382 printf("%d %d %s_%s(\\"%s\\")\\n", | 1396 printf("%d %d %s_%s(\\"%s\\")\\n", |
1383 logindex, TRACER_PID, probeprov, probename, CWD); | 1397 logindex, PID, probeprov, probename, SCRIPT); |
1384 logindex++; | 1398 logindex++; |
1385 } | 1399 } |
1386 | 1400 |
1387 proc:::start /trackedpid[ppid]/ { | 1401 proc:::start /trackedpid[ppid]/ { |
1388 trackedpid[pid] = 1; | 1402 trackedpid[pid] = 1; |
1389 current_processes += 1; | 1403 current_processes += 1; |
1390 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", | 1404 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", |
1391 logindex, pid, probeprov, probename, | 1405 logindex, pid, probeprov, probename, |
1392 ppid, | 1406 ppid, |
1393 execname, | 1407 execname, |
1394 current_processes); | 1408 current_processes); |
1395 logindex++; | 1409 logindex++; |
1396 } | 1410 } |
1397 proc:::exit /trackedpid[pid] && current_processes == 1/ { | 1411 /* Should use SCRIPT but there is no access to this variable at that |
1398 /* Last process is exiting. */ | 1412 * point. */ |
| 1413 proc:::start /ppid == PID && execname == "Python"/ { |
| 1414 trackedpid[pid] = 2; |
| 1415 current_processes += 1; |
| 1416 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", |
| 1417 logindex, pid, probeprov, probename, |
| 1418 ppid, |
| 1419 execname, |
| 1420 current_processes); |
| 1421 logindex++; |
| 1422 } |
| 1423 proc:::exit /trackedpid[pid] && |
| 1424 current_processes == 1 && |
| 1425 waiting_to_die == 1/ { |
1399 trackedpid[pid] = 0; | 1426 trackedpid[pid] = 0; |
1400 current_processes -= 1; | 1427 current_processes -= 1; |
1401 printf("%d %d %s_%s(%d)\\n", | 1428 printf("%d %d %s_%s(%d)\\n", |
1402 logindex, pid, probeprov, probename, | 1429 logindex, pid, probeprov, probename, |
1403 current_processes); | 1430 current_processes); |
1404 logindex++; | 1431 logindex++; |
1405 exit(0); | 1432 exit(0); |
1406 } | 1433 } |
1407 proc:::exit /trackedpid[pid]/ { | 1434 proc:::exit /trackedpid[pid]/ { |
1408 trackedpid[pid] = 0; | 1435 trackedpid[pid] = 0; |
1409 current_processes -= 1; | 1436 current_processes -= 1; |
1410 printf("%d %d %s_%s(%d)\\n", | 1437 printf("%d %d %s_%s(%d)\\n", |
1411 logindex, pid, probeprov, probename, | 1438 logindex, pid, probeprov, probename, |
1412 current_processes); | 1439 current_processes); |
1413 logindex++; | 1440 logindex++; |
1414 } | 1441 } |
1415 | 1442 |
| 1443 /* Use an arcane function to detect when we need to die */ |
| 1444 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ { |
| 1445 waiting_to_die = 1; |
| 1446 printf("%d %d %s()\\n", logindex, pid, probefunc); |
| 1447 logindex++; |
| 1448 } |
| 1449 syscall::ftruncate:entry / |
| 1450 pid == PID && arg0 == FILE_ID && current_processes == 0/ { |
| 1451 exit(0); |
| 1452 } |
| 1453 |
1416 syscall::open*:entry /trackedpid[pid] == 1/ { | 1454 syscall::open*:entry /trackedpid[pid] == 1/ { |
1417 self->open_arg0 = arg0; | 1455 self->open_arg0 = arg0; |
1418 self->open_arg1 = arg1; | 1456 self->open_arg1 = arg1; |
1419 self->open_arg2 = arg2; | 1457 self->open_arg2 = arg2; |
1420 } | 1458 } |
1421 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ { | 1459 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ { |
1422 this->open_arg0 = copyinstr(self->open_arg0); | 1460 this->open_arg0 = copyinstr(self->open_arg0); |
1423 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n", | 1461 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n", |
1424 logindex, pid, probefunc, | 1462 logindex, pid, probefunc, |
1425 this->open_arg0, | 1463 this->open_arg0, |
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1622 syscall::removexattr:return, | 1660 syscall::removexattr:return, |
1623 syscall::setxattr:return, | 1661 syscall::setxattr:return, |
1624 syscall::stat64:return, | 1662 syscall::stat64:return, |
1625 syscall::stat:return, | 1663 syscall::stat:return, |
1626 syscall::truncate:return, | 1664 syscall::truncate:return, |
1627 syscall::unlink:return, | 1665 syscall::unlink:return, |
1628 syscall::utimes:return, | 1666 syscall::utimes:return, |
1629 */ | 1667 */ |
1630 """ | 1668 """ |
1631 | 1669 |
1632 @classmethod | 1670 def __init__(self, logname): |
1633 def code(cls, pid, cwd): | 1671 """Starts the log collection with dtrace. |
| 1672 |
| 1673 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so |
| 1674 this needs to wait for dtrace to be "warmed up". |
| 1675 """ |
| 1676 super(Dtrace.Tracer, self).__init__(logname) |
| 1677 self._script = os.path.join(BASE_DIR, 'trace_child_process.py') |
| 1678 # This unique dummy temp file is used to signal the dtrace script that it |
| 1679 # should stop as soon as all the child processes are done. A bit hackish |
| 1680 # but works fine enough. |
| 1681 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp( |
| 1682 prefix='trace_signal_file') |
| 1683 |
| 1684 # Note: do not use the -p flag. It's useless if the initial process quits |
| 1685 # too fast, resulting in missing traces from the grand-children. The D |
| 1686 # code manages the dtrace lifetime itself. |
| 1687 trace_cmd = [ |
| 1688 'sudo', |
| 1689 'dtrace', |
| 1690 '-x', 'dynvarsize=4m', |
| 1691 '-x', 'evaltime=exec', |
| 1692 '-o', '/dev/stderr', |
| 1693 '-q', |
| 1694 '-n', self._get_dtrace_code(), |
| 1695 ] |
| 1696 with open(self._logname + '.log', 'wb') as logfile: |
| 1697 self._dtrace = subprocess.Popen( |
| 1698 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) |
| 1699 logging.debug('Started dtrace pid: %d' % self._dtrace.pid) |
| 1700 |
| 1701 # Reads until one line is printed, which signifies dtrace is up and ready. |
| 1702 with open(self._logname + '.log', 'rb') as logfile: |
| 1703 while 'dtrace_BEGIN' not in logfile.readline(): |
| 1704 if self._dtrace.poll() is not None: |
| 1705 # Do a busy wait. :/ |
| 1706 break |
| 1707 logging.debug('dtrace started') |
| 1708 |
| 1709 def _get_dtrace_code(self): |
1634 """Setups the D code to implement child process tracking. | 1710 """Setups the D code to implement child process tracking. |
1635 | 1711 |
1636 Injects the pid and the initial cwd into the trace header for context. | 1712 Injects the cookie in the script so it knows when to stop. |
1637 The reason is that the child process trace_child_process.py is already | 1713 |
1638 running at that point so: | 1714 The script will detect any trace_child_process.py instance and will start |
1639 - no proc_start() is logged for it. | 1715 tracing it. |
1640 - there is no way to figure out the absolute path of cwd in kernel on OSX | |
1641 """ | 1716 """ |
1642 cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%') | |
1643 return ( | 1717 return ( |
1644 'inline int TRACER_PID = %d;\n' | 1718 'inline int PID = %d;\n' |
1645 'inline string CWD = "%s";\n' | 1719 'inline string SCRIPT = "%s";\n' |
| 1720 'inline int FILE_ID = %d;\n' |
1646 '\n' | 1721 '\n' |
1647 '%s\n' | 1722 '%s\n' |
1648 '%s') % (pid, cwd, cls.D_CODE, cls.D_CODE_EXECVE) | 1723 '%s') % ( |
| 1724 os.getpid(), |
| 1725 self._script, |
| 1726 self._dummy_file_id, |
| 1727 self.D_CODE, |
| 1728 self.D_CODE_EXECVE) |
1649 | 1729 |
1650 def trace(self, cmd, cwd, tracename, output): | 1730 def trace(self, cmd, cwd, tracename, output): |
1651 """Runs dtrace on an executable. | 1731 """Runs dtrace on an executable. |
1652 | 1732 |
1653 This dtruss is broken when it starts the process itself or when tracing | 1733 This dtruss is broken when it starts the process itself or when tracing |
1654 child processes, this code starts a wrapper process | 1734 child processes, this code starts a wrapper process |
1655 trace_child_process.py, which waits for dtrace to start, then | 1735 trace_child_process.py, which waits for dtrace to start, then |
1656 trace_child_process.py starts the executable to trace. | 1736 trace_child_process.py starts the executable to trace. |
1657 """ | 1737 """ |
1658 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) | 1738 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
1659 logging.info('Running: %s' % cmd) | 1739 logging.info('Running: %s' % cmd) |
1660 signal = 'Go!' | |
1661 logging.debug('Our pid: %d' % os.getpid()) | 1740 logging.debug('Our pid: %d' % os.getpid()) |
1662 with self._lock: | 1741 with self._lock: |
1663 if not self._initialized: | 1742 if not self._initialized: |
1664 raise TracingFailure( | 1743 raise TracingFailure( |
1665 'Called Tracer.trace() on an unitialized object', | 1744 'Called Tracer.trace() on an unitialized object', |
1666 None, None, None, tracename) | 1745 None, None, None, tracename) |
1667 assert tracename not in (i['trace'] for i in self._traces) | 1746 assert tracename not in (i['trace'] for i in self._traces) |
1668 | 1747 |
1669 # Part 1: start the child process. | 1748 # Starts the script wrapper to start the child process. This signals the |
| 1749 # dtrace script that this process is to be traced. |
1670 stdout = stderr = None | 1750 stdout = stderr = None |
1671 if output: | 1751 if output: |
1672 stdout = subprocess.PIPE | 1752 stdout = subprocess.PIPE |
1673 stderr = subprocess.STDOUT | 1753 stderr = subprocess.STDOUT |
1674 child_cmd = [ | 1754 child_cmd = [ |
1675 sys.executable, | 1755 sys.executable, |
1676 os.path.join(BASE_DIR, 'trace_child_process.py'), | 1756 self._script, |
1677 '--wait', | 1757 tracename, |
1678 ] | 1758 ] |
| 1759 # Call a dummy function so that dtrace knows I'm about to launch a process |
| 1760 # that needs to be traced. |
| 1761 # Yummy. |
1679 child = subprocess.Popen( | 1762 child = subprocess.Popen( |
1680 child_cmd + cmd, | 1763 child_cmd + cmd, |
1681 stdin=subprocess.PIPE, | 1764 stdin=subprocess.PIPE, |
1682 stdout=stdout, | 1765 stdout=stdout, |
1683 stderr=stderr, | 1766 stderr=stderr, |
1684 cwd=cwd) | 1767 cwd=cwd) |
1685 logging.debug('Started child pid: %d' % child.pid) | 1768 logging.debug('Started child pid: %d' % child.pid) |
1686 | 1769 |
1687 # Part 2: start dtrace process. | 1770 out = child.communicate()[0] |
1688 # Note: do not use the -p flag. It's useless if the initial process quits | 1771 # This doesn't mean tracing is done, one of the grand-child process may |
1689 # too fast, resulting in missing traces from the grand-children. The D | 1772 # still be alive. It will be tracked with the dtrace script. |
1690 # code manages the dtrace lifetime itself. | |
1691 trace_cmd = [ | |
1692 'sudo', | |
1693 'dtrace', | |
1694 '-x', 'dynvarsize=4m', | |
1695 '-x', 'evaltime=exec', | |
1696 '-n', self.code(child.pid, cwd), | |
1697 '-o', '/dev/stderr', | |
1698 '-q', | |
1699 ] | |
1700 logname = self._logname + '.log' | |
1701 with open(logname, 'w') as logfile: | |
1702 dtrace = subprocess.Popen( | |
1703 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) | |
1704 logging.debug('Started dtrace pid: %d' % dtrace.pid) | |
1705 | |
1706 # Part 3: Read until one line is printed, which signifies dtrace is up and | |
1707 # ready. | |
1708 with open(logname, 'r') as logfile: | |
1709 while 'dtrace_BEGIN' not in logfile.readline(): | |
1710 if dtrace.poll() is not None: | |
1711 break | |
1712 | |
1713 # Part 4: We can now tell our child to go. | |
1714 # TODO(maruel): Another pipe than stdin could be used instead. This | |
1715 # would be more consistent with the other tracing methods. | |
1716 out = child.communicate(signal)[0] | |
1717 | |
1718 dtrace.wait() | |
1719 if dtrace.returncode != 0: | |
1720 print 'dtrace failure: %d' % dtrace.returncode | |
1721 with open(logname) as logfile: | |
1722 print ''.join(logfile.readlines()[-100:]) | |
1723 # Find a better way. | |
1724 os.remove(logname) | |
1725 | 1773 |
1726 with self._lock: | 1774 with self._lock: |
1727 assert tracename not in (i['trace'] for i in self._traces) | 1775 assert tracename not in (i['trace'] for i in self._traces) |
1728 self._traces.append( | 1776 self._traces.append( |
1729 { | 1777 { |
1730 'cmd': cmd, | 1778 'cmd': cmd, |
1731 'cwd': cwd, | 1779 'cwd': cwd, |
1732 # The pid of strace process, not very useful. | 1780 # The pid of strace process, not very useful. |
1733 'pid': child.pid, | 1781 'pid': child.pid, |
1734 'trace': tracename, | 1782 'trace': tracename, |
1735 'output': out, | 1783 'output': out, |
1736 }) | 1784 }) |
1737 return dtrace.returncode or child.returncode, out | 1785 return child.returncode, out |
| 1786 |
| 1787 def close(self, timeout=None): |
| 1788 """Terminates dtrace.""" |
| 1789 logging.debug('close(%s)' % timeout) |
| 1790 try: |
| 1791 try: |
| 1792 super(Dtrace.Tracer, self).close(timeout) |
| 1793 # Signal dtrace that it should stop now. |
| 1794 os.ftruncate(self._dummy_file_id, 0) |
| 1795 if timeout: |
| 1796 start = time.time() |
| 1797 # Use polling. :/ |
| 1798 while (self._dtrace.poll() is None and |
| 1799 (time.time() - start) < timeout): |
| 1800 time.sleep(0.1) |
| 1801 self._dtrace.kill() |
| 1802 self._dtrace.wait() |
| 1803 finally: |
| 1804 # Make sure to kill it in any case. |
| 1805 if self._dtrace.poll() is None: |
| 1806 try: |
| 1807 self._dtrace.kill() |
| 1808 self._dtrace.wait() |
| 1809 except OSError: |
| 1810 pass |
| 1811 |
| 1812 if self._dtrace.returncode != 0: |
| 1813 # Warn about any dtrace failure but basically ignore it. |
| 1814 print 'dtrace failure: %s' % self._dtrace.returncode |
| 1815 finally: |
| 1816 os.close(self._dummy_file_id) |
| 1817 os.remove(self._dummy_file_name) |
1738 | 1818 |
1739 def post_process_log(self): | 1819 def post_process_log(self): |
1740 """Sorts the log back in order when each call occured. | 1820 """Sorts the log back in order when each call occured. |
1741 | 1821 |
1742 dtrace doesn't save the buffer in strict order since it keeps one buffer | 1822 dtrace doesn't save the buffer in strict order since it keeps one buffer |
1743 per CPU. | 1823 per CPU. |
1744 """ | 1824 """ |
1745 super(Dtrace.Tracer, self).post_process_log() | 1825 super(Dtrace.Tracer, self).post_process_log() |
1746 logname = self._logname + '.log' | 1826 logname = self._logname + '.log' |
1747 with open(logname, 'rb') as logfile: | 1827 with open(logname, 'rb') as logfile: |
(...skipping 18 matching lines...) Expand all Loading... |
1766 for ext in ('', '.log'): | 1846 for ext in ('', '.log'): |
1767 if os.path.isfile(logname + ext): | 1847 if os.path.isfile(logname + ext): |
1768 os.remove(logname + ext) | 1848 os.remove(logname + ext) |
1769 | 1849 |
1770 @classmethod | 1850 @classmethod |
1771 def parse_log(cls, logname, blacklist): | 1851 def parse_log(cls, logname, blacklist): |
1772 logging.info('parse_log(%s, %s)' % (logname, blacklist)) | 1852 logging.info('parse_log(%s, %s)' % (logname, blacklist)) |
1773 data = read_json(logname) | 1853 data = read_json(logname) |
1774 out = [] | 1854 out = [] |
1775 for item in data['traces']: | 1855 for item in data['traces']: |
1776 context = cls.Context(blacklist) | 1856 context = cls.Context(blacklist, item['pid'], item['cwd']) |
1777 for line in open(logname + '.log', 'rb'): | 1857 for line in open(logname + '.log', 'rb'): |
1778 context.on_line(line) | 1858 context.on_line(line) |
1779 out.append( | 1859 out.append( |
1780 { | 1860 { |
1781 'results': context.to_results(), | 1861 'results': context.to_results(), |
1782 'trace': item['trace'], | 1862 'trace': item['trace'], |
1783 'output': item['output'], | 1863 'output': item['output'], |
1784 }) | 1864 }) |
1785 return out | 1865 return out |
1786 | 1866 |
(...skipping 909 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2696 for fn in dir(sys.modules[__name__]) | 2776 for fn in dir(sys.modules[__name__]) |
2697 if fn.startswith('CMD'))) | 2777 if fn.startswith('CMD'))) |
2698 | 2778 |
2699 command = get_command_handler(argv[0] if argv else None) | 2779 command = get_command_handler(argv[0] if argv else None) |
2700 parser = gen_parser(command) | 2780 parser = gen_parser(command) |
2701 return command(parser, argv[1:]) | 2781 return command(parser, argv[1:]) |
2702 | 2782 |
2703 | 2783 |
2704 if __name__ == '__main__': | 2784 if __name__ == '__main__': |
2705 sys.exit(main(sys.argv[1:])) | 2785 sys.exit(main(sys.argv[1:])) |
OLD | NEW |