OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extracts the files accessed | 7 """Traces an executable and its child processes and extracts the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 853 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
864 if not os.path.isdir(f) | 864 if not os.path.isdir(f) |
865 ) | 865 ) |
866 return Results.Process( | 866 return Results.Process( |
867 self.pid, | 867 self.pid, |
868 files, | 868 files, |
869 render_to_string_and_fix_case(self.executable), | 869 render_to_string_and_fix_case(self.executable), |
870 self.command, | 870 self.command, |
871 render_to_string_and_fix_case(self.initial_cwd), | 871 render_to_string_and_fix_case(self.initial_cwd), |
872 children) | 872 children) |
873 | 873 |
874 def add_file(self, filepath): | 874 def add_file(self, filepath, touch_only): |
875 if self.root().blacklist(unicode(filepath)): | 875 if self.root().blacklist(unicode(filepath)): |
876 return | 876 return |
877 logging.debug('add_file(%d, %s)' % (self.pid, filepath)) | 877 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only)) |
878 self.files.add(filepath) | 878 if touch_only: |
| 879 self.only_touched.add(filepath) |
| 880 else: |
| 881 self.files.add(filepath) |
879 | 882 |
880 def __init__(self, blacklist): | 883 def __init__(self, blacklist): |
881 self.blacklist = blacklist | 884 self.blacklist = blacklist |
882 self.processes = {} | 885 self.processes = {} |
883 | 886 |
884 class Tracer(object): | 887 class Tracer(object): |
885 """During its lifetime, the tracing subsystem is enabled.""" | 888 """During its lifetime, the tracing subsystem is enabled.""" |
886 def __init__(self, logname): | 889 def __init__(self, logname): |
887 self._logname = logname | 890 self._logname = logname |
888 self._lock = threading.Lock() | 891 self._lock = threading.Lock() |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1006 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---') | 1009 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---') |
1007 # A process didn't handle a signal. Ignore any junk appearing before, | 1010 # A process didn't handle a signal. Ignore any junk appearing before, |
1008 # because the process was forcibly killed so it won't open any new file. | 1011 # because the process was forcibly killed so it won't open any new file. |
1009 RE_KILLED = re.compile(r'^.*\+\+\+ killed by ([A-Z]+) \+\+\+$') | 1012 RE_KILLED = re.compile(r'^.*\+\+\+ killed by ([A-Z]+) \+\+\+$') |
1010 # A call was canceled. Ignore any prefix. | 1013 # A call was canceled. Ignore any prefix. |
1011 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$') | 1014 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$') |
1012 # Happens when strace fails to even get the function name. | 1015 # Happens when strace fails to even get the function name. |
1013 UNNAMED_FUNCTION = '????' | 1016 UNNAMED_FUNCTION = '????' |
1014 | 1017 |
1015 # Arguments parsing. | 1018 # Arguments parsing. |
| 1019 RE_ACCESS = re.compile(r'^\"(.+?)\", R_[A-Z]+$') |
1016 RE_CHDIR = re.compile(r'^\"(.+?)\"$') | 1020 RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
1017 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$') | 1021 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$') |
1018 RE_OPEN2 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+)$') | 1022 RE_OPEN2 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+)$') |
1019 RE_OPEN3 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+), (\d+)$') | 1023 RE_OPEN3 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+), (\d+)$') |
| 1024 RE_READLINK = re.compile(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$') |
1020 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') | 1025 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
| 1026 RE_STAT = re.compile(r'\"(.+?)\", \{.+?, \.\.\.\}') |
1021 | 1027 |
1022 class RelativePath(object): | 1028 class RelativePath(object): |
1023 """A late-bound relative path.""" | 1029 """A late-bound relative path.""" |
1024 def __init__(self, parent, value): | 1030 def __init__(self, parent, value): |
1025 self.parent = parent | 1031 self.parent = parent |
1026 self.value = value | 1032 self.value = value |
1027 | 1033 |
1028 def render(self): | 1034 def render(self): |
1029 """Returns the current directory this instance is representing. | 1035 """Returns the current directory this instance is representing. |
1030 | 1036 |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1136 raise | 1142 raise |
1137 except (KeyError, NotImplementedError, ValueError), e: | 1143 except (KeyError, NotImplementedError, ValueError), e: |
1138 raise TracingFailure( | 1144 raise TracingFailure( |
1139 'Trace generated a %s exception: %s' % ( | 1145 'Trace generated a %s exception: %s' % ( |
1140 e.__class__.__name__, str(e)), | 1146 e.__class__.__name__, str(e)), |
1141 self.pid, | 1147 self.pid, |
1142 self._line_number, | 1148 self._line_number, |
1143 line, | 1149 line, |
1144 e) | 1150 e) |
1145 | 1151 |
| 1152 def handle_access(self, args, result): |
| 1153 if result.startswith('-1'): |
| 1154 return |
| 1155 match = self.RE_ACCESS.match(args) |
| 1156 self._handle_file(match.group(1), True) |
| 1157 |
1146 def handle_chdir(self, args, result): | 1158 def handle_chdir(self, args, result): |
1147 """Updates cwd.""" | 1159 """Updates cwd.""" |
1148 if not result.startswith('0'): | 1160 if not result.startswith('0'): |
1149 return | 1161 return |
1150 cwd = self.RE_CHDIR.match(args).group(1) | 1162 cwd = self.RE_CHDIR.match(args).group(1) |
1151 self.cwd = self.RelativePath(self, cwd) | 1163 self.cwd = self.RelativePath(self, cwd) |
1152 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd)) | 1164 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd)) |
1153 | 1165 |
1154 def handle_clone(self, _args, result): | 1166 def handle_clone(self, _args, result): |
1155 """Transfers cwd.""" | 1167 """Transfers cwd.""" |
(...skipping 20 matching lines...) Expand all Loading... |
1176 | 1188 |
1177 def handle_execve(self, args, result): | 1189 def handle_execve(self, args, result): |
1178 if result != '0': | 1190 if result != '0': |
1179 return | 1191 return |
1180 match = self.RE_EXECVE.match(args) | 1192 match = self.RE_EXECVE.match(args) |
1181 if not match: | 1193 if not match: |
1182 raise TracingFailure( | 1194 raise TracingFailure( |
1183 'Failed to process execve(%s)' % args, | 1195 'Failed to process execve(%s)' % args, |
1184 None, None, None) | 1196 None, None, None) |
1185 filepath = match.group(1) | 1197 filepath = match.group(1) |
1186 self._handle_file(filepath) | 1198 self._handle_file(filepath, False) |
1187 self.executable = self.RelativePath(self.get_cwd(), filepath) | 1199 self.executable = self.RelativePath(self.get_cwd(), filepath) |
1188 self.command = process_quoted_arguments(match.group(2)) | 1200 self.command = process_quoted_arguments(match.group(2)) |
1189 | 1201 |
1190 def handle_exit_group(self, _args, _result): | 1202 def handle_exit_group(self, _args, _result): |
1191 """Removes cwd.""" | 1203 """Removes cwd.""" |
1192 self.cwd = None | 1204 self.cwd = None |
1193 | 1205 |
1194 @staticmethod | 1206 @staticmethod |
1195 def handle_fork(_args, _result): | 1207 def handle_fork(_args, _result): |
1196 raise NotImplementedError('Unexpected/unimplemented trace fork()') | 1208 raise NotImplementedError('Unexpected/unimplemented trace fork()') |
1197 | 1209 |
| 1210 def handle_getcwd(self, _args, _result): |
| 1211 pass |
| 1212 |
| 1213 def handle_lstat(self, args, result): |
| 1214 if result.startswith('-1'): |
| 1215 return |
| 1216 match = self.RE_STAT.match(args) |
| 1217 self._handle_file(match.group(1), True) |
| 1218 |
1198 def handle_open(self, args, result): | 1219 def handle_open(self, args, result): |
1199 if result.startswith('-1'): | 1220 if result.startswith('-1'): |
1200 return | 1221 return |
1201 args = (self.RE_OPEN3.match(args) or self.RE_OPEN2.match(args)).groups() | 1222 args = (self.RE_OPEN3.match(args) or self.RE_OPEN2.match(args)).groups() |
1202 if 'O_DIRECTORY' in args[1]: | 1223 if 'O_DIRECTORY' in args[1]: |
1203 return | 1224 return |
1204 self._handle_file(args[0]) | 1225 self._handle_file(args[0], False) |
| 1226 |
| 1227 def handle_readlink(self, args, result): |
| 1228 if result.startswith('-1'): |
| 1229 return |
| 1230 match = self.RE_READLINK.match(args) |
| 1231 if not match: |
| 1232 raise TracingFailure( |
| 1233 'Failed to parse: readlink(%s) = %s' % (args, result), |
| 1234 None, |
| 1235 None, |
| 1236 None) |
| 1237 self._handle_file(match.group(1), False) |
1205 | 1238 |
1206 def handle_rename(self, args, result): | 1239 def handle_rename(self, args, result): |
1207 if result.startswith('-1'): | 1240 if result.startswith('-1'): |
1208 return | 1241 return |
1209 args = self.RE_RENAME.match(args).groups() | 1242 args = self.RE_RENAME.match(args).groups() |
1210 self._handle_file(args[0]) | 1243 self._handle_file(args[0], False) |
1211 self._handle_file(args[1]) | 1244 self._handle_file(args[1], False) |
| 1245 |
| 1246 def handle_stat(self, args, result): |
| 1247 if result.startswith('-1'): |
| 1248 return |
| 1249 match = self.RE_STAT.match(args) |
| 1250 self._handle_file(match.group(1), True) |
1212 | 1251 |
1213 @staticmethod | 1252 @staticmethod |
1214 def handle_stat64(_args, _result): | 1253 def handle_stat64(_args, _result): |
1215 raise NotImplementedError('Unexpected/unimplemented trace stat64()') | 1254 raise NotImplementedError('Unexpected/unimplemented trace stat64()') |
1216 | 1255 |
1217 @staticmethod | 1256 @staticmethod |
1218 def handle_vfork(_args, _result): | 1257 def handle_vfork(_args, _result): |
1219 raise NotImplementedError('Unexpected/unimplemented trace vfork()') | 1258 raise NotImplementedError('Unexpected/unimplemented trace vfork()') |
1220 | 1259 |
1221 def _handle_file(self, filepath): | 1260 def _handle_file(self, filepath, touch_only): |
1222 filepath = self.RelativePath(self.get_cwd(), filepath) | 1261 filepath = self.RelativePath(self.get_cwd(), filepath) |
1223 self.add_file(filepath) | 1262 #assert not touch_only, unicode(filepath) |
| 1263 self.add_file(filepath, touch_only) |
1224 | 1264 |
1225 def __init__(self, blacklist, initial_cwd): | 1265 def __init__(self, blacklist, initial_cwd): |
1226 super(Strace.Context, self).__init__(blacklist) | 1266 super(Strace.Context, self).__init__(blacklist) |
1227 self.initial_cwd = initial_cwd | 1267 self.initial_cwd = initial_cwd |
1228 | 1268 |
1229 def render(self): | 1269 def render(self): |
1230 """Returns the string value of the initial cwd of the root process. | 1270 """Returns the string value of the initial cwd of the root process. |
1231 | 1271 |
1232 Used by RelativePath. | 1272 Used by RelativePath. |
1233 """ | 1273 """ |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1277 | 1317 |
1278 @classmethod | 1318 @classmethod |
1279 def traces(cls): | 1319 def traces(cls): |
1280 """Returns the list of all handled traces, to be passed as an argument to | 1320 """Returns the list of all handled traces, to be passed as an argument to |
1281 strace. | 1321 strace. |
1282 """ | 1322 """ |
1283 prefix = 'handle_' | 1323 prefix = 'handle_' |
1284 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] | 1324 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] |
1285 | 1325 |
1286 class Tracer(ApiBase.Tracer): | 1326 class Tracer(ApiBase.Tracer): |
| 1327 MAX_LEN = 256 |
| 1328 |
1287 def trace(self, cmd, cwd, tracename, output): | 1329 def trace(self, cmd, cwd, tracename, output): |
1288 """Runs strace on an executable.""" | 1330 """Runs strace on an executable.""" |
1289 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) | 1331 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
1290 assert os.path.isabs(cmd[0]), cmd[0] | 1332 assert os.path.isabs(cmd[0]), cmd[0] |
1291 assert os.path.isabs(cwd), cwd | 1333 assert os.path.isabs(cwd), cwd |
1292 assert os.path.normpath(cwd) == cwd, cwd | 1334 assert os.path.normpath(cwd) == cwd, cwd |
1293 with self._lock: | 1335 with self._lock: |
1294 if not self._initialized: | 1336 if not self._initialized: |
1295 raise TracingFailure( | 1337 raise TracingFailure( |
1296 'Called Tracer.trace() on an unitialized object', | 1338 'Called Tracer.trace() on an unitialized object', |
1297 None, None, None, tracename) | 1339 None, None, None, tracename) |
1298 assert tracename not in (i['trace'] for i in self._traces) | 1340 assert tracename not in (i['trace'] for i in self._traces) |
1299 stdout = stderr = None | 1341 stdout = stderr = None |
1300 if output: | 1342 if output: |
1301 stdout = subprocess.PIPE | 1343 stdout = subprocess.PIPE |
1302 stderr = subprocess.STDOUT | 1344 stderr = subprocess.STDOUT |
1303 traces = ','.join(Strace.Context.traces()) | 1345 # Ensure all file related APIs are hooked. |
| 1346 traces = ','.join(Strace.Context.traces() + ['file']) |
1304 trace_cmd = [ | 1347 trace_cmd = [ |
1305 'strace', | 1348 'strace', |
1306 '-ff', | 1349 '-ff', |
1307 '-s', '256', | 1350 '-s', '%d' % self.MAX_LEN, |
1308 '-e', 'trace=%s' % traces, | 1351 '-e', 'trace=%s' % traces, |
1309 '-o', self._logname + '.' + tracename, | 1352 '-o', self._logname + '.' + tracename, |
1310 ] | 1353 ] |
1311 child = subprocess.Popen( | 1354 child = subprocess.Popen( |
1312 trace_cmd + cmd, | 1355 trace_cmd + cmd, |
1313 cwd=cwd, | 1356 cwd=cwd, |
1314 stdin=subprocess.PIPE, | 1357 stdin=subprocess.PIPE, |
1315 stdout=stdout, | 1358 stdout=stdout, |
1316 stderr=stderr) | 1359 stderr=stderr) |
1317 out = child.communicate()[0] | 1360 out = child.communicate()[0] |
(...skipping 258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1576 | 1619 |
1577 def _handle_file(self, pid, filepath): | 1620 def _handle_file(self, pid, filepath): |
1578 if not filepath.startswith('/'): | 1621 if not filepath.startswith('/'): |
1579 filepath = os.path.join(self.processes[pid].cwd, filepath) | 1622 filepath = os.path.join(self.processes[pid].cwd, filepath) |
1580 # We can get '..' in the path. | 1623 # We can get '..' in the path. |
1581 filepath = os.path.normpath(filepath) | 1624 filepath = os.path.normpath(filepath) |
1582 # Sadly, still need to filter out directories here; | 1625 # Sadly, still need to filter out directories here; |
1583 # saw open_nocancel(".", 0, 0) = 0 lines. | 1626 # saw open_nocancel(".", 0, 0) = 0 lines. |
1584 if os.path.isdir(filepath): | 1627 if os.path.isdir(filepath): |
1585 return | 1628 return |
1586 self.processes[pid].add_file(filepath) | 1629 self.processes[pid].add_file(filepath, False) |
1587 | 1630 |
1588 def handle_ftruncate(self, pid, args): | 1631 def handle_ftruncate(self, pid, args): |
1589 """Just used as a signal to kill dtrace, ignoring.""" | 1632 """Just used as a signal to kill dtrace, ignoring.""" |
1590 pass | 1633 pass |
1591 | 1634 |
1592 @staticmethod | 1635 @staticmethod |
1593 def _handle_ignored(pid, args): | 1636 def _handle_ignored(pid, args): |
1594 """Is called for all the event traces that are not handled.""" | 1637 """Is called for all the event traces that are not handled.""" |
1595 raise NotImplementedError('Please implement me') | 1638 raise NotImplementedError('Please implement me') |
1596 | 1639 |
(...skipping 703 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2300 #IRP = self.USER_DATA | 2343 #IRP = self.USER_DATA |
2301 TTID = self.USER_DATA + 1 # Thread ID, that's what we want. | 2344 TTID = self.USER_DATA + 1 # Thread ID, that's what we want. |
2302 FILE_OBJECT = self.USER_DATA + 2 | 2345 FILE_OBJECT = self.USER_DATA + 2 |
2303 #FILE_KEY = self.USER_DATA + 3 | 2346 #FILE_KEY = self.USER_DATA + 3 |
2304 proc = self._thread_to_process(line[TTID]) | 2347 proc = self._thread_to_process(line[TTID]) |
2305 if not proc: | 2348 if not proc: |
2306 # Not a process we care about. | 2349 # Not a process we care about. |
2307 return | 2350 return |
2308 file_object = line[FILE_OBJECT] | 2351 file_object = line[FILE_OBJECT] |
2309 if file_object in proc.file_objects: | 2352 if file_object in proc.file_objects: |
2310 proc.add_file(proc.file_objects.pop(file_object)) | 2353 proc.add_file(proc.file_objects.pop(file_object), False) |
2311 | 2354 |
2312 def handle_FileIo_Create(self, line): | 2355 def handle_FileIo_Create(self, line): |
2313 """Handles a file open. | 2356 """Handles a file open. |
2314 | 2357 |
2315 All FileIo events are described at | 2358 All FileIo events are described at |
2316 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx | 2359 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx |
2317 for some value of 'description'. | 2360 for some value of 'description'. |
2318 | 2361 |
2319 " (..) process and thread id values of the IO events (..) are not valid " | 2362 " (..) process and thread id values of the IO events (..) are not valid " |
2320 http://msdn.microsoft.com/magazine/ee358703.aspx | 2363 http://msdn.microsoft.com/magazine/ee358703.aspx |
(...skipping 784 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3105 return command(argv[1:]) | 3148 return command(argv[1:]) |
3106 except TracingFailure, e: | 3149 except TracingFailure, e: |
3107 sys.stderr.write('\nError: ') | 3150 sys.stderr.write('\nError: ') |
3108 sys.stderr.write(str(e)) | 3151 sys.stderr.write(str(e)) |
3109 sys.stderr.write('\n') | 3152 sys.stderr.write('\n') |
3110 return 1 | 3153 return 1 |
3111 | 3154 |
3112 | 3155 |
3113 if __name__ == '__main__': | 3156 if __name__ == '__main__': |
3114 sys.exit(main(sys.argv[1:])) | 3157 sys.exit(main(sys.argv[1:])) |
OLD | NEW |