OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
11 interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise. | 11 interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise. |
12 The OS-specific implementation is hidden in an 'API' interface. | 12 The OS-specific implementation is hidden in an 'API' interface. |
13 | 13 |
14 The list is done in two phases, the first is to do the actual trace and generate | 14 The results are embedded in a Results instance. The tracing is done in two |
15 an technique-specific log file. Then the log file is parsed to extract the | 15 phases, the first is to do the actual trace and generate an |
16 implementation-specific log file. Then the log file is parsed to extract the | |
16 information, including the individual child processes and the files accessed | 17 information, including the individual child processes and the files accessed |
17 from the log. | 18 from the log. |
18 """ | 19 """ |
19 | 20 |
20 import codecs | 21 import codecs |
21 import csv | 22 import csv |
22 import glob | 23 import glob |
23 import json | 24 import json |
24 import logging | 25 import logging |
25 import optparse | 26 import optparse |
(...skipping 944 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
970 """Uses DTrace framework through dtrace. Requires root access. | 971 """Uses DTrace framework through dtrace. Requires root access. |
971 | 972 |
972 Implies Mac OSX. | 973 Implies Mac OSX. |
973 | 974 |
974 dtruss can't be used because it has compatibility issues with python. | 975 dtruss can't be used because it has compatibility issues with python. |
975 | 976 |
976 Also, the pid->cwd handling needs to be done manually since OSX has no way to | 977 Also, the pid->cwd handling needs to be done manually since OSX has no way to |
977 get the absolute path of the 'cwd' dtrace variable from the probe. | 978 get the absolute path of the 'cwd' dtrace variable from the probe. |
978 | 979 |
979 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see | 980 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see |
980 https://discussions.apple.com/thread/1980539. | 981 https://discussions.apple.com/thread/1980539. So resort to handling execve() |
982 manually. | |
983 | |
984 errno is not printed in the log since this implementation currently care only | |
MAD
2012/06/28 13:04:57
care only -> only cares
| |
985 about files that were successfully opened. | |
981 """ | 986 """ |
982 IGNORED = ( | 987 IGNORED = ( |
983 '/.vol', | 988 '/.vol', |
984 '/Library', | 989 '/Library', |
985 '/System', | 990 '/System', |
986 '/dev', | 991 '/dev', |
987 '/etc', | 992 '/etc', |
988 '/private/var', | 993 '/private/var', |
989 '/tmp', | 994 '/tmp', |
990 '/usr', | 995 '/usr', |
991 '/var', | 996 '/var', |
992 ) | 997 ) |
993 | 998 |
994 # pylint: disable=C0301 | 999 # pylint: disable=C0301 |
1000 # | |
995 # To understand the following code, you'll want to take a look at: | 1001 # To understand the following code, you'll want to take a look at: |
996 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.h tml | 1002 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.h tml |
997 # https://wikis.oracle.com/display/DTrace/Variables | 1003 # https://wikis.oracle.com/display/DTrace/Variables |
998 # http://docs.oracle.com/cd/E19205-01/820-4221/ | 1004 # http://docs.oracle.com/cd/E19205-01/820-4221/ |
999 # | 1005 # |
1000 # The list of valid probes can be retrieved with: | 1006 # 0. Dump all the valid probes into a text file. It is important, you |
1001 # sudo dtrace -l -P syscall | less | 1007 # want to redirect into a file and you don't want to constantly 'sudo'. |
1008 # $ sudo dtrace -l > probes.txt | |
1009 # | |
1010 # 1. Count the number of probes: | |
1011 # $ wc -l probes.txt | |
1012 # 81823 # On OSX 10.7, including 1 header line. | |
1013 # | |
1014 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the | |
1015 # likes and skipping the header with NR>1: | |
1016 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]' | |
1017 # dtrace | |
1018 # fbt | |
1019 # io | |
1020 # ip | |
1021 # lockstat | |
1022 # mach_trap | |
1023 # proc | |
1024 # profile | |
1025 # sched | |
1026 # syscall | |
1027 # tcp | |
1028 # vminfo | |
1029 # | |
1030 # 3. List of valid probes: | |
1031 # $ grep syscall probes.txt | less | |
1032 # or use dtrace directly: | |
1033 # $ sudo dtrace -l -P syscall | less | |
1034 | |
1002 D_CODE = """ | 1035 D_CODE = """ |
1003 proc:::start /trackedpid[ppid]/ { | 1036 proc:::start /trackedpid[ppid]/ { |
1004 trackedpid[pid] = 1; | 1037 trackedpid[pid] = 1; |
1005 current_processes += 1; | 1038 current_processes += 1; |
1006 printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", | 1039 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", |
1007 logindex, ppid, pid, probeprov, probename, execname, | 1040 logindex, pid, probeprov, probename, |
1041 ppid, | |
1042 execname, | |
1008 current_processes); | 1043 current_processes); |
1009 logindex++; | 1044 logindex++; |
1010 } | 1045 } |
1011 proc:::exit /trackedpid[pid] && current_processes == 1/ { | 1046 proc:::exit /trackedpid[pid] && current_processes == 1/ { |
1047 /* Last process is exiting. */ | |
1012 trackedpid[pid] = 0; | 1048 trackedpid[pid] = 0; |
1013 current_processes -= 1; | 1049 current_processes -= 1; |
1014 printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", | 1050 printf("%d %d %s_%s(%d)\\n", |
1015 logindex, ppid, pid, probeprov, probename, execname, | 1051 logindex, pid, probeprov, probename, |
1016 current_processes); | 1052 current_processes); |
1017 logindex++; | 1053 logindex++; |
1018 exit(0); | 1054 exit(0); |
1019 } | 1055 } |
1020 proc:::exit /trackedpid[pid]/ { | 1056 proc:::exit /trackedpid[pid]/ { |
1021 trackedpid[pid] = 0; | 1057 trackedpid[pid] = 0; |
1022 current_processes -= 1; | 1058 current_processes -= 1; |
1023 printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", | 1059 printf("%d %d %s_%s(%d)\\n", |
1024 logindex, ppid, pid, probeprov, probename, execname, | 1060 logindex, pid, probeprov, probename, |
1025 current_processes); | 1061 current_processes); |
1026 logindex++; | 1062 logindex++; |
1027 } | 1063 } |
1028 | 1064 |
1029 /* Finally what we care about! */ | 1065 /* Finally what we care about! */ |
1030 syscall::exec*:entry /trackedpid[pid]/ { | 1066 syscall::exec*:entry /trackedpid[pid] == 1/ { |
1031 self->e_arg0 = copyinstr(arg0); | 1067 self->e_arg0 = copyinstr(arg0); |
1032 /* Incrementally probe for a NULL in the argv parameter of execve() to | 1068 /* Incrementally probe for a NULL in the argv parameter of execve() to |
1033 * figure out argc. */ | 1069 * figure out argc. */ |
1070 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was | |
1071 * found. */ | |
1034 self->argc = 0; | 1072 self->argc = 0; |
1035 self->argv = (user_addr_t*)copyin( | 1073 self->argv = (user_addr_t*)copyin( |
1036 arg1, sizeof(user_addr_t) * (self->argc + 1)); | 1074 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
1037 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | 1075 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
1038 self->argv = (user_addr_t*)copyin( | 1076 self->argv = (user_addr_t*)copyin( |
1039 arg1, sizeof(user_addr_t) * (self->argc + 1)); | 1077 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
1040 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | 1078 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
1041 self->argv = (user_addr_t*)copyin( | 1079 self->argv = (user_addr_t*)copyin( |
1042 arg1, sizeof(user_addr_t) * (self->argc + 1)); | 1080 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
1043 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | 1081 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
1044 self->argv = (user_addr_t*)copyin( | 1082 self->argv = (user_addr_t*)copyin( |
1045 arg1, sizeof(user_addr_t) * (self->argc + 1)); | 1083 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
1046 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | 1084 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
1047 self->argv = (user_addr_t*)copyin( | 1085 self->argv = (user_addr_t*)copyin( |
1048 arg1, sizeof(user_addr_t) * (self->argc + 1)); | 1086 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
1049 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | 1087 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
1050 self->argv = (user_addr_t*)copyin( | 1088 self->argv = (user_addr_t*)copyin( |
1051 arg1, sizeof(user_addr_t) * (self->argc + 1)); | 1089 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
1052 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | 1090 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
1053 self->argv = (user_addr_t*)copyin( | 1091 self->argv = (user_addr_t*)copyin( |
1054 arg1, sizeof(user_addr_t) * (self->argc + 1)); | 1092 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
1093 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | |
1094 self->argv = (user_addr_t*)copyin( | |
1095 arg1, sizeof(user_addr_t) * (self->argc + 1)); | |
1096 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | |
1097 self->argv = (user_addr_t*)copyin( | |
1098 arg1, sizeof(user_addr_t) * (self->argc + 1)); | |
MAD
2012/06/28 13:04:57
I guess you can't use a loop for this... A bit sad
| |
1099 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; | |
1100 self->argv = (user_addr_t*)copyin( | |
1101 arg1, sizeof(user_addr_t) * (self->argc + 1)); | |
1055 | 1102 |
1056 /* Copy the inputs strings since there is no guarantee they'll be | 1103 /* Copy the inputs strings since there is no guarantee they'll be |
1057 * present after the call completed. */ | 1104 * present after the call completed. */ |
1058 self->args[0] = (self->argc > 0) ? copyinstr(self->argv[0]) : ""; | 1105 self->args[0] = (self->argc > 0) ? copyinstr(self->argv[0]) : ""; |
1059 self->args[1] = (self->argc > 1) ? copyinstr(self->argv[1]) : ""; | 1106 self->args[1] = (self->argc > 1) ? copyinstr(self->argv[1]) : ""; |
1060 self->args[2] = (self->argc > 2) ? copyinstr(self->argv[2]) : ""; | 1107 self->args[2] = (self->argc > 2) ? copyinstr(self->argv[2]) : ""; |
1061 self->args[3] = (self->argc > 3) ? copyinstr(self->argv[3]) : ""; | 1108 self->args[3] = (self->argc > 3) ? copyinstr(self->argv[3]) : ""; |
1062 self->args[4] = (self->argc > 4) ? copyinstr(self->argv[4]) : ""; | 1109 self->args[4] = (self->argc > 4) ? copyinstr(self->argv[4]) : ""; |
1063 self->args[5] = (self->argc > 5) ? copyinstr(self->argv[5]) : ""; | 1110 self->args[5] = (self->argc > 5) ? copyinstr(self->argv[5]) : ""; |
1064 self->args[6] = (self->argc > 6) ? copyinstr(self->argv[6]) : ""; | 1111 self->args[6] = (self->argc > 6) ? copyinstr(self->argv[6]) : ""; |
1065 self->args[7] = (self->argc > 7) ? copyinstr(self->argv[7]) : ""; | 1112 self->args[7] = (self->argc > 7) ? copyinstr(self->argv[7]) : ""; |
1066 self->args[8] = (self->argc > 8) ? copyinstr(self->argv[8]) : ""; | 1113 self->args[8] = (self->argc > 8) ? copyinstr(self->argv[8]) : ""; |
1067 self->args[9] = (self->argc > 9) ? copyinstr(self->argv[9]) : ""; | 1114 self->args[9] = (self->argc > 9) ? copyinstr(self->argv[9]) : ""; |
1068 } | 1115 } |
1069 syscall::exec*: /trackedpid[pid] && errno == 0/ { | 1116 syscall::exec*:return /trackedpid[pid] == 1 && errno == 0/ { |
1070 /* We need to join strings here, as using multiple printf() would cause | 1117 /* We need to join strings here, as using multiple printf() would cause |
1071 * tearing when multiple threads/processes are traced. */ | 1118 * tearing when multiple threads/processes are traced. */ |
1072 this->args = ""; | 1119 this->args = ""; |
1073 this->args = strjoin(this->args, (self->argc > 0) ? ", \\"" : ""); | 1120 this->args = strjoin(this->args, (self->argc > 0) ? ", \\"" : ""); |
1074 this->args = strjoin(this->args, (self->argc > 0) ? self->args[0] : ""); | 1121 this->args = strjoin(this->args, (self->argc > 0) ? self->args[0] : ""); |
1075 this->args = strjoin(this->args, (self->argc > 0) ? "\\"" : ""); | 1122 this->args = strjoin(this->args, (self->argc > 0) ? "\\"" : ""); |
1076 | 1123 |
1077 this->args = strjoin(this->args, (self->argc > 1) ? ", \\"" : ""); | 1124 this->args = strjoin(this->args, (self->argc > 1) ? ", \\"" : ""); |
1078 this->args = strjoin(this->args, (self->argc > 1) ? self->args[1] : ""); | 1125 this->args = strjoin(this->args, (self->argc > 1) ? self->args[1] : ""); |
1079 this->args = strjoin(this->args, (self->argc > 1) ? "\\"" : ""); | 1126 this->args = strjoin(this->args, (self->argc > 1) ? "\\"" : ""); |
(...skipping 25 matching lines...) Expand all Loading... | |
1105 this->args = strjoin(this->args, (self->argc > 8) ? ", \\"" : ""); | 1152 this->args = strjoin(this->args, (self->argc > 8) ? ", \\"" : ""); |
1106 this->args = strjoin(this->args, (self->argc > 8) ? self->args[8] : ""); | 1153 this->args = strjoin(this->args, (self->argc > 8) ? self->args[8] : ""); |
1107 this->args = strjoin(this->args, (self->argc > 8) ? "\\"" : ""); | 1154 this->args = strjoin(this->args, (self->argc > 8) ? "\\"" : ""); |
1108 | 1155 |
1109 this->args = strjoin(this->args, (self->argc > 9) ? ", \\"" : ""); | 1156 this->args = strjoin(this->args, (self->argc > 9) ? ", \\"" : ""); |
1110 this->args = strjoin(this->args, (self->argc > 9) ? self->args[9]: ""); | 1157 this->args = strjoin(this->args, (self->argc > 9) ? self->args[9]: ""); |
1111 this->args = strjoin(this->args, (self->argc > 9) ? "\\"" : ""); | 1158 this->args = strjoin(this->args, (self->argc > 9) ? "\\"" : ""); |
1112 | 1159 |
1113 /* Prints self->argc to permits verifying the internal consistency since | 1160 /* Prints self->argc to permits verifying the internal consistency since |
1114 * this code is quite fishy. */ | 1161 * this code is quite fishy. */ |
1115 printf("%d %d:%d %s(\\"%s\\", [%d%s]) = %d\\n", | 1162 printf("%d %d %s(\\"%s\\", [%d%s])\\n", |
1116 logindex, ppid, pid, probefunc, | 1163 logindex, pid, probefunc, |
1117 self->e_arg0, | 1164 self->e_arg0, |
1118 self->argc, | 1165 self->argc, |
1119 this->args, | 1166 this->args); |
1120 errno); | |
1121 logindex++; | 1167 logindex++; |
1122 | 1168 } |
1123 /* TODO(maruel): Clean up memory | 1169 syscall::exec*:return /trackedpid[pid] == 1/ { |
1124 self->e_arg0 = 0; | 1170 self->e_arg0 = 0; |
1125 self->argc = 0; | 1171 self->argc = 0; |
1126 self->args[0] = 0; | 1172 self->args[0] = 0; |
1127 self->args[1] = 0; | 1173 self->args[1] = 0; |
1128 self->args[2] = 0; | 1174 self->args[2] = 0; |
1129 self->args[3] = 0; | 1175 self->args[3] = 0; |
1130 self->args[4] = 0; | 1176 self->args[4] = 0; |
1131 self->args[5] = 0; | 1177 self->args[5] = 0; |
1132 self->args[6] = 0; | 1178 self->args[6] = 0; |
1133 self->args[7] = 0; | 1179 self->args[7] = 0; |
1134 self->args[8] = 0; | 1180 self->args[8] = 0; |
1135 self->args[9] = 0; | 1181 self->args[9] = 0; |
1136 */ | |
1137 } | 1182 } |
1138 | 1183 |
1139 syscall::open*:entry /trackedpid[pid]/ { | 1184 /* These are a good learning experience, since it traces a lot of things |
MAD
2012/06/28 13:04:57
These are -> This is
| |
1140 self->arg0 = arg0; | 1185 * related to the process and child processes. |
1141 self->arg1 = arg1; | 1186 * Warning: it generates a gigantic log. For example, tracing |
1142 self->arg2 = arg2; | 1187 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes |
1188 * several minutes to execute. | |
1189 */ | |
1190 /* | |
1191 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { | |
1192 printf("%d %d %s_%s() = %d\\n", | |
1193 logindex, pid, probeprov, probefunc, errno); | |
1194 logindex++; | |
1143 } | 1195 } |
1144 syscall::open*:return /trackedpid[pid] && errno == 0/ { | 1196 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
1145 printf("%d %d:%d %s(\\"%s\\", %d, %d) = %d\\n", | 1197 printf("%d %d %s_%s() = %d\\n", |
1146 logindex, ppid, pid, probefunc, copyinstr(self->arg0), | 1198 logindex, pid, probeprov, probefunc, errno); |
1147 self->arg1, self->arg2, errno); | |
1148 logindex++; | 1199 logindex++; |
1149 self->arg0 = 0; | 1200 } |
1150 self->arg1 = 0; | 1201 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
1151 self->arg2 = 0; | 1202 printf("%d %d %s_%s() = %d\\n", |
1203 logindex, pid, probeprov, probefunc, errno); | |
1204 logindex++; | |
1205 } | |
1206 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { | |
1207 printf("%d %d %s_%s() = %d\\n", | |
1208 logindex, pid, probeprov, probefunc, errno); | |
1209 logindex++; | |
1210 } | |
1211 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { | |
1212 printf("%d %d %s_%s() = %d\\n", | |
1213 logindex, pid, probeprov, probefunc, errno); | |
1214 logindex++; | |
1215 } | |
1216 */ | |
1217 | |
1218 syscall::open*:entry /trackedpid[pid] == 1/ { | |
1219 self->open_arg0 = arg0; | |
1220 self->open_arg1 = arg1; | |
1221 self->open_arg2 = arg2; | |
1222 } | |
1223 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ { | |
1224 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n", | |
1225 logindex, pid, probefunc, | |
1226 copyinstr(self->open_arg0), | |
1227 self->open_arg1, | |
1228 self->open_arg2); | |
1229 logindex++; | |
1230 } | |
1231 syscall::open*:return /trackedpid[pid] == 1/ { | |
1232 self->open_arg0 = 0; | |
1233 self->open_arg1 = 0; | |
1234 self->open_arg2 = 0; | |
1152 } | 1235 } |
1153 | 1236 |
1154 syscall::rename:entry /trackedpid[pid]/ { | 1237 syscall::rename:entry /trackedpid[pid] == 1/ { |
1155 self->arg0 = arg0; | 1238 self->rename_arg0 = arg0; |
1156 self->arg1 = arg1; | 1239 self->rename_arg1 = arg1; |
1157 } | 1240 } |
1158 syscall::rename:return /trackedpid[pid]/ { | 1241 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ { |
1159 printf("%d %d:%d %s(\\"%s\\", \\"%s\\") = %d\\n", | 1242 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n", |
1160 logindex, ppid, pid, probefunc, copyinstr(self->arg0), | 1243 logindex, pid, probefunc, |
1161 copyinstr(self->arg1), errno); | 1244 copyinstr(self->rename_arg0), |
1245 copyinstr(self->rename_arg1)); | |
1162 logindex++; | 1246 logindex++; |
1163 self->arg0 = 0; | 1247 } |
1164 self->arg1 = 0; | 1248 syscall::rename:return /trackedpid[pid] == 1/ { |
1249 self->rename_arg0 = 0; | |
1250 self->rename_arg1 = 0; | |
1165 } | 1251 } |
1166 | 1252 |
1167 /* Track chdir, it's painful because it is only receiving relative path */ | 1253 /* Track chdir, it's painful because it is only receiving relative path */ |
1168 syscall::chdir:entry /trackedpid[pid]/ { | 1254 syscall::chdir:entry /trackedpid[pid] == 1/ { |
1169 self->arg0 = arg0; | 1255 self->chdir_arg0 = arg0; |
1170 } | 1256 } |
1171 syscall::chdir:return /trackedpid[pid] && errno == 0/ { | 1257 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ { |
1172 printf("%d %d:%d %s(\\"%s\\") = %d\\n", | 1258 printf("%d %d %s(\\"%s\\")\\n", |
1173 logindex, ppid, pid, probefunc, copyinstr(self->arg0), errno); | 1259 logindex, pid, probefunc, |
1260 copyinstr(self->chdir_arg0)); | |
1174 logindex++; | 1261 logindex++; |
1175 self->arg0 = 0; | 1262 } |
1263 syscall::chdir:return /trackedpid[pid] == 1/ { | |
1264 self->chdir_arg0 = 0; | |
1176 } | 1265 } |
1177 /* TODO(maruel): *stat* functions and friends | 1266 /* TODO(maruel): *stat* functions and friends |
1178 syscall::access:return, | 1267 syscall::access:return, |
1179 syscall::chdir:return, | 1268 syscall::chdir:return, |
1180 syscall::chflags:return, | 1269 syscall::chflags:return, |
1181 syscall::chown:return, | 1270 syscall::chown:return, |
1182 syscall::chroot:return, | 1271 syscall::chroot:return, |
1183 syscall::getattrlist:return, | 1272 syscall::getattrlist:return, |
1184 syscall::getxattr:return, | 1273 syscall::getxattr:return, |
1185 syscall::lchown:return, | 1274 syscall::lchown:return, |
(...skipping 10 matching lines...) Expand all Loading... | |
1196 syscall::unlink:return, | 1285 syscall::unlink:return, |
1197 syscall::utimes:return, | 1286 syscall::utimes:return, |
1198 */ | 1287 */ |
1199 """ | 1288 """ |
1200 | 1289 |
1201 @classmethod | 1290 @classmethod |
1202 def code(cls, pid, cwd): | 1291 def code(cls, pid, cwd): |
1203 """Setups the D code to implement child process tracking. | 1292 """Setups the D code to implement child process tracking. |
1204 | 1293 |
1205 Injects the pid and the initial cwd into the trace header for context. | 1294 Injects the pid and the initial cwd into the trace header for context. |
1206 The reason is that the child process is already running at that point so: | 1295 The reason is that the child process trace_child_process.py is already |
1296 running at that point so: | |
1207 - no proc_start() is logged for it. | 1297 - no proc_start() is logged for it. |
1208 - there is no way to figure out the absolute path of cwd in kernel on OSX | 1298 - there is no way to figure out the absolute path of cwd in kernel on OSX |
1209 | 1299 |
1210 Since the child process is already started, initialize current_processes to | 1300 Since the child process is already started, initialize current_processes to |
1211 1. | 1301 1. |
1302 | |
1212 """ | 1303 """ |
1213 pid = str(pid) | 1304 pid = str(pid) |
1214 cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%') | 1305 cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%') |
1215 return ( | 1306 return ( |
1216 'dtrace:::BEGIN {\n' | 1307 'dtrace:::BEGIN {\n' |
1217 ' current_processes = 1;\n' | 1308 ' current_processes = 1;\n' |
1218 ' logindex = 0;\n' | 1309 ' logindex = 0;\n' |
1219 ' trackedpid[' + pid + '] = 1;\n' | 1310 # trackedpid is an associative array where its value can be 0, 1 or 2. |
1220 ' printf("%d %d:%d %s_%s(\\"' + cwd + '\\") = 0\\n",\n' | 1311 # 0 is for untracked processes and is the default value for items not |
1221 ' logindex, ppid, ' + pid + ', probeprov, probename);\n' | 1312 # in the associative array. |
1313 # 1 is for tracked processes. | |
1314 # 2 is for trace_child_process.py only. It is not tracked itself but | |
1315 # all its decendants are. | |
1316 ' trackedpid[' + pid + '] = 2;\n' | |
1317 ' printf("%d %d %s_%s(\\"' + cwd + '\\")\\n",\n' | |
1318 ' logindex, ' + pid + ', probeprov, probename);\n' | |
1222 ' logindex++;\n' | 1319 ' logindex++;\n' |
1223 '}\n') + cls.D_CODE | 1320 '}\n') + cls.D_CODE |
1224 | 1321 |
1225 class Context(ApiBase.Context): | 1322 class Context(ApiBase.Context): |
1226 # This is the most common format. index pid function(args) = result | 1323 # This is the most common format. index pid function(args) |
1227 RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$') | 1324 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$') |
1228 | 1325 |
1229 # Arguments parsing. | 1326 # Arguments parsing. |
1230 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') | 1327 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') |
1231 RE_CHDIR = re.compile(r'^\"(.+?)\"$') | 1328 RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
1232 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') | 1329 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') |
1233 RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (-?\d+)$') | 1330 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$') |
1331 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$') | |
1234 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') | 1332 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
1235 | 1333 |
1236 O_DIRECTORY = 0x100000 | 1334 O_DIRECTORY = 0x100000 |
1237 | 1335 |
1238 class Process(ApiBase.Context.Process): | 1336 class Process(ApiBase.Context.Process): |
1239 pass | 1337 pass |
1240 | 1338 |
1241 def __init__(self, blacklist): | 1339 def __init__(self, blacklist): |
1242 super(Dtrace.Context, self).__init__(blacklist) | 1340 super(Dtrace.Context, self).__init__(blacklist) |
1243 # Process ID of the trace_child_process.py wrapper script instance. | 1341 # Process ID of the trace_child_process.py wrapper script instance. |
1244 self._tracer_pid = None | 1342 self._tracer_pid = None |
1245 # First process to be started by self._tracer_pid. | 1343 # First process to be started by self._tracer_pid. |
1246 self._initial_pid = None | 1344 self._initial_pid = None |
1247 self._initial_cwd = None | 1345 self._initial_cwd = None |
1248 | 1346 |
1249 def on_line(self, line): | 1347 def on_line(self, line): |
1250 match = self.RE_HEADER.match(line) | 1348 match = self.RE_HEADER.match(line) |
1251 assert match, line | 1349 assert match, line |
1252 fn = getattr( | 1350 fn = getattr( |
1253 self, | 1351 self, |
1254 'handle_%s' % match.group(3).replace('-', '_'), | 1352 'handle_%s' % match.group(2).replace('-', '_'), |
1255 self._handle_ignored) | 1353 self._handle_ignored) |
1256 return fn( | 1354 return fn( |
1257 int(match.group(1)), | 1355 int(match.group(1)), |
1258 int(match.group(2)), | 1356 match.group(3)) |
1259 match.group(3), | |
1260 match.group(4), | |
1261 match.group(5)) | |
1262 | 1357 |
1263 def to_results(self): | 1358 def to_results(self): |
1264 """Uses self._initial_pid to determine the initial process.""" | 1359 """Uses self._initial_pid to determine the initial process.""" |
1265 process = self.processes[self._initial_pid].to_results_process() | 1360 process = self.processes[self._initial_pid].to_results_process() |
1266 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( | 1361 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( |
1267 sorted(self.processes), sorted(p.pid for p in process.all)) | 1362 sorted(self.processes), sorted(p.pid for p in process.all)) |
1268 return Results(process) | 1363 return Results(process) |
1269 | 1364 |
1270 def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): | 1365 def handle_dtrace_BEGIN(self, pid, args): |
1271 assert not self._tracer_pid and not self._initial_pid | 1366 assert not self._tracer_pid and not self._initial_pid |
1272 self._tracer_pid = pid | 1367 self._tracer_pid = pid |
1273 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) | 1368 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) |
1274 | 1369 |
1275 def handle_proc_start(self, ppid, pid, _function, _args, result): | 1370 def handle_proc_start(self, pid, args): |
1276 """Transfers cwd. | 1371 """Transfers cwd. |
1277 | 1372 |
1278 The dtrace script already takes care of only tracing the processes that | 1373 The dtrace script already takes care of only tracing the processes that |
1279 are child of the traced processes so there is no need to verify the | 1374 are child of the traced processes so there is no need to verify the |
1280 process hierarchy. | 1375 process hierarchy. |
1281 """ | 1376 """ |
1282 assert result == '0' | |
1283 assert pid not in self.processes | 1377 assert pid not in self.processes |
1378 match = self.RE_PROC_START.match(args) | |
1379 ppid = int(match.group(1)) | |
1284 assert (ppid == self._tracer_pid) != bool(self._initial_pid) | 1380 assert (ppid == self._tracer_pid) != bool(self._initial_pid) |
1285 if not self._initial_pid: | 1381 if not self._initial_pid: |
1286 self._initial_pid = pid | 1382 self._initial_pid = pid |
1287 ppid = None | 1383 ppid = None |
1288 cwd = self._initial_cwd | 1384 cwd = self._initial_cwd |
1289 else: | 1385 else: |
1290 parent = self.processes[ppid] | 1386 parent = self.processes[ppid] |
1291 cwd = parent.cwd | 1387 cwd = parent.cwd |
1292 proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) | 1388 proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) |
1293 proc.cwd = cwd | 1389 proc.cwd = cwd |
1294 logging.debug('New child: %s -> %d' % (ppid, pid)) | 1390 logging.debug('New child: %s -> %d' % (ppid, pid)) |
1295 | 1391 |
1296 def handle_proc_exit(self, _ppid, pid, _function, _args, _result): | 1392 def handle_proc_exit(self, pid, _args): |
1297 """Removes cwd.""" | 1393 """Removes cwd.""" |
1298 if pid != self._tracer_pid: | 1394 if pid != self._tracer_pid: |
1299 # self._tracer_pid is not traced itself. | 1395 # self._tracer_pid is not traced itself. |
1300 self.processes[pid].cwd = None | 1396 self.processes[pid].cwd = None |
1301 | 1397 |
1302 def handle_execve(self, _ppid, pid, _function, args, _result): | 1398 def handle_execve(self, pid, args): |
1303 """Sets the process' executable. | 1399 """Sets the process' executable. |
1304 | 1400 |
1305 TODO(maruel): Read command line arguments. See | 1401 TODO(maruel): Read command line arguments. See |
1306 https://discussions.apple.com/thread/1980539 for an example. | 1402 https://discussions.apple.com/thread/1980539 for an example. |
1307 https://gist.github.com/1242279 | 1403 https://gist.github.com/1242279 |
1308 | 1404 |
1309 Will have to put the answer at http://stackoverflow.com/questions/7556249. | 1405 Will have to put the answer at http://stackoverflow.com/questions/7556249. |
1310 :) | 1406 :) |
1311 """ | 1407 """ |
1312 match = self.RE_EXECVE.match(args) | 1408 match = self.RE_EXECVE.match(args) |
1313 assert match, args | 1409 assert match, args |
1314 proc = self.processes[pid] | 1410 proc = self.processes[pid] |
1315 proc.executable = match.group(1) | 1411 proc.executable = match.group(1) |
1316 proc.command = process_quoted_arguments(match.group(3)) | 1412 proc.command = process_quoted_arguments(match.group(3)) |
1317 assert int(match.group(2)) == len(proc.command), args | 1413 assert int(match.group(2)) == len(proc.command), args |
1318 | 1414 |
1319 def handle_chdir(self, _ppid, pid, _function, args, result): | 1415 def handle_chdir(self, pid, args): |
1320 """Updates cwd.""" | 1416 """Updates cwd.""" |
1321 assert self._tracer_pid | 1417 assert self._tracer_pid |
1322 assert pid in self.processes | 1418 assert pid in self.processes |
1323 if result.startswith('0'): | 1419 cwd = self.RE_CHDIR.match(args).group(1) |
1324 cwd = self.RE_CHDIR.match(args).group(1) | 1420 if not cwd.startswith('/'): |
1325 if not cwd.startswith('/'): | 1421 cwd2 = os.path.join(self.processes[pid].cwd, cwd) |
1326 cwd2 = os.path.join(self.processes[pid].cwd, cwd) | 1422 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) |
1327 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) | |
1328 else: | |
1329 logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) | |
1330 cwd2 = cwd | |
1331 self.processes[pid].cwd = cwd2 | |
1332 else: | 1423 else: |
1333 assert False, 'Unexecpected fail: %s' % result | 1424 logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) |
1425 cwd2 = cwd | |
1426 self.processes[pid].cwd = cwd2 | |
1334 | 1427 |
1335 def handle_open_nocancel(self, ppid, pid, function, args, result): | 1428 def handle_open_nocancel(self, pid, args): |
1336 return self.handle_open(ppid, pid, function, args, result) | 1429 return self.handle_open(pid, args) |
1337 | 1430 |
1338 def handle_open(self, _ppid, pid, function, args, result): | 1431 def handle_open(self, pid, args): |
1339 match = self.RE_OPEN.match(args) | 1432 match = self.RE_OPEN.match(args) |
1340 assert match, (pid, function, args, result) | 1433 assert match, (pid, args) |
1341 args = match.groups() | 1434 flag = int(match.group(2), 16) |
1342 flag = int(args[1]) | |
1343 if self.O_DIRECTORY & flag == self.O_DIRECTORY: | 1435 if self.O_DIRECTORY & flag == self.O_DIRECTORY: |
1344 # Ignore directories. | 1436 # Ignore directories. |
1345 return | 1437 return |
1346 self._handle_file(pid, args[0], result) | 1438 self._handle_file(pid, match.group(1)) |
1347 | 1439 |
1348 def handle_rename(self, _ppid, pid, _function, args, result): | 1440 def handle_rename(self, pid, args): |
1349 args = self.RE_RENAME.match(args).groups() | 1441 match = self.RE_RENAME.match(args) |
1350 self._handle_file(pid, args[0], result) | 1442 assert match, (pid, args) |
1351 self._handle_file(pid, args[1], result) | 1443 self._handle_file(pid, match.group(1)) |
1444 self._handle_file(pid, match.group(2)) | |
1352 | 1445 |
1353 def _handle_file(self, pid, filepath, result): | 1446 def _handle_file(self, pid, filepath): |
1354 if result.startswith(('-1', '2')): | |
1355 return | |
1356 if not filepath.startswith('/'): | 1447 if not filepath.startswith('/'): |
1357 filepath = os.path.join(self.processes[pid].cwd, filepath) | 1448 filepath = os.path.join(self.processes[pid].cwd, filepath) |
1358 # We can get '..' in the path. | 1449 # We can get '..' in the path. |
1359 filepath = os.path.normpath(filepath) | 1450 filepath = os.path.normpath(filepath) |
1360 # Sadly, still need to filter out directories here; | 1451 # Sadly, still need to filter out directories here; |
1361 # saw open_nocancel(".", 0, 0) = 0 lines. | 1452 # saw open_nocancel(".", 0, 0) = 0 lines. |
1362 if os.path.isdir(filepath): | 1453 if os.path.isdir(filepath): |
1363 return | 1454 return |
1364 self.processes[pid].add_file(filepath) | 1455 self.processes[pid].add_file(filepath) |
1365 | 1456 |
1366 @staticmethod | 1457 @staticmethod |
1367 def _handle_ignored(_ppid, pid, function, args, result): | 1458 def _handle_ignored(pid, args): |
1368 logging.debug('%d %s(%s) = %s' % (pid, function, args, result)) | 1459 """Is called for all the event traces that are not handled.""" |
1460 assert False, '%d ??(%s)' % (pid, args) | |
1369 | 1461 |
1370 @staticmethod | 1462 @staticmethod |
1371 def clean_trace(logname): | 1463 def clean_trace(logname): |
1372 if os.path.isfile(logname): | 1464 if os.path.isfile(logname): |
1373 os.remove(logname) | 1465 os.remove(logname) |
1374 | 1466 |
1375 @classmethod | 1467 @classmethod |
1376 def gen_trace(cls, cmd, cwd, logname, output): | 1468 def gen_trace(cls, cmd, cwd, logname, output): |
1377 """Runs dtrace on an executable. | 1469 """Runs dtrace on an executable. |
1378 | 1470 |
(...skipping 932 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2311 os.path.abspath(options.log), | 2403 os.path.abspath(options.log), |
2312 args, | 2404 args, |
2313 options.root_dir, | 2405 options.root_dir, |
2314 options.cwd, | 2406 options.cwd, |
2315 options.product_dir, | 2407 options.product_dir, |
2316 options.force) | 2408 options.force) |
2317 | 2409 |
2318 | 2410 |
2319 if __name__ == '__main__': | 2411 if __name__ == '__main__': |
2320 sys.exit(main()) | 2412 sys.exit(main()) |
OLD | NEW |