OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 15 matching lines...) Expand all Loading... |
26 import os | 26 import os |
27 import posixpath | 27 import posixpath |
28 import re | 28 import re |
29 import subprocess | 29 import subprocess |
30 import sys | 30 import sys |
31 import weakref | 31 import weakref |
32 | 32 |
33 ## OS-specific imports | 33 ## OS-specific imports |
34 | 34 |
35 if sys.platform == 'win32': | 35 if sys.platform == 'win32': |
36 from ctypes.wintypes import create_unicode_buffer | 36 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p |
37 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611 | 37 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611 |
38 from ctypes.wintypes import GetLastError # pylint: disable=E0611 | 38 from ctypes.wintypes import GetLastError # pylint: disable=E0611 |
39 elif sys.platform == 'darwin': | 39 elif sys.platform == 'darwin': |
40 import Carbon.File # pylint: disable=F0401 | 40 import Carbon.File # pylint: disable=F0401 |
41 | 41 |
42 | 42 |
43 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | 43 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
44 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) | 44 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) |
45 | 45 |
46 KEY_TRACKED = 'isolate_dependency_tracked' | 46 KEY_TRACKED = 'isolate_dependency_tracked' |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
166 assert isabs(path), path | 166 assert isabs(path), path |
167 # Windows used to have an option to turn on case sensitivity on non Win32 | 167 # Windows used to have an option to turn on case sensitivity on non Win32 |
168 # subsystem but that's out of scope here and isn't supported anymore. | 168 # subsystem but that's out of scope here and isn't supported anymore. |
169 # Go figure why GetShortPathName() is needed. | 169 # Go figure why GetShortPathName() is needed. |
170 path = GetLongPathName(GetShortPathName(path)) | 170 path = GetLongPathName(GetShortPathName(path)) |
171 if path.startswith('\\\\?\\'): | 171 if path.startswith('\\\\?\\'): |
172 return path[4:] | 172 return path[4:] |
173 return path | 173 return path |
174 | 174 |
175 | 175 |
| 176 def CommandLineToArgvW(command_line): |
| 177 """Splits a commandline into argv using CommandLineToArgvW().""" |
| 178 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx |
| 179 size = c_int() |
| 180 ptr = windll.shell32.CommandLineToArgvW(unicode(command_line), byref(size)) |
| 181 try: |
| 182 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)] |
| 183 finally: |
| 184 windll.kernel32.LocalFree(ptr) |
| 185 |
| 186 |
176 elif sys.platform == 'darwin': | 187 elif sys.platform == 'darwin': |
177 | 188 |
178 | 189 |
179 # On non-windows, keep the stdlib behavior. | 190 # On non-windows, keep the stdlib behavior. |
180 isabs = os.path.isabs | 191 isabs = os.path.isabs |
181 | 192 |
182 | 193 |
183 def get_native_path_case(path): | 194 def get_native_path_case(path): |
184 """Returns the native path case for an existing file.""" | 195 """Returns the native path case for an existing file.""" |
185 assert isabs(path), path | 196 assert isabs(path), path |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
248 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows.""" | 259 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows.""" |
249 if x: | 260 if x: |
250 x = x.rstrip(os.path.sep).replace(os.path.sep, '/') | 261 x = x.rstrip(os.path.sep).replace(os.path.sep, '/') |
251 if x == '.': | 262 if x == '.': |
252 x = '' | 263 x = '' |
253 if x: | 264 if x: |
254 x += '/' | 265 x += '/' |
255 return x | 266 return x |
256 | 267 |
257 | 268 |
| 269 def process_quoted_arguments(text): |
| 270 """Extracts quoted arguments on a string and return the arguments as a list. |
| 271 |
| 272 Implemented as an automaton. Supports incomplete strings in the form |
| 273 '"foo"...'. |
| 274 |
| 275 Example: |
| 276 With text = '"foo", "bar"', the function will return ['foo', 'bar'] |
| 277 |
| 278 TODO(maruel): Implement escaping. |
| 279 """ |
| 280 # All the possible states of the DFA. |
| 281 ( NEED_QUOTE, # Begining of a new arguments. |
| 282 INSIDE_STRING, # Inside an argument. |
| 283 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be |
| 284 # a serie of 3 dots or a comma. |
| 285 NEED_SPACE, # Right after a comma |
| 286 NEED_DOT_2, # Found a dot, need a second one. |
| 287 NEED_DOT_3, # Found second dot, need a third one. |
| 288 NEED_COMMA, # Found third dot, need a comma. |
| 289 ) = range(7) |
| 290 |
| 291 state = NEED_QUOTE |
| 292 current_argument = '' |
| 293 out = [] |
| 294 for i in text: |
| 295 if i == '"': |
| 296 if state == NEED_QUOTE: |
| 297 state = INSIDE_STRING |
| 298 elif state == INSIDE_STRING: |
| 299 # The argument is now closed. |
| 300 out.append(current_argument) |
| 301 current_argument = '' |
| 302 state = NEED_COMMA_OR_DOT |
| 303 else: |
| 304 assert False, text |
| 305 elif i == ',': |
| 306 if state in (NEED_COMMA_OR_DOT, NEED_COMMA): |
| 307 state = NEED_SPACE |
| 308 else: |
| 309 assert False, text |
| 310 elif i == ' ': |
| 311 if state == NEED_SPACE: |
| 312 state = NEED_QUOTE |
| 313 if state == INSIDE_STRING: |
| 314 current_argument += i |
| 315 elif i == '.': |
| 316 if state == NEED_COMMA_OR_DOT: |
| 317 # The string is incomplete, this mean the strace -s flag should be |
| 318 # increased. |
| 319 state = NEED_DOT_2 |
| 320 elif state == NEED_DOT_2: |
| 321 state = NEED_DOT_3 |
| 322 elif state == NEED_DOT_3: |
| 323 state = NEED_COMMA |
| 324 elif state == INSIDE_STRING: |
| 325 current_argument += i |
| 326 else: |
| 327 assert False, text |
| 328 else: |
| 329 if state == INSIDE_STRING: |
| 330 current_argument += i |
| 331 else: |
| 332 assert False, text |
| 333 assert state in (NEED_COMMA, NEED_COMMA_OR_DOT) |
| 334 return out |
| 335 |
| 336 |
258 class ApiBase(object): | 337 class ApiBase(object): |
259 """OS-agnostic API to trace a process and its children.""" | 338 """OS-agnostic API to trace a process and its children.""" |
260 class Context(object): | 339 class Context(object): |
261 """Processes one log line at a time and keeps the list of traced processes. | 340 """Processes one log line at a time and keeps the list of traced processes. |
262 """ | 341 """ |
263 class Process(object): | 342 class Process(object): |
264 """Keeps context for one traced child process. | 343 """Keeps context for one traced child process. |
265 | 344 |
266 Logs all the files this process touched. Ignores directories. | 345 Logs all the files this process touched. Ignores directories. |
267 """ | 346 """ |
(...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
745 assert childpid not in self.children | 824 assert childpid not in self.children |
746 self.children.append(childpid) | 825 self.children.append(childpid) |
747 | 826 |
748 def handle_close(self, _function, _args, _result): | 827 def handle_close(self, _function, _args, _result): |
749 pass | 828 pass |
750 | 829 |
751 def handle_execve(self, _function, args, result): | 830 def handle_execve(self, _function, args, result): |
752 if result != '0': | 831 if result != '0': |
753 return | 832 return |
754 m = self.RE_EXECVE.match(args) | 833 m = self.RE_EXECVE.match(args) |
755 self._handle_file(m.group(1), result) | 834 filepath = m.group(1) |
| 835 self._handle_file(filepath, result) |
| 836 self.executable = self.RelativePath(self.get_cwd(), filepath) |
| 837 self.command = process_quoted_arguments(m.group(2)) |
756 | 838 |
757 def handle_exit_group(self, _function, _args, _result): | 839 def handle_exit_group(self, _function, _args, _result): |
758 """Removes cwd.""" | 840 """Removes cwd.""" |
759 self.cwd = None | 841 self.cwd = None |
760 | 842 |
761 @staticmethod | 843 @staticmethod |
762 def handle_fork(_function, args, result): | 844 def handle_fork(_function, args, result): |
763 assert False, (args, result) | 845 assert False, (args, result) |
764 | 846 |
765 def handle_open(self, _function, args, result): | 847 def handle_open(self, _function, args, result): |
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
940 proc:::exit /trackedpid[pid]/ { | 1022 proc:::exit /trackedpid[pid]/ { |
941 trackedpid[pid] = 0; | 1023 trackedpid[pid] = 0; |
942 current_processes -= 1; | 1024 current_processes -= 1; |
943 printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", | 1025 printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", |
944 logindex, ppid, pid, probeprov, probename, execname, | 1026 logindex, ppid, pid, probeprov, probename, execname, |
945 current_processes); | 1027 current_processes); |
946 logindex++; | 1028 logindex++; |
947 } | 1029 } |
948 | 1030 |
949 /* Finally what we care about! */ | 1031 /* Finally what we care about! */ |
| 1032 syscall::exec*:entry /trackedpid[pid]/ { |
| 1033 self->e_arg0 = copyinstr(arg0); |
| 1034 /* Incrementally probe for a NULL in the argv parameter of execve() to |
| 1035 * figure out argc. */ |
| 1036 self->argc = 0; |
| 1037 self->argv = (user_addr_t*)copyin( |
| 1038 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
| 1039 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
| 1040 self->argv = (user_addr_t*)copyin( |
| 1041 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
| 1042 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
| 1043 self->argv = (user_addr_t*)copyin( |
| 1044 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
| 1045 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
| 1046 self->argv = (user_addr_t*)copyin( |
| 1047 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
| 1048 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
| 1049 self->argv = (user_addr_t*)copyin( |
| 1050 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
| 1051 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
| 1052 self->argv = (user_addr_t*)copyin( |
| 1053 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
| 1054 self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
| 1055 self->argv = (user_addr_t*)copyin( |
| 1056 arg1, sizeof(user_addr_t) * (self->argc + 1)); |
| 1057 |
| 1058 /* Copy the inputs strings since there is no guarantee they'll be |
| 1059 * present after the call completed. */ |
| 1060 self->args[0] = (self->argc > 0) ? copyinstr(self->argv[0]) : ""; |
| 1061 self->args[1] = (self->argc > 1) ? copyinstr(self->argv[1]) : ""; |
| 1062 self->args[2] = (self->argc > 2) ? copyinstr(self->argv[2]) : ""; |
| 1063 self->args[3] = (self->argc > 3) ? copyinstr(self->argv[3]) : ""; |
| 1064 self->args[4] = (self->argc > 4) ? copyinstr(self->argv[4]) : ""; |
| 1065 self->args[5] = (self->argc > 5) ? copyinstr(self->argv[5]) : ""; |
| 1066 self->args[6] = (self->argc > 6) ? copyinstr(self->argv[6]) : ""; |
| 1067 self->args[7] = (self->argc > 7) ? copyinstr(self->argv[7]) : ""; |
| 1068 self->args[8] = (self->argc > 8) ? copyinstr(self->argv[8]) : ""; |
| 1069 self->args[9] = (self->argc > 9) ? copyinstr(self->argv[9]) : ""; |
| 1070 } |
| 1071 syscall::exec*: /trackedpid[pid] && errno == 0/ { |
| 1072 /* We need to join strings here, as using multiple printf() would cause |
| 1073 * tearing when multiple threads/processes are traced. */ |
| 1074 this->args = ""; |
| 1075 this->args = strjoin(this->args, (self->argc > 0) ? ", \\"" : ""); |
| 1076 this->args = strjoin(this->args, (self->argc > 0) ? self->args[0] : ""); |
| 1077 this->args = strjoin(this->args, (self->argc > 0) ? "\\"" : ""); |
| 1078 |
| 1079 this->args = strjoin(this->args, (self->argc > 1) ? ", \\"" : ""); |
| 1080 this->args = strjoin(this->args, (self->argc > 1) ? self->args[1] : ""); |
| 1081 this->args = strjoin(this->args, (self->argc > 1) ? "\\"" : ""); |
| 1082 |
| 1083 this->args = strjoin(this->args, (self->argc > 2) ? ", \\"" : ""); |
| 1084 this->args = strjoin(this->args, (self->argc > 2) ? self->args[2] : ""); |
| 1085 this->args = strjoin(this->args, (self->argc > 2) ? "\\"" : ""); |
| 1086 |
| 1087 this->args = strjoin(this->args, (self->argc > 3) ? ", \\"" : ""); |
| 1088 this->args = strjoin(this->args, (self->argc > 3) ? self->args[3] : ""); |
| 1089 this->args = strjoin(this->args, (self->argc > 3) ? "\\"" : ""); |
| 1090 |
| 1091 this->args = strjoin(this->args, (self->argc > 4) ? ", \\"" : ""); |
| 1092 this->args = strjoin(this->args, (self->argc > 4) ? self->args[4] : ""); |
| 1093 this->args = strjoin(this->args, (self->argc > 4) ? "\\"" : ""); |
| 1094 |
| 1095 this->args = strjoin(this->args, (self->argc > 5) ? ", \\"" : ""); |
| 1096 this->args = strjoin(this->args, (self->argc > 5) ? self->args[5] : ""); |
| 1097 this->args = strjoin(this->args, (self->argc > 5) ? "\\"" : ""); |
| 1098 |
| 1099 this->args = strjoin(this->args, (self->argc > 6) ? ", \\"" : ""); |
| 1100 this->args = strjoin(this->args, (self->argc > 6) ? self->args[6] : ""); |
| 1101 this->args = strjoin(this->args, (self->argc > 6) ? "\\"" : ""); |
| 1102 |
| 1103 this->args = strjoin(this->args, (self->argc > 7) ? ", \\"" : ""); |
| 1104 this->args = strjoin(this->args, (self->argc > 7) ? self->args[7] : ""); |
| 1105 this->args = strjoin(this->args, (self->argc > 7) ? "\\"" : ""); |
| 1106 |
| 1107 this->args = strjoin(this->args, (self->argc > 8) ? ", \\"" : ""); |
| 1108 this->args = strjoin(this->args, (self->argc > 8) ? self->args[8] : ""); |
| 1109 this->args = strjoin(this->args, (self->argc > 8) ? "\\"" : ""); |
| 1110 |
| 1111 this->args = strjoin(this->args, (self->argc > 9) ? ", \\"" : ""); |
| 1112 this->args = strjoin(this->args, (self->argc > 9) ? self->args[9]: ""); |
| 1113 this->args = strjoin(this->args, (self->argc > 9) ? "\\"" : ""); |
| 1114 |
| 1115 /* Prints self->argc to permits verifying the internal consistency since |
| 1116 * this code is quite fishy. */ |
| 1117 printf("%d %d:%d %s(\\"%s\\", [%d%s]) = %d\\n", |
| 1118 logindex, ppid, pid, probefunc, |
| 1119 self->e_arg0, |
| 1120 self->argc, |
| 1121 this->args, |
| 1122 errno); |
| 1123 logindex++; |
| 1124 |
| 1125 /* TODO(maruel): Clean up memory |
| 1126 self->e_arg0 = 0; |
| 1127 self->argc = 0; |
| 1128 self->args[0] = 0; |
| 1129 self->args[1] = 0; |
| 1130 self->args[2] = 0; |
| 1131 self->args[3] = 0; |
| 1132 self->args[4] = 0; |
| 1133 self->args[5] = 0; |
| 1134 self->args[6] = 0; |
| 1135 self->args[7] = 0; |
| 1136 self->args[8] = 0; |
| 1137 self->args[9] = 0; |
| 1138 */ |
| 1139 } |
| 1140 |
950 syscall::open*:entry /trackedpid[pid]/ { | 1141 syscall::open*:entry /trackedpid[pid]/ { |
951 self->arg0 = arg0; | 1142 self->arg0 = arg0; |
952 self->arg1 = arg1; | 1143 self->arg1 = arg1; |
953 self->arg2 = arg2; | 1144 self->arg2 = arg2; |
954 } | 1145 } |
955 syscall::open*:return /trackedpid[pid] && errno == 0/ { | 1146 syscall::open*:return /trackedpid[pid] && errno == 0/ { |
956 printf("%d %d:%d %s(\\"%s\\", %d, %d) = %d\\n", | 1147 printf("%d %d:%d %s(\\"%s\\", %d, %d) = %d\\n", |
957 logindex, ppid, pid, probefunc, copyinstr(self->arg0), | 1148 logindex, ppid, pid, probefunc, copyinstr(self->arg0), |
958 self->arg1, self->arg2, errno); | 1149 self->arg1, self->arg2, errno); |
959 logindex++; | 1150 logindex++; |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1033 ' logindex++;\n' | 1224 ' logindex++;\n' |
1034 '}\n') + cls.D_CODE | 1225 '}\n') + cls.D_CODE |
1035 | 1226 |
1036 class Context(ApiBase.Context): | 1227 class Context(ApiBase.Context): |
1037 # This is the most common format. index pid function(args) = result | 1228 # This is the most common format. index pid function(args) = result |
1038 RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$') | 1229 RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$') |
1039 | 1230 |
1040 # Arguments parsing. | 1231 # Arguments parsing. |
1041 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') | 1232 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') |
1042 RE_CHDIR = re.compile(r'^\"(.+?)\"$') | 1233 RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
| 1234 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') |
1043 RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (-?\d+)$') | 1235 RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (-?\d+)$') |
1044 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') | 1236 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
1045 | 1237 |
1046 O_DIRECTORY = 0x100000 | 1238 O_DIRECTORY = 0x100000 |
1047 | 1239 |
1048 class Process(ApiBase.Context.Process): | 1240 class Process(ApiBase.Context.Process): |
1049 pass | 1241 pass |
1050 | 1242 |
1051 def __init__(self, blacklist): | 1243 def __init__(self, blacklist): |
1052 super(Dtrace.Context, self).__init__(blacklist) | 1244 super(Dtrace.Context, self).__init__(blacklist) |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1102 proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) | 1294 proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) |
1103 proc.cwd = cwd | 1295 proc.cwd = cwd |
1104 logging.debug('New child: %s -> %d' % (ppid, pid)) | 1296 logging.debug('New child: %s -> %d' % (ppid, pid)) |
1105 | 1297 |
1106 def handle_proc_exit(self, _ppid, pid, _function, _args, _result): | 1298 def handle_proc_exit(self, _ppid, pid, _function, _args, _result): |
1107 """Removes cwd.""" | 1299 """Removes cwd.""" |
1108 if pid != self._tracer_pid: | 1300 if pid != self._tracer_pid: |
1109 # self._tracer_pid is not traced itself. | 1301 # self._tracer_pid is not traced itself. |
1110 self.processes[pid].cwd = None | 1302 self.processes[pid].cwd = None |
1111 | 1303 |
| 1304 def handle_execve(self, _ppid, pid, _function, args, _result): |
| 1305 """Sets the process' executable. |
| 1306 |
| 1307 TODO(maruel): Read command line arguments. See |
| 1308 https://discussions.apple.com/thread/1980539 for an example. |
| 1309 https://gist.github.com/1242279 |
| 1310 |
| 1311 Will have to put the answer at http://stackoverflow.com/questions/7556249. |
| 1312 :) |
| 1313 """ |
| 1314 match = self.RE_EXECVE.match(args) |
| 1315 assert match, args |
| 1316 proc = self.processes[pid] |
| 1317 proc.executable = match.group(1) |
| 1318 proc.command = process_quoted_arguments(match.group(3)) |
| 1319 assert int(match.group(2)) == len(proc.command), args |
| 1320 |
1112 def handle_chdir(self, _ppid, pid, _function, args, result): | 1321 def handle_chdir(self, _ppid, pid, _function, args, result): |
1113 """Updates cwd.""" | 1322 """Updates cwd.""" |
1114 assert self._tracer_pid | 1323 assert self._tracer_pid |
1115 assert pid in self.processes | 1324 assert pid in self.processes |
1116 if result.startswith('0'): | 1325 if result.startswith('0'): |
1117 cwd = self.RE_CHDIR.match(args).group(1) | 1326 cwd = self.RE_CHDIR.match(args).group(1) |
1118 if not cwd.startswith('/'): | 1327 if not cwd.startswith('/'): |
1119 cwd2 = os.path.join(self.processes[pid].cwd, cwd) | 1328 cwd2 = os.path.join(self.processes[pid].cwd, cwd) |
1120 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) | 1329 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) |
1121 else: | 1330 else: |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1253 return context.to_results() | 1462 return context.to_results() |
1254 | 1463 |
1255 @staticmethod | 1464 @staticmethod |
1256 def _sort_log(logname): | 1465 def _sort_log(logname): |
1257 """Sorts the log back in order when each call occured. | 1466 """Sorts the log back in order when each call occured. |
1258 | 1467 |
1259 dtrace doesn't save the buffer in strict order since it keeps one buffer per | 1468 dtrace doesn't save the buffer in strict order since it keeps one buffer per |
1260 CPU. | 1469 CPU. |
1261 """ | 1470 """ |
1262 with open(logname, 'rb') as logfile: | 1471 with open(logname, 'rb') as logfile: |
1263 lines = [f for f in logfile.readlines() if f.strip()] | 1472 lines = [l for l in logfile if l.strip()] |
1264 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) | 1473 errors = [l for l in lines if l.startswith('dtrace:')] |
| 1474 if errors: |
| 1475 print >> sys.stderr, 'Failed to load: %s' % logname |
| 1476 print >> sys.stderr, '\n'.join(errors) |
| 1477 assert not errors, errors |
| 1478 try: |
| 1479 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) |
| 1480 except ValueError: |
| 1481 print >> sys.stderr, 'Failed to load: %s' % logname |
| 1482 print >> sys.stderr, '\n'.join(lines) |
| 1483 raise |
1265 with open(logname, 'wb') as logfile: | 1484 with open(logname, 'wb') as logfile: |
1266 logfile.write(''.join(lines)) | 1485 logfile.write(''.join(lines)) |
1267 | 1486 |
1268 | 1487 |
1269 class LogmanTrace(ApiBase): | 1488 class LogmanTrace(ApiBase): |
1270 """Uses the native Windows ETW based tracing functionality to trace a child | 1489 """Uses the native Windows ETW based tracing functionality to trace a child |
1271 process. | 1490 process. |
1272 | 1491 |
1273 Caveat: this implementations doesn't track cwd or initial_cwd. | 1492 Caveat: this implementations doesn't track cwd or initial_cwd. |
1274 """ | 1493 """ |
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1522 def handle_Process_Start(self, line): | 1741 def handle_Process_Start(self, line): |
1523 """Handles a new child process started by PID.""" | 1742 """Handles a new child process started by PID.""" |
1524 #UNIQUE_PROCESS_KEY = 19 | 1743 #UNIQUE_PROCESS_KEY = 19 |
1525 PROCESS_ID = 20 | 1744 PROCESS_ID = 20 |
1526 #PARENT_PID = 21 | 1745 #PARENT_PID = 21 |
1527 #SESSION_ID = 22 | 1746 #SESSION_ID = 22 |
1528 #EXIT_STATUS = 23 | 1747 #EXIT_STATUS = 23 |
1529 #DIRECTORY_TABLE_BASE = 24 | 1748 #DIRECTORY_TABLE_BASE = 24 |
1530 #USER_SID = 25 | 1749 #USER_SID = 25 |
1531 IMAGE_FILE_NAME = 26 | 1750 IMAGE_FILE_NAME = 26 |
1532 #COMMAND_LINE = 27 | 1751 COMMAND_LINE = 27 |
1533 | 1752 |
1534 ppid = line[self.PID] | 1753 ppid = line[self.PID] |
1535 pid = int(line[PROCESS_ID], 16) | 1754 pid = int(line[PROCESS_ID], 16) |
1536 if ppid == self._tracer_pid: | 1755 if ppid == self._tracer_pid: |
1537 # Need to ignore processes we don't know about because the log is | 1756 # Need to ignore processes we don't know about because the log is |
1538 # system-wide. | 1757 # system-wide. |
1539 if line[IMAGE_FILE_NAME] == '"logman.exe"': | 1758 if line[IMAGE_FILE_NAME] == '"logman.exe"': |
1540 # Skip the shutdown call when "logman.exe stop" is executed. | 1759 # Skip the shutdown call when "logman.exe stop" is executed. |
1541 return | 1760 return |
1542 self._initial_pid = self._initial_pid or pid | 1761 self._initial_pid = self._initial_pid or pid |
1543 assert pid not in self.processes | 1762 ppid = None |
1544 self.processes[pid] = self.Process(self, pid, None, None) | 1763 elif ppid not in self.processes: |
1545 logging.info( | 1764 # Ignore |
1546 'New child: %d -> %d %s' % (ppid, pid, line[IMAGE_FILE_NAME])) | 1765 return |
1547 elif ppid in self.processes: | 1766 assert pid not in self.processes |
1548 # Grand-children | 1767 proc = self.processes[pid] = self.Process(self, pid, None, ppid) |
1549 assert pid not in self.processes | 1768 # TODO(maruel): Process escapes. |
1550 self.processes[pid] = self.Process(self, pid, None, ppid) | 1769 assert ( |
1551 logging.info( | 1770 line[COMMAND_LINE].startswith('"') and |
1552 'New child: %d -> %d %s' % (ppid, pid, line[IMAGE_FILE_NAME])) | 1771 line[COMMAND_LINE].endswith('"')) |
| 1772 proc.command = CommandLineToArgvW(line[COMMAND_LINE][1:-1]) |
| 1773 assert ( |
| 1774 line[IMAGE_FILE_NAME].startswith('"') and |
| 1775 line[IMAGE_FILE_NAME].endswith('"')) |
| 1776 proc.executable = line[IMAGE_FILE_NAME][1:-1] |
| 1777 # proc.command[0] may be the absolute path of 'executable' but it may be |
| 1778 # anything else too. If it happens that command[0] ends with executable, |
| 1779 # use it, otherwise defaults to the base name. |
| 1780 cmd0 = proc.command[0].lower() |
| 1781 if not cmd0.endswith('.exe'): |
| 1782 # TODO(maruel): That's not strictly true either. |
| 1783 cmd0 += '.exe' |
| 1784 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0): |
| 1785 proc.executable = get_native_path_case(cmd0) |
| 1786 logging.info( |
| 1787 'New child: %s -> %d %s' % (ppid, pid, proc.executable)) |
1553 | 1788 |
1554 def handle_Thread_End(self, line): | 1789 def handle_Thread_End(self, line): |
1555 """Has the same parameters as Thread_Start.""" | 1790 """Has the same parameters as Thread_Start.""" |
1556 tid = int(line[self.TID], 16) | 1791 tid = int(line[self.TID], 16) |
1557 self._threads_active.pop(tid, None) | 1792 self._threads_active.pop(tid, None) |
1558 | 1793 |
1559 def handle_Thread_Start(self, line): | 1794 def handle_Thread_Start(self, line): |
1560 """Handles a new thread created. | 1795 """Handles a new thread created. |
1561 | 1796 |
1562 Do not use self.PID here since a process' initial thread is created by | 1797 Do not use self.PID here since a process' initial thread is created by |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1608 for i in vars_to_ignore: | 1843 for i in vars_to_ignore: |
1609 if os.environ.get(i): | 1844 if os.environ.get(i): |
1610 self.IGNORED.add(os.environ[i]) | 1845 self.IGNORED.add(os.environ[i]) |
1611 | 1846 |
1612 # Also add their short path name equivalents. | 1847 # Also add their short path name equivalents. |
1613 for i in list(self.IGNORED): | 1848 for i in list(self.IGNORED): |
1614 self.IGNORED.add(GetShortPathName(i.replace('/', os.path.sep))) | 1849 self.IGNORED.add(GetShortPathName(i.replace('/', os.path.sep))) |
1615 | 1850 |
1616 # Add these last since they have no short path name equivalent. | 1851 # Add these last since they have no short path name equivalent. |
1617 self.IGNORED.add('\\SystemRoot') | 1852 self.IGNORED.add('\\SystemRoot') |
1618 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. | |
1619 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)): | |
1620 self.IGNORED.add('%s\\\$' % letter) | |
1621 # TODO(maruel): Remove the need to add these. | |
1622 self.IGNORED.add('\\\\?\\%s\\$' % letter) | |
1623 self.IGNORED = tuple(sorted(self.IGNORED)) | 1853 self.IGNORED = tuple(sorted(self.IGNORED)) |
1624 | 1854 |
1625 @staticmethod | 1855 @staticmethod |
1626 def clean_trace(logname): | 1856 def clean_trace(logname): |
1627 if os.path.isfile(logname): | 1857 if os.path.isfile(logname): |
1628 os.remove(logname) | 1858 os.remove(logname) |
1629 if os.path.isfile(logname + '.etl'): | 1859 if os.path.isfile(logname + '.etl'): |
1630 os.remove(logname + '.etl') | 1860 os.remove(logname + '.etl') |
1631 | 1861 |
1632 @classmethod | 1862 @classmethod |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1745 logging.debug('Running: %s' % cmd_convert) | 1975 logging.debug('Running: %s' % cmd_convert) |
1746 subprocess.check_call( | 1976 subprocess.check_call( |
1747 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) | 1977 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) |
1748 | 1978 |
1749 return child.returncode, out | 1979 return child.returncode, out |
1750 | 1980 |
1751 @classmethod | 1981 @classmethod |
1752 def parse_log(cls, filename, blacklist): | 1982 def parse_log(cls, filename, blacklist): |
1753 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 1983 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
1754 | 1984 |
| 1985 def blacklist_more(filepath): |
| 1986 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
| 1987 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) |
| 1988 |
1755 # Auto-detect the log format. | 1989 # Auto-detect the log format. |
1756 with open(filename, 'rb') as f: | 1990 with open(filename, 'rb') as f: |
1757 hdr = f.read(2) | 1991 hdr = f.read(2) |
1758 assert len(hdr) == 2 | 1992 assert len(hdr) == 2 |
1759 if hdr == '<E': | 1993 if hdr == '<E': |
1760 # It starts with <Events>. | 1994 # It starts with <Events>. |
1761 logformat = 'xml' | 1995 logformat = 'xml' |
1762 elif hdr == '\xFF\xEF': | 1996 elif hdr == '\xFF\xEF': |
1763 # utf-16 BOM. | 1997 # utf-16 BOM. |
1764 logformat = 'csv_utf16' | 1998 logformat = 'csv_utf16' |
1765 else: | 1999 else: |
1766 logformat = 'csv' | 2000 logformat = 'csv' |
1767 | 2001 |
1768 context = cls.Context(blacklist) | 2002 context = cls.Context(blacklist_more) |
1769 | 2003 |
1770 if logformat == 'csv_utf16': | 2004 if logformat == 'csv_utf16': |
1771 def utf_8_encoder(unicode_csv_data): | 2005 def utf_8_encoder(unicode_csv_data): |
1772 """Encodes the unicode object as utf-8 encoded str instance""" | 2006 """Encodes the unicode object as utf-8 encoded str instance""" |
1773 for line in unicode_csv_data: | 2007 for line in unicode_csv_data: |
1774 yield line.encode('utf-8') | 2008 yield line.encode('utf-8') |
1775 | 2009 |
1776 def unicode_csv_reader(unicode_csv_data, **kwargs): | 2010 def unicode_csv_reader(unicode_csv_data, **kwargs): |
1777 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.""" | 2011 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.""" |
1778 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs) | 2012 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs) |
(...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2083 os.path.abspath(options.log), | 2317 os.path.abspath(options.log), |
2084 args, | 2318 args, |
2085 options.root_dir, | 2319 options.root_dir, |
2086 options.cwd, | 2320 options.cwd, |
2087 options.product_dir, | 2321 options.product_dir, |
2088 options.force) | 2322 options.force) |
2089 | 2323 |
2090 | 2324 |
2091 if __name__ == '__main__': | 2325 if __name__ == '__main__': |
2092 sys.exit(main()) | 2326 sys.exit(main()) |
OLD | NEW |