Index: tools/isolate/trace_inputs.py |
diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py |
index 33a946d56f469f904d612cecf24f97b7c02478c9..3dd64bde8052bc80ad1f19f0a7322bf5ab5ef658 100755 |
--- a/tools/isolate/trace_inputs.py |
+++ b/tools/isolate/trace_inputs.py |
@@ -383,7 +383,7 @@ class Strace(ApiBase): |
# Arguments parsing. |
RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
- RE_EXECVE = re.compile(r'^\"(.+?)\", \[.+?\], \[.+?\]$') |
+ RE_EXECVE = re.compile(r'^\"(.+?)\", \[(.+)\], \[\/\* \d\d vars \*\/\]$') |
RE_OPEN2 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+)$') |
RE_OPEN3 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+), (\d+)$') |
RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
@@ -503,13 +503,18 @@ class Strace(ApiBase): |
child.initial_cwd = self.get_cwd() |
assert child.parentid is None |
child.parentid = self.pid |
+ # It is necessary because the logs are processed out of order. |
+ assert childpid not in self.children |
self.children.append(childpid) |
def handle_close(self, _function, _args, _result): |
pass |
def handle_execve(self, _function, args, result): |
- self._handle_file(self.RE_EXECVE.match(args).group(1), result) |
+ if result != '0': |
+ return |
+ m = self.RE_EXECVE.match(args) |
+ self._handle_file(m.group(1), result) |
def handle_exit_group(self, _function, _args, _result): |
"""Removes cwd.""" |
@@ -765,8 +770,8 @@ class Dtrace(ApiBase): |
def code(cls, pid, cwd): |
"""Setups the D code to implement child process tracking. |
- Injects a fake chdir() trace to simplify parsing. The reason is that the |
- child process is already running at that point so: |
+ Injects the pid and the initial cwd into the trace header for context. |
+ The reason is that the child process is already running at that point so: |
- no proc_start() is logged for it. |
- there is no way to figure out the absolute path of cwd in kernel on OSX |
@@ -779,12 +784,9 @@ class Dtrace(ApiBase): |
'dtrace:::BEGIN {\n' |
' current_processes = 1;\n' |
' logindex = 0;\n' |
- ' trackedpid[') + pid + ('] = 1;\n' |
- ' printf("%d %d:%d chdir(\\"' + cwd + '\\") = 0\\n",\n' |
- ' logindex, 1, ' + pid + ');\n' |
- ' logindex++;\n' |
- ' printf("%d %d:%d %s_%s() = 0\\n",\n' |
- ' logindex, ppid, pid, probeprov, probename);\n' |
+ ' trackedpid[' + pid + '] = 1;\n' |
+ ' printf("%d %d:%d %s_%s(\\"' + cwd + '\\") = 0\\n",\n' |
+ ' logindex, ppid, ' + pid + ', probeprov, probename);\n' |
' logindex++;\n' |
'}\n') + cls.D_CODE |
@@ -793,6 +795,7 @@ class Dtrace(ApiBase): |
RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$') |
# Arguments parsing. |
+ RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') |
RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (-?\d+)$') |
RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
@@ -802,6 +805,14 @@ class Dtrace(ApiBase): |
class Process(ApiBase.Context.Process): |
pass |
+ def __init__(self, blacklist): |
+ super(Dtrace.Context, self).__init__(blacklist) |
+ # Process ID of the trace_child_process.py wrapper script instance. |
+ self._tracer_pid = None |
+ # First process to be started by self._tracer_pid. |
+ self._initial_pid = None |
+ self._initial_cwd = None |
+ |
def on_line(self, line): |
match = self.RE_HEADER.match(line) |
assert match, line |
@@ -816,23 +827,42 @@ class Dtrace(ApiBase): |
match.group(4), |
match.group(5)) |
- def handle_dtrace_BEGIN(self, _ppid, _pid, _function, args, _result): |
- pass |
+ def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): |
+ assert not self._tracer_pid and not self._initial_pid |
+ self._tracer_pid = pid |
+ self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) |
def handle_proc_start(self, ppid, pid, _function, _args, result): |
- """Transfers cwd.""" |
+ """Transfers cwd. |
+ |
+ The dtrace script already takes care of only tracing the processes that |
+ are child of the traced processes so there is no need to verify the |
+ process hierarchy. |
+ """ |
assert result == '0' |
- cwd = self.processes[ppid].cwd |
assert pid not in self.processes |
+ assert (ppid == self._tracer_pid) != bool(self._initial_pid) |
+ if not self._initial_pid: |
+ self._initial_pid = pid |
+ ppid = None |
+ cwd = self._initial_cwd |
+ else: |
+ parent = self.processes[ppid] |
+ cwd = parent.cwd |
proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) |
proc.cwd = cwd |
+ logging.debug('New child: %s -> %d' % (ppid, pid)) |
def handle_proc_exit(self, _ppid, pid, _function, _args, _result): |
"""Removes cwd.""" |
- self.processes[pid].cwd = None |
+ if pid != self._tracer_pid: |
+ # self._tracer_pid is not traced itself. |
+ self.processes[pid].cwd = None |
- def handle_chdir(self, ppid, pid, _function, args, result): |
+ def handle_chdir(self, _ppid, pid, _function, args, result): |
"""Updates cwd.""" |
+ assert self._tracer_pid |
+ assert pid in self.processes |
if result.startswith('0'): |
cwd = self.RE_CHDIR.match(args).group(1) |
if not cwd.startswith('/'): |
@@ -841,9 +871,7 @@ class Dtrace(ApiBase): |
else: |
logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) |
cwd2 = cwd |
- proc = self.processes.setdefault( |
- pid, self.Process(self, pid, cwd2, ppid)) |
- proc.cwd = cwd2 |
+ self.processes[pid].cwd = cwd2 |
else: |
assert False, 'Unexecpected fail: %s' % result |
@@ -996,6 +1024,8 @@ class Dtrace(ApiBase): |
class LogmanTrace(ApiBase): |
"""Uses the native Windows ETW based tracing functionality to trace a child |
process. |
+ |
+ Caveat: this implementations doesn't track cwd or initial_cwd. |
""" |
class Context(ApiBase.Context): |
"""Processes a ETW log line and keeps the list of existent and non |
@@ -1013,20 +1043,28 @@ class LogmanTrace(ApiBase): |
TIMESTAMP = 16 |
class Process(ApiBase.Context.Process): |
- pass |
+ def __init__(self, *args): |
+ super(LogmanTrace.Context.Process, self).__init__(*args) |
+ # Handle file objects that succeeded. |
+ self.file_objects = {} |
def __init__(self, blacklist): |
super(LogmanTrace.Context, self).__init__(blacklist) |
self._drive_map = DosDriveMap() |
- self._first_line = False |
# Threads mapping to the corresponding process id. |
self._threads_active = {} |
+ # Process ID of the tracer, e.g. tracer_inputs.py |
+ self._tracer_pid = None |
+ # First process to be started by self._tracer_pid. |
+ self._initial_pid = None |
+ self._line_number = 0 |
def on_csv_line(self, line): |
"""Processes a CSV Event line.""" |
# So much white space! |
line = [i.strip() for i in line] |
- if not self._first_line: |
+ self._line_number += 1 |
+ if self._line_number == 1: |
assert line == [ |
u'Event Name', |
u'Type', |
@@ -1049,7 +1087,6 @@ class LogmanTrace(ApiBase): |
u'User(ms)', # pretty much useless. |
u'User Data', |
] |
- self._first_line = True |
return |
# As you can see, the CSV is full of useful non-redundant information: |
@@ -1094,6 +1131,11 @@ class LogmanTrace(ApiBase): |
else: |
assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) |
+ def _thread_to_process(self, tid): |
+ """Finds the process from the thread id.""" |
+ tid = int(tid, 16) |
+ return self.processes.get(self._threads_active.get(tid)) |
+ |
@staticmethod |
def handle_EventTrace_Header(line): |
"""Verifies no event was dropped, e.g. no buffer overrun occured.""" |
@@ -1124,6 +1166,24 @@ class LogmanTrace(ApiBase): |
def handle_EventTrace_Any(self, line): |
pass |
+ def handle_FileIo_Cleanup(self, line): |
+ """General wisdom: if a file is closed, it's because it was opened. |
+ |
+ Note that FileIo_Close is not used since if a file was opened properly but |
+ not closed before the process exits, only Cleanup will be logged. |
+ """ |
+ #IRP = 19 |
+ TTID = 20 # Thread ID, that's what we want. |
+ FILE_OBJECT = 21 |
+ #FILE_KEY = 22 |
+ proc = self._thread_to_process(line[TTID]) |
+ if not proc: |
+ # Not a process we care about. |
+ return |
+ file_object = line[FILE_OBJECT] |
+ if file_object in proc.file_objects: |
+ proc.add_file(proc.file_objects.pop(file_object)) |
+ |
def handle_FileIo_Create(self, line): |
"""Handles a file open. |
@@ -1133,18 +1193,20 @@ class LogmanTrace(ApiBase): |
" (..) process and thread id values of the IO events (..) are not valid " |
http://msdn.microsoft.com/magazine/ee358703.aspx |
+ |
+ The FileIo.Create event doesn't return if the CreateFile() call |
+ succeeded, so keep track of the file_object and check that it is |
+ eventually closed with FileIo_Cleanup. |
""" |
#IRP = 19 |
TTID = 20 # Thread ID, that's what we want. |
- #FILE_OBJECT = 21 |
+ FILE_OBJECT = 21 |
#CREATE_OPTIONS = 22 |
#FILE_ATTRIBUTES = 23 |
#SHARE_ACCESS = 24 |
OPEN_PATH = 25 |
- # Find the process from the thread id. |
- tid = int(line[TTID], 16) |
- proc = self.processes.get(self._threads_active.get(tid)) |
+ proc = self._thread_to_process(line[TTID]) |
if not proc: |
# Not a process we care about. |
return |
@@ -1155,10 +1217,13 @@ class LogmanTrace(ApiBase): |
if raw_path.endswith(os.path.sep): |
return |
filename = self._drive_map.to_dos(raw_path) |
- # Ignore bare drive right away. |
- if len(raw_path) == 2: |
+ # Ignore bare drive right away. Some may still fall through with format |
+ # like '\\?\X:' |
+ if len(filename) == 2: |
return |
- proc.add_file(filename) |
+ file_object = line[FILE_OBJECT] |
+ # Override any stale file object |
+ proc.file_objects[file_object] = filename |
def handle_FileIo_Rename(self, line): |
# TODO(maruel): Handle? |
@@ -1190,8 +1255,8 @@ class LogmanTrace(ApiBase): |
if line[IMAGE_FILE_NAME] == '"logman.exe"': |
# logman's parent is trace_input.py or whatever tool using it as a |
# library. Trace any other children started by it. |
- assert ppid not in self.processes |
- self.processes[ppid] = self.Process(self, ppid, None, None) |
+ assert not self._tracer_pid |
+ self._tracer_pid = ppid |
logging.info('Found logman\'s parent at %d' % ppid) |
def handle_Process_End(self, line): |
@@ -1216,12 +1281,19 @@ class LogmanTrace(ApiBase): |
ppid = line[self.PID] |
pid = int(line[PROCESS_ID], 16) |
- if ppid in self.processes: |
+ if ppid == self._tracer_pid: |
# Need to ignore processes we don't know about because the log is |
# system-wide. |
if line[IMAGE_FILE_NAME] == '"logman.exe"': |
# Skip the shutdown call when "logman.exe stop" is executed. |
return |
+ self._initial_pid = self._initial_pid or pid |
+ assert pid not in self.processes |
+ self.processes[pid] = self.Process(self, pid, None, None) |
+ logging.info( |
+ 'New child: %d -> %d %s' % (ppid, pid, line[IMAGE_FILE_NAME])) |
+ elif ppid in self.processes: |
+ # Grand-children |
assert pid not in self.processes |
self.processes[pid] = self.Process(self, pid, None, ppid) |
logging.info( |
@@ -1289,8 +1361,13 @@ class LogmanTrace(ApiBase): |
for i in list(self.IGNORED): |
self.IGNORED.add(GetShortPathName(i.replace('/', os.path.sep))) |
- # Add this one last since it has no short path name equivalent. |
+ # Add these last since they have no short path name equivalent. |
self.IGNORED.add('\\SystemRoot') |
+ # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
+ for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)): |
+ self.IGNORED.add('%s\\\$' % letter) |
+ # TODO(maruel): Remove the need to add these. |
+ self.IGNORED.add('\\\\?\\%s\\$' % letter) |
self.IGNORED = tuple(sorted(self.IGNORED)) |
@staticmethod |