Chromium Code Reviews| Index: tools/isolate/trace_inputs.py |
| diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py |
| index 140a9cab2aaeb83956fb597ce76df6ad98530258..cc4d4ed24ae2e0a10bfc5c64ba1fb8b5190d06d1 100755 |
| --- a/tools/isolate/trace_inputs.py |
| +++ b/tools/isolate/trace_inputs.py |
| @@ -383,7 +383,7 @@ class Strace(ApiBase): |
| # Arguments parsing. |
| RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
| - RE_EXECVE = re.compile(r'^\"(.+?)\", \[.+?\], \[.+?\]$') |
| + RE_EXECVE = re.compile(r'^\"(.+?)\", \[(.+)\], \[\/\* \d\d vars \*\/\]$') |
| RE_OPEN2 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+)$') |
| RE_OPEN3 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+), (\d+)$') |
| RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
| @@ -503,13 +503,18 @@ class Strace(ApiBase): |
| child.initial_cwd = self.get_cwd() |
| assert child.parentid is None |
| child.parentid = self.pid |
| + # It is necessary because the logs are processed out of order. |
| + assert childpid not in self.children |
| self.children.append(childpid) |
| def handle_close(self, _function, _args, _result): |
| pass |
| def handle_execve(self, _function, args, result): |
| - self._handle_file(self.RE_EXECVE.match(args).group(1), result) |
| + if result != '0': |
| + return |
| + m = self.RE_EXECVE.match(args) |
| + self._handle_file(m.group(1), result) |
| def handle_exit_group(self, _function, _args, _result): |
| """Removes cwd.""" |
| @@ -765,8 +770,8 @@ class Dtrace(ApiBase): |
| def code(cls, pid, cwd): |
| """Setups the D code to implement child process tracking. |
| - Injects a fake chdir() trace to simplify parsing. The reason is that the |
| - child process is already running at that point so: |
| + Injects the pid and the initial cwd into the trace header for context. |
| + The reason is that the child process is already running at that point so: |
| - no proc_start() is logged for it. |
| - there is no way to figure out the absolute path of cwd in kernel on OSX |
| @@ -779,12 +784,9 @@ class Dtrace(ApiBase): |
| 'dtrace:::BEGIN {\n' |
| ' current_processes = 1;\n' |
| ' logindex = 0;\n' |
| - ' trackedpid[') + pid + ('] = 1;\n' |
| - ' printf("%d %d:%d chdir(\\"' + cwd + '\\") = 0\\n",\n' |
| - ' logindex, 1, ' + pid + ');\n' |
| - ' logindex++;\n' |
| - ' printf("%d %d:%d %s_%s() = 0\\n",\n' |
| - ' logindex, ppid, pid, probeprov, probename);\n' |
| + ' trackedpid[' + pid + '] = 1;\n' |
| + ' printf("%d %d:%d %s_%s(\\"' + cwd + '\\") = 0\\n",\n' |
| + ' logindex, ppid, ' + pid + ', probeprov, probename);\n' |
| ' logindex++;\n' |
| '}\n') + cls.D_CODE |
| @@ -793,6 +795,7 @@ class Dtrace(ApiBase): |
| RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$') |
| # Arguments parsing. |
| + RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') |
| RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
| RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (-?\d+)$') |
| RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
| @@ -802,6 +805,14 @@ class Dtrace(ApiBase): |
| class Process(ApiBase.Context.Process): |
| pass |
| + def __init__(self, blacklist): |
| + super(Dtrace.Context, self).__init__(blacklist) |
| + # Process ID of the trace_child_process.py wrapper script instance. |
| + self._tracer_pid = None |
| + # First process to be started by self._tracer_pid. |
| + self._initial_pid = None |
| + self._initial_cwd = None |
| + |
| def on_line(self, line): |
| match = self.RE_HEADER.match(line) |
| assert match, line |
| @@ -816,23 +827,42 @@ class Dtrace(ApiBase): |
| match.group(4), |
| match.group(5)) |
| - def handle_dtrace_BEGIN(self, _ppid, _pid, _function, args, _result): |
| - pass |
| + def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): |
| + assert not self._tracer_pid and not self._initial_pid |
| + self._tracer_pid = pid |
| + self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) |
| def handle_proc_start(self, ppid, pid, _function, _args, result): |
| - """Transfers cwd.""" |
| + """Transfers cwd. |
| + |
| + The dtrace script already takes care of only tracing the processes that |
| + are child of the traced processes so there is no need to verify the |
| + process hierarchy. |
| + """ |
| assert result == '0' |
| - cwd = self.processes[ppid].cwd |
| assert pid not in self.processes |
| + assert (ppid == self._tracer_pid) != bool(self._initial_pid) |
| + if not self._initial_pid: |
| + self._initial_pid = pid |
| + ppid = None |
| + cwd = self._initial_cwd |
| + else: |
| + parent = self.processes[ppid] |
| + cwd = parent.cwd |
| proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) |
| proc.cwd = cwd |
| + logging.debug('New child: %s -> %d' % (ppid, pid)) |
| def handle_proc_exit(self, _ppid, pid, _function, _args, _result): |
| """Removes cwd.""" |
| - self.processes[pid].cwd = None |
| + if pid != self._tracer_pid: |
| + # self._tracer_pid is not traced itself. |
| + self.processes[pid].cwd = None |
| - def handle_chdir(self, ppid, pid, _function, args, result): |
| + def handle_chdir(self, _ppid, pid, _function, args, result): |
| """Updates cwd.""" |
| + assert self._tracer_pid |
| + assert pid in self.processes |
| if result.startswith('0'): |
| cwd = self.RE_CHDIR.match(args).group(1) |
| if not cwd.startswith('/'): |
| @@ -841,9 +871,7 @@ class Dtrace(ApiBase): |
| else: |
| logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) |
| cwd2 = cwd |
| - proc = self.processes.setdefault( |
| - pid, self.Process(self, pid, cwd2, ppid)) |
| - proc.cwd = cwd2 |
| + self.processes[pid].cwd = cwd2 |
| else: |
| assert False, 'Unexecpected fail: %s' % result |
| @@ -996,6 +1024,8 @@ class Dtrace(ApiBase): |
| class LogmanTrace(ApiBase): |
| """Uses the native Windows ETW based tracing functionality to trace a child |
| process. |
| + |
| + Caveat: this implementations doesn't track cwd or initial_cwd. |
| """ |
| class Context(ApiBase.Context): |
| """Processes a ETW log line and keeps the list of existent and non |
| @@ -1013,20 +1043,28 @@ class LogmanTrace(ApiBase): |
| TIMESTAMP = 16 |
| class Process(ApiBase.Context.Process): |
| - pass |
| + def __init__(self, *args): |
| + super(LogmanTrace.Context.Process, self).__init__(*args) |
| + # Handle file objects that succeeded. |
| + self.file_objects = {} |
| def __init__(self, blacklist): |
| super(LogmanTrace.Context, self).__init__(blacklist) |
| self._drive_map = DosDriveMap() |
| - self._first_line = False |
| # Threads mapping to the corresponding process id. |
| self._threads_active = {} |
| + # Process ID of the tracer, e.g. tracer_inputs.py |
| + self._tracer_pid = None |
| + # First process to be started by self._tracer_pid. |
| + self._initial_pid = None |
| + self._line_number = 0 |
| def on_csv_line(self, line): |
| """Processes a CSV Event line.""" |
| # So much white space! |
| line = [i.strip() for i in line] |
| - if not self._first_line: |
| + self._line_number += 1 |
| + if self._line_number == 1: |
| assert line == [ |
| u'Event Name', |
| u'Type', |
| @@ -1049,7 +1087,6 @@ class LogmanTrace(ApiBase): |
| u'User(ms)', # pretty much useless. |
| u'User Data', |
| ] |
| - self._first_line = True |
| return |
| # As you can see, the CSV is full of useful non-redundant information: |
| @@ -1094,6 +1131,11 @@ class LogmanTrace(ApiBase): |
| else: |
| assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) |
| + def _thread_to_process(self, tid): |
| + """Finds the process from the thread id.""" |
| + tid = int(tid, 16) |
| + return self.processes.get(self._threads_active.get(tid)) |
| + |
| @staticmethod |
| def handle_EventTrace_Header(line): |
| """Verifies no event was dropped, e.g. no buffer overrun occured.""" |
| @@ -1124,6 +1166,24 @@ class LogmanTrace(ApiBase): |
| def handle_EventTrace_Any(self, line): |
| pass |
| + def handle_FileIo_Cleanup(self, line): |
| + """General wisdom: if a file is closed, it's because it was opened. |
|
MAD
2012/05/30 15:39:00
:-)
M-A Ruel
2012/05/30 15:57:16
Note that it is a reference to line 1197, which do
|
| + |
| + Note that FileIo_Close is not used since if a file was opened properly but |
| + not closed before the process exits, only Cleanup will be logged. |
| + """ |
| + #IRP = 19 |
| + TTID = 20 # Thread ID, that's what we want. |
| + FILE_OBJECT = 21 |
| + #FILE_KEY = 22 |
| + proc = self._thread_to_process(line[TTID]) |
| + if not proc: |
| + # Not a process we care about. |
| + return |
| + file_object = line[FILE_OBJECT] |
| + if file_object in proc.file_objects: |
| + proc.add_file(proc.file_objects.pop(file_object)) |
| + |
| def handle_FileIo_Create(self, line): |
| """Handles a file open. |
| @@ -1133,18 +1193,19 @@ class LogmanTrace(ApiBase): |
| " (..) process and thread id values of the IO events (..) are not valid " |
| http://msdn.microsoft.com/magazine/ee358703.aspx |
| + |
| + The FileIo.Create event doesn't return if the CreateFile() call |
| + succeeded so keep track of the file_object and look if it is closed. |
|
MAD
2012/05/30 15:39:00
add a coma "," after succeeded please...
"look if
M-A Ruel
2012/05/30 15:57:16
Changed the wording a little.
|
| """ |
| #IRP = 19 |
| TTID = 20 # Thread ID, that's what we want. |
| - #FILE_OBJECT = 21 |
| + FILE_OBJECT = 21 |
| #CREATE_OPTIONS = 22 |
| #FILE_ATTRIBUTES = 23 |
| #SHARE_ACCESS = 24 |
| OPEN_PATH = 25 |
| - # Find the process from the thread id. |
| - tid = int(line[TTID], 16) |
| - proc = self.processes.get(self._threads_active.get(tid)) |
| + proc = self._thread_to_process(line[TTID]) |
| if not proc: |
| # Not a process we care about. |
| return |
| @@ -1155,10 +1216,13 @@ class LogmanTrace(ApiBase): |
| if raw_path.endswith(os.path.sep): |
| return |
| filename = self._drive_map.to_dos(raw_path) |
| - # Ignore bare drive right away. |
| - if len(raw_path) == 2: |
| + # Ignore bare drive right away. Some may still fall through with format |
| + # like '\\?\X:' |
| + if len(filename) == 2: |
| return |
| - proc.add_file(filename) |
| + file_object = line[FILE_OBJECT] |
| + # Override any stale file object |
| + proc.file_objects[file_object] = filename |
| def handle_FileIo_Rename(self, line): |
| # TODO(maruel): Handle? |
| @@ -1190,8 +1254,8 @@ class LogmanTrace(ApiBase): |
| if line[IMAGE_FILE_NAME] == '"logman.exe"': |
| # logman's parent is trace_input.py or whatever tool using it as a |
| # library. Trace any other children started by it. |
| - assert ppid not in self.processes |
| - self.processes[ppid] = self.Process(self, ppid, None, None) |
| + assert not self._tracer_pid |
| + self._tracer_pid = ppid |
| logging.info('Found logman\'s parent at %d' % ppid) |
| def handle_Process_End(self, line): |
| @@ -1200,6 +1264,7 @@ class LogmanTrace(ApiBase): |
| pid = line[self.PID] |
| if pid in self.processes: |
| logging.info('Terminated: %d' % pid) |
| + self.processes[pid].cwd = None |
| def handle_Process_Start(self, line): |
| """Handles a new child process started by PID.""" |
| @@ -1215,12 +1280,19 @@ class LogmanTrace(ApiBase): |
| ppid = line[self.PID] |
| pid = int(line[PROCESS_ID], 16) |
| - if ppid in self.processes: |
| + if ppid == self._tracer_pid: |
| # Need to ignore processes we don't know about because the log is |
| # system-wide. |
| if line[IMAGE_FILE_NAME] == '"logman.exe"': |
| # Skip the shutdown call when "logman.exe stop" is executed. |
| return |
| + self._initial_pid = self._initial_pid or pid |
| + assert pid not in self.processes |
| + self.processes[pid] = self.Process(self, pid, None, None) |
| + logging.info( |
| + 'New child: %d -> %d %s' % (ppid, pid, line[IMAGE_FILE_NAME])) |
| + elif ppid in self.processes: |
| + # Grand-children |
| assert pid not in self.processes |
| self.processes[pid] = self.Process(self, pid, None, ppid) |
| logging.info( |
| @@ -1288,8 +1360,13 @@ class LogmanTrace(ApiBase): |
| for i in list(self.IGNORED): |
| self.IGNORED.add(GetShortPathName(i.replace('/', os.path.sep))) |
| - # Add this one last since it has no short path name equivalent. |
| + # Add these last since they have no short path name equivalent. |
| self.IGNORED.add('\\SystemRoot') |
| + # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
| + for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)): |
| + self.IGNORED.add('%s\\\$' % letter) |
| + # TODO(maruel): Remove the need to add these. |
| + self.IGNORED.add('\\\\?\\%s\\$' % letter) |
| self.IGNORED = tuple(sorted(self.IGNORED)) |
| @staticmethod |