Index: tools/isolate/trace_inputs.py |
diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py |
index 0ba03f0a7222a8c73407caff41b66edb777acb62..d4f8549ba72fb256dcf1d508d8ae20cc82510650 100755 |
--- a/tools/isolate/trace_inputs.py |
+++ b/tools/isolate/trace_inputs.py |
@@ -33,7 +33,7 @@ import weakref |
## OS-specific imports |
if sys.platform == 'win32': |
- from ctypes.wintypes import create_unicode_buffer |
+ from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p |
from ctypes.wintypes import windll, FormatError # pylint: disable=E0611 |
from ctypes.wintypes import GetLastError # pylint: disable=E0611 |
elif sys.platform == 'darwin': |
@@ -173,6 +173,17 @@ if sys.platform == 'win32': |
return path |
+ def CommandLineToArgvW(command_line): |
+ """Splits a commandline into argv using CommandLineToArgvW().""" |
+ # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx |
+ size = c_int() |
+ ptr = windll.shell32.CommandLineToArgvW(unicode(command_line), byref(size)) |
+ try: |
+ return [arg for arg in (c_wchar_p * size.value).from_address(ptr)] |
+ finally: |
+ windll.kernel32.LocalFree(ptr) |
+ |
+ |
elif sys.platform == 'darwin': |
@@ -255,6 +266,74 @@ def cleanup_path(x): |
return x |
+def process_quoted_arguments(text): |
+ """Extracts quoted arguments on a string and return the arguments as a list. |
+ |
+ Implemented as an automaton. Supports incomplete strings in the form |
+ '"foo"...'. |
+ |
+ Example: |
+ With text = '"foo", "bar"', the function will return ['foo', 'bar'] |
+ |
+ TODO(maruel): Implement escaping. |
+ """ |
+ # All the possible states of the DFA. |
+ ( NEED_QUOTE, # Begining of a new arguments. |
+ INSIDE_STRING, # Inside an argument. |
+ NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be |
+ # a serie of 3 dots or a comma. |
+ NEED_SPACE, # Right after a comma |
+ NEED_DOT_2, # Found a dot, need a second one. |
+ NEED_DOT_3, # Found second dot, need a third one. |
+ NEED_COMMA, # Found third dot, need a comma. |
+ ) = range(7) |
+ |
+ state = NEED_QUOTE |
+ current_argument = '' |
+ out = [] |
+ for i in text: |
+ if i == '"': |
+ if state == NEED_QUOTE: |
+ state = INSIDE_STRING |
+ elif state == INSIDE_STRING: |
+ # The argument is now closed. |
+ out.append(current_argument) |
+ current_argument = '' |
+ state = NEED_COMMA_OR_DOT |
+ else: |
+ assert False, text |
+ elif i == ',': |
+ if state in (NEED_COMMA_OR_DOT, NEED_COMMA): |
+ state = NEED_SPACE |
+ else: |
+ assert False, text |
+ elif i == ' ': |
+ if state == NEED_SPACE: |
+ state = NEED_QUOTE |
+ if state == INSIDE_STRING: |
+ current_argument += i |
+ elif i == '.': |
+ if state == NEED_COMMA_OR_DOT: |
+ # The string is incomplete, this mean the strace -s flag should be |
+ # increased. |
+ state = NEED_DOT_2 |
+ elif state == NEED_DOT_2: |
+ state = NEED_DOT_3 |
+ elif state == NEED_DOT_3: |
+ state = NEED_COMMA |
+ elif state == INSIDE_STRING: |
+ current_argument += i |
+ else: |
+ assert False, text |
+ else: |
+ if state == INSIDE_STRING: |
+ current_argument += i |
+ else: |
+ assert False, text |
+ assert state in (NEED_COMMA, NEED_COMMA_OR_DOT) |
+ return out |
+ |
+ |
class ApiBase(object): |
"""OS-agnostic API to trace a process and its children.""" |
class Context(object): |
@@ -752,7 +831,10 @@ class Strace(ApiBase): |
if result != '0': |
return |
m = self.RE_EXECVE.match(args) |
- self._handle_file(m.group(1), result) |
+ filepath = m.group(1) |
+ self._handle_file(filepath, result) |
+ self.executable = self.RelativePath(self.get_cwd(), filepath) |
+ self.command = process_quoted_arguments(m.group(2)) |
def handle_exit_group(self, _function, _args, _result): |
"""Removes cwd.""" |
@@ -947,6 +1029,115 @@ class Dtrace(ApiBase): |
} |
/* Finally what we care about! */ |
+ syscall::exec*:entry /trackedpid[pid]/ { |
+ self->e_arg0 = copyinstr(arg0); |
+ /* Incrementally probe for a NULL in the argv parameter of execve() to |
+ * figure out argc. */ |
+ self->argc = 0; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ |
+ /* Copy the inputs strings since there is no guarantee they'll be |
+ * present after the call completed. */ |
+ self->args[0] = (self->argc > 0) ? copyinstr(self->argv[0]) : ""; |
+ self->args[1] = (self->argc > 1) ? copyinstr(self->argv[1]) : ""; |
+ self->args[2] = (self->argc > 2) ? copyinstr(self->argv[2]) : ""; |
+ self->args[3] = (self->argc > 3) ? copyinstr(self->argv[3]) : ""; |
+ self->args[4] = (self->argc > 4) ? copyinstr(self->argv[4]) : ""; |
+ self->args[5] = (self->argc > 5) ? copyinstr(self->argv[5]) : ""; |
+ self->args[6] = (self->argc > 6) ? copyinstr(self->argv[6]) : ""; |
+ self->args[7] = (self->argc > 7) ? copyinstr(self->argv[7]) : ""; |
+ self->args[8] = (self->argc > 8) ? copyinstr(self->argv[8]) : ""; |
+ self->args[9] = (self->argc > 9) ? copyinstr(self->argv[9]) : ""; |
+ } |
+ syscall::exec*: /trackedpid[pid] && errno == 0/ { |
+ /* We need to join strings here, as using multiple printf() would cause |
+ * tearing when multiple threads/processes are traced. */ |
+ this->args = ""; |
+ this->args = strjoin(this->args, (self->argc > 0) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 0) ? self->args[0] : ""); |
+ this->args = strjoin(this->args, (self->argc > 0) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 1) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 1) ? self->args[1] : ""); |
+ this->args = strjoin(this->args, (self->argc > 1) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 2) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 2) ? self->args[2] : ""); |
+ this->args = strjoin(this->args, (self->argc > 2) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 3) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 3) ? self->args[3] : ""); |
+ this->args = strjoin(this->args, (self->argc > 3) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 4) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 4) ? self->args[4] : ""); |
+ this->args = strjoin(this->args, (self->argc > 4) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 5) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 5) ? self->args[5] : ""); |
+ this->args = strjoin(this->args, (self->argc > 5) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 6) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 6) ? self->args[6] : ""); |
+ this->args = strjoin(this->args, (self->argc > 6) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 7) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 7) ? self->args[7] : ""); |
+ this->args = strjoin(this->args, (self->argc > 7) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 8) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 8) ? self->args[8] : ""); |
+ this->args = strjoin(this->args, (self->argc > 8) ? "\\"" : ""); |
+ |
+ this->args = strjoin(this->args, (self->argc > 9) ? ", \\"" : ""); |
+ this->args = strjoin(this->args, (self->argc > 9) ? self->args[9]: ""); |
+ this->args = strjoin(this->args, (self->argc > 9) ? "\\"" : ""); |
+ |
+ /* Prints self->argc to permits verifying the internal consistency since |
+ * this code is quite fishy. */ |
+ printf("%d %d:%d %s(\\"%s\\", [%d%s]) = %d\\n", |
+ logindex, ppid, pid, probefunc, |
+ self->e_arg0, |
+ self->argc, |
+ this->args, |
+ errno); |
+ logindex++; |
+ |
+ /* TODO(maruel): Clean up memory |
+ self->e_arg0 = 0; |
+ self->argc = 0; |
+ self->args[0] = 0; |
+ self->args[1] = 0; |
+ self->args[2] = 0; |
+ self->args[3] = 0; |
+ self->args[4] = 0; |
+ self->args[5] = 0; |
+ self->args[6] = 0; |
+ self->args[7] = 0; |
+ self->args[8] = 0; |
+ self->args[9] = 0; |
+ */ |
+ } |
+ |
syscall::open*:entry /trackedpid[pid]/ { |
self->arg0 = arg0; |
self->arg1 = arg1; |
@@ -1040,6 +1231,7 @@ class Dtrace(ApiBase): |
# Arguments parsing. |
RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') |
RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
+ RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') |
RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (-?\d+)$') |
RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
@@ -1109,6 +1301,23 @@ class Dtrace(ApiBase): |
# self._tracer_pid is not traced itself. |
self.processes[pid].cwd = None |
+ def handle_execve(self, _ppid, pid, _function, args, _result): |
+ """Sets the process' executable. |
+ |
+ TODO(maruel): Read command line arguments. See |
+ https://discussions.apple.com/thread/1980539 for an example. |
+ https://gist.github.com/1242279 |
+ |
+ Will have to put the answer at http://stackoverflow.com/questions/7556249. |
+ :) |
+ """ |
+ match = self.RE_EXECVE.match(args) |
+ assert match, args |
+ proc = self.processes[pid] |
+ proc.executable = match.group(1) |
+ proc.command = process_quoted_arguments(match.group(3)) |
+ assert int(match.group(2)) == len(proc.command), args |
+ |
def handle_chdir(self, _ppid, pid, _function, args, result): |
"""Updates cwd.""" |
assert self._tracer_pid |
@@ -1260,8 +1469,18 @@ class Dtrace(ApiBase): |
CPU. |
""" |
with open(logname, 'rb') as logfile: |
- lines = [f for f in logfile.readlines() if f.strip()] |
- lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) |
+ lines = [l for l in logfile if l.strip()] |
+ errors = [l for l in lines if l.startswith('dtrace:')] |
+ if errors: |
+ print >> sys.stderr, 'Failed to load: %s' % logname |
+ print >> sys.stderr, '\n'.join(errors) |
+ assert not errors, errors |
+ try: |
+ lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) |
+ except ValueError: |
+ print >> sys.stderr, 'Failed to load: %s' % logname |
+ print >> sys.stderr, '\n'.join(lines) |
+ raise |
with open(logname, 'wb') as logfile: |
logfile.write(''.join(lines)) |
@@ -1529,7 +1748,7 @@ class LogmanTrace(ApiBase): |
#DIRECTORY_TABLE_BASE = 24 |
#USER_SID = 25 |
IMAGE_FILE_NAME = 26 |
- #COMMAND_LINE = 27 |
+ COMMAND_LINE = 27 |
ppid = line[self.PID] |
pid = int(line[PROCESS_ID], 16) |
@@ -1540,16 +1759,32 @@ class LogmanTrace(ApiBase): |
# Skip the shutdown call when "logman.exe stop" is executed. |
return |
self._initial_pid = self._initial_pid or pid |
- assert pid not in self.processes |
- self.processes[pid] = self.Process(self, pid, None, None) |
- logging.info( |
- 'New child: %d -> %d %s' % (ppid, pid, line[IMAGE_FILE_NAME])) |
- elif ppid in self.processes: |
- # Grand-children |
- assert pid not in self.processes |
- self.processes[pid] = self.Process(self, pid, None, ppid) |
- logging.info( |
- 'New child: %d -> %d %s' % (ppid, pid, line[IMAGE_FILE_NAME])) |
+ ppid = None |
+ elif ppid not in self.processes: |
+ # Ignore |
+ return |
+ assert pid not in self.processes |
+ proc = self.processes[pid] = self.Process(self, pid, None, ppid) |
+ # TODO(maruel): Process escapes. |
+ assert ( |
+ line[COMMAND_LINE].startswith('"') and |
+ line[COMMAND_LINE].endswith('"')) |
+ proc.command = CommandLineToArgvW(line[COMMAND_LINE][1:-1]) |
+ assert ( |
+ line[IMAGE_FILE_NAME].startswith('"') and |
+ line[IMAGE_FILE_NAME].endswith('"')) |
+ proc.executable = line[IMAGE_FILE_NAME][1:-1] |
+ # proc.command[0] may be the absolute path of 'executable' but it may be |
+ # anything else too. If it happens that command[0] ends with executable, |
+ # use it, otherwise defaults to the base name. |
+ cmd0 = proc.command[0].lower() |
+ if not cmd0.endswith('.exe'): |
+ # TODO(maruel): That's not strictly true either. |
+ cmd0 += '.exe' |
+ if cmd0.endswith(proc.executable) and os.path.isfile(cmd0): |
+ proc.executable = get_native_path_case(cmd0) |
+ logging.info( |
+ 'New child: %s -> %d %s' % (ppid, pid, proc.executable)) |
def handle_Thread_End(self, line): |
"""Has the same parameters as Thread_Start.""" |
@@ -1615,11 +1850,6 @@ class LogmanTrace(ApiBase): |
# Add these last since they have no short path name equivalent. |
self.IGNORED.add('\\SystemRoot') |
- # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
- for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)): |
- self.IGNORED.add('%s\\\$' % letter) |
- # TODO(maruel): Remove the need to add these. |
- self.IGNORED.add('\\\\?\\%s\\$' % letter) |
self.IGNORED = tuple(sorted(self.IGNORED)) |
@staticmethod |
@@ -1752,6 +1982,10 @@ class LogmanTrace(ApiBase): |
def parse_log(cls, filename, blacklist): |
logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
+ def blacklist_more(filepath): |
+ # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
+ return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) |
+ |
# Auto-detect the log format. |
with open(filename, 'rb') as f: |
hdr = f.read(2) |
@@ -1765,7 +1999,7 @@ class LogmanTrace(ApiBase): |
else: |
logformat = 'csv' |
- context = cls.Context(blacklist) |
+ context = cls.Context(blacklist_more) |
if logformat == 'csv_utf16': |
def utf_8_encoder(unicode_csv_data): |