Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Unified Diff: tools/isolate/trace_inputs.py

Issue 10440073: Reduce the amount of log generated with dtrace. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/isolate/trace_inputs.py
diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py
index a298c4004e9f1ce2bf0c414380b865dd1804742b..ca8cf7ad83eb587958bfe55ee8d656d309fc9a5e 100755
--- a/tools/isolate/trace_inputs.py
+++ b/tools/isolate/trace_inputs.py
@@ -11,8 +11,9 @@ The implementation uses OS-specific API. The native Kernel logger and the ETL
interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
The OS-specific implementation is hidden in an 'API' interface.
-The list is done in two phases, the first is to do the actual trace and generate
-an technique-specific log file. Then the log file is parsed to extract the
+The results are embedded in a Results instance. The tracing is done in two
+phases, the first is to do the actual trace and generate an
+implementation-specific log file. Then the log file is parsed to extract the
information, including the individual child processes and the files accessed
from the log.
"""
@@ -977,7 +978,11 @@ class Dtrace(ApiBase):
get the absolute path of the 'cwd' dtrace variable from the probe.
Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
- https://discussions.apple.com/thread/1980539.
+ https://discussions.apple.com/thread/1980539. So resort to handling execve()
+ manually.
+
+ errno is not printed in the log since this implementation currently care only
MAD 2012/06/28 13:04:57 care only -> only cares
+ about files that were successfully opened.
"""
IGNORED = (
'/.vol',
@@ -992,27 +997,58 @@ class Dtrace(ApiBase):
)
# pylint: disable=C0301
+ #
# To understand the following code, you'll want to take a look at:
# http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
# https://wikis.oracle.com/display/DTrace/Variables
# http://docs.oracle.com/cd/E19205-01/820-4221/
#
- # The list of valid probes can be retrieved with:
- # sudo dtrace -l -P syscall | less
+ # 0. Dump all the valid probes into a text file. It is important, you
+ # want to redirect into a file and you don't want to constantly 'sudo'.
+ # $ sudo dtrace -l > probes.txt
+ #
+ # 1. Count the number of probes:
+ # $ wc -l probes.txt
+ # 81823 # On OSX 10.7, including 1 header line.
+ #
+ # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
+ # likes and skipping the header with NR>1:
+ # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
+ # dtrace
+ # fbt
+ # io
+ # ip
+ # lockstat
+ # mach_trap
+ # proc
+ # profile
+ # sched
+ # syscall
+ # tcp
+ # vminfo
+ #
+ # 3. List of valid probes:
+ # $ grep syscall probes.txt | less
+ # or use dtrace directly:
+ # $ sudo dtrace -l -P syscall | less
+
D_CODE = """
proc:::start /trackedpid[ppid]/ {
trackedpid[pid] = 1;
current_processes += 1;
- printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n",
- logindex, ppid, pid, probeprov, probename, execname,
+ printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
+ logindex, pid, probeprov, probename,
+ ppid,
+ execname,
current_processes);
logindex++;
}
proc:::exit /trackedpid[pid] && current_processes == 1/ {
+ /* Last process is exiting. */
trackedpid[pid] = 0;
current_processes -= 1;
- printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n",
- logindex, ppid, pid, probeprov, probename, execname,
+ printf("%d %d %s_%s(%d)\\n",
+ logindex, pid, probeprov, probename,
current_processes);
logindex++;
exit(0);
@@ -1020,17 +1056,19 @@ class Dtrace(ApiBase):
proc:::exit /trackedpid[pid]/ {
trackedpid[pid] = 0;
current_processes -= 1;
- printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n",
- logindex, ppid, pid, probeprov, probename, execname,
+ printf("%d %d %s_%s(%d)\\n",
+ logindex, pid, probeprov, probename,
current_processes);
logindex++;
}
/* Finally what we care about! */
- syscall::exec*:entry /trackedpid[pid]/ {
+ syscall::exec*:entry /trackedpid[pid] == 1/ {
self->e_arg0 = copyinstr(arg0);
/* Incrementally probe for a NULL in the argv parameter of execve() to
* figure out argc. */
+ /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
+ * found. */
self->argc = 0;
self->argv = (user_addr_t*)copyin(
arg1, sizeof(user_addr_t) * (self->argc + 1));
@@ -1052,6 +1090,15 @@ class Dtrace(ApiBase):
self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc;
self->argv = (user_addr_t*)copyin(
arg1, sizeof(user_addr_t) * (self->argc + 1));
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc;
+ self->argv = (user_addr_t*)copyin(
+ arg1, sizeof(user_addr_t) * (self->argc + 1));
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc;
+ self->argv = (user_addr_t*)copyin(
+ arg1, sizeof(user_addr_t) * (self->argc + 1));
MAD 2012/06/28 13:04:57 I guess you can't use a loop for this... A bit sad
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc;
+ self->argv = (user_addr_t*)copyin(
+ arg1, sizeof(user_addr_t) * (self->argc + 1));
/* Copy the inputs strings since there is no guarantee they'll be
* present after the call completed. */
@@ -1066,7 +1113,7 @@ class Dtrace(ApiBase):
self->args[8] = (self->argc > 8) ? copyinstr(self->argv[8]) : "";
self->args[9] = (self->argc > 9) ? copyinstr(self->argv[9]) : "";
}
- syscall::exec*: /trackedpid[pid] && errno == 0/ {
+ syscall::exec*:return /trackedpid[pid] == 1 && errno == 0/ {
/* We need to join strings here, as using multiple printf() would cause
* tearing when multiple threads/processes are traced. */
this->args = "";
@@ -1112,15 +1159,14 @@ class Dtrace(ApiBase):
/* Prints self->argc to permits verifying the internal consistency since
* this code is quite fishy. */
- printf("%d %d:%d %s(\\"%s\\", [%d%s]) = %d\\n",
- logindex, ppid, pid, probefunc,
+ printf("%d %d %s(\\"%s\\", [%d%s])\\n",
+ logindex, pid, probefunc,
self->e_arg0,
self->argc,
- this->args,
- errno);
+ this->args);
logindex++;
-
- /* TODO(maruel): Clean up memory
+ }
+ syscall::exec*:return /trackedpid[pid] == 1/ {
self->e_arg0 = 0;
self->argc = 0;
self->args[0] = 0;
@@ -1133,46 +1179,89 @@ class Dtrace(ApiBase):
self->args[7] = 0;
self->args[8] = 0;
self->args[9] = 0;
- */
}
- syscall::open*:entry /trackedpid[pid]/ {
- self->arg0 = arg0;
- self->arg1 = arg1;
- self->arg2 = arg2;
+ /* These are a good learning experience, since it traces a lot of things
MAD 2012/06/28 13:04:57 These are -> This is
+ * related to the process and child processes.
+ * Warning: it generates a gigantic log. For example, tracing
+ * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
+ * several minutes to execute.
+ */
+ /*
+ mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
+ printf("%d %d %s_%s() = %d\\n",
+ logindex, pid, probeprov, probefunc, errno);
+ logindex++;
}
- syscall::open*:return /trackedpid[pid] && errno == 0/ {
- printf("%d %d:%d %s(\\"%s\\", %d, %d) = %d\\n",
- logindex, ppid, pid, probefunc, copyinstr(self->arg0),
- self->arg1, self->arg2, errno);
+ proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
+ printf("%d %d %s_%s() = %d\\n",
+ logindex, pid, probeprov, probefunc, errno);
logindex++;
- self->arg0 = 0;
- self->arg1 = 0;
- self->arg2 = 0;
}
+ sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
+ printf("%d %d %s_%s() = %d\\n",
+ logindex, pid, probeprov, probefunc, errno);
+ logindex++;
+ }
+ syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
+ printf("%d %d %s_%s() = %d\\n",
+ logindex, pid, probeprov, probefunc, errno);
+ logindex++;
+ }
+ vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
+ printf("%d %d %s_%s() = %d\\n",
+ logindex, pid, probeprov, probefunc, errno);
+ logindex++;
+ }
+ */
- syscall::rename:entry /trackedpid[pid]/ {
- self->arg0 = arg0;
- self->arg1 = arg1;
+ syscall::open*:entry /trackedpid[pid] == 1/ {
+ self->open_arg0 = arg0;
+ self->open_arg1 = arg1;
+ self->open_arg2 = arg2;
}
- syscall::rename:return /trackedpid[pid]/ {
- printf("%d %d:%d %s(\\"%s\\", \\"%s\\") = %d\\n",
- logindex, ppid, pid, probefunc, copyinstr(self->arg0),
- copyinstr(self->arg1), errno);
+ syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
+ printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
+ logindex, pid, probefunc,
+ copyinstr(self->open_arg0),
+ self->open_arg1,
+ self->open_arg2);
logindex++;
- self->arg0 = 0;
- self->arg1 = 0;
+ }
+ syscall::open*:return /trackedpid[pid] == 1/ {
+ self->open_arg0 = 0;
+ self->open_arg1 = 0;
+ self->open_arg2 = 0;
+ }
+
+ syscall::rename:entry /trackedpid[pid] == 1/ {
+ self->rename_arg0 = arg0;
+ self->rename_arg1 = arg1;
+ }
+ syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
+ printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
+ logindex, pid, probefunc,
+ copyinstr(self->rename_arg0),
+ copyinstr(self->rename_arg1));
+ logindex++;
+ }
+ syscall::rename:return /trackedpid[pid] == 1/ {
+ self->rename_arg0 = 0;
+ self->rename_arg1 = 0;
}
/* Track chdir, it's painful because it is only receiving relative path */
- syscall::chdir:entry /trackedpid[pid]/ {
- self->arg0 = arg0;
+ syscall::chdir:entry /trackedpid[pid] == 1/ {
+ self->chdir_arg0 = arg0;
}
- syscall::chdir:return /trackedpid[pid] && errno == 0/ {
- printf("%d %d:%d %s(\\"%s\\") = %d\\n",
- logindex, ppid, pid, probefunc, copyinstr(self->arg0), errno);
+ syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
+ printf("%d %d %s(\\"%s\\")\\n",
+ logindex, pid, probefunc,
+ copyinstr(self->chdir_arg0));
logindex++;
- self->arg0 = 0;
+ }
+ syscall::chdir:return /trackedpid[pid] == 1/ {
+ self->chdir_arg0 = 0;
}
/* TODO(maruel): *stat* functions and friends
syscall::access:return,
@@ -1203,12 +1292,14 @@ class Dtrace(ApiBase):
"""Setups the D code to implement child process tracking.
Injects the pid and the initial cwd into the trace header for context.
- The reason is that the child process is already running at that point so:
+ The reason is that the child process trace_child_process.py is already
+ running at that point so:
- no proc_start() is logged for it.
- there is no way to figure out the absolute path of cwd in kernel on OSX
Since the child process is already started, initialize current_processes to
1.
+
"""
pid = str(pid)
cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%')
@@ -1216,21 +1307,28 @@ class Dtrace(ApiBase):
'dtrace:::BEGIN {\n'
' current_processes = 1;\n'
' logindex = 0;\n'
- ' trackedpid[' + pid + '] = 1;\n'
- ' printf("%d %d:%d %s_%s(\\"' + cwd + '\\") = 0\\n",\n'
- ' logindex, ppid, ' + pid + ', probeprov, probename);\n'
+ # trackedpid is an associative array where its value can be 0, 1 or 2.
+ # 0 is for untracked processes and is the default value for items not
+ # in the associative array.
+ # 1 is for tracked processes.
+ # 2 is for trace_child_process.py only. It is not tracked itself but
+ # all its decendants are.
+ ' trackedpid[' + pid + '] = 2;\n'
+ ' printf("%d %d %s_%s(\\"' + cwd + '\\")\\n",\n'
+ ' logindex, ' + pid + ', probeprov, probename);\n'
' logindex++;\n'
'}\n') + cls.D_CODE
class Context(ApiBase.Context):
- # This is the most common format. index pid function(args) = result
- RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$')
+ # This is the most common format. index pid function(args)
+ RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
# Arguments parsing.
RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
RE_CHDIR = re.compile(r'^\"(.+?)\"$')
RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
- RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (-?\d+)$')
+ RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
+ RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
O_DIRECTORY = 0x100000
@@ -1251,14 +1349,11 @@ class Dtrace(ApiBase):
assert match, line
fn = getattr(
self,
- 'handle_%s' % match.group(3).replace('-', '_'),
+ 'handle_%s' % match.group(2).replace('-', '_'),
self._handle_ignored)
return fn(
int(match.group(1)),
- int(match.group(2)),
- match.group(3),
- match.group(4),
- match.group(5))
+ match.group(3))
def to_results(self):
"""Uses self._initial_pid to determine the initial process."""
@@ -1267,20 +1362,21 @@ class Dtrace(ApiBase):
sorted(self.processes), sorted(p.pid for p in process.all))
return Results(process)
- def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result):
+ def handle_dtrace_BEGIN(self, pid, args):
assert not self._tracer_pid and not self._initial_pid
self._tracer_pid = pid
self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1)
- def handle_proc_start(self, ppid, pid, _function, _args, result):
+ def handle_proc_start(self, pid, args):
"""Transfers cwd.
The dtrace script already takes care of only tracing the processes that
are child of the traced processes so there is no need to verify the
process hierarchy.
"""
- assert result == '0'
assert pid not in self.processes
+ match = self.RE_PROC_START.match(args)
+ ppid = int(match.group(1))
assert (ppid == self._tracer_pid) != bool(self._initial_pid)
if not self._initial_pid:
self._initial_pid = pid
@@ -1293,13 +1389,13 @@ class Dtrace(ApiBase):
proc.cwd = cwd
logging.debug('New child: %s -> %d' % (ppid, pid))
- def handle_proc_exit(self, _ppid, pid, _function, _args, _result):
+ def handle_proc_exit(self, pid, _args):
"""Removes cwd."""
if pid != self._tracer_pid:
# self._tracer_pid is not traced itself.
self.processes[pid].cwd = None
- def handle_execve(self, _ppid, pid, _function, args, _result):
+ def handle_execve(self, pid, args):
"""Sets the process' executable.
TODO(maruel): Read command line arguments. See
@@ -1316,43 +1412,38 @@ class Dtrace(ApiBase):
proc.command = process_quoted_arguments(match.group(3))
assert int(match.group(2)) == len(proc.command), args
- def handle_chdir(self, _ppid, pid, _function, args, result):
+ def handle_chdir(self, pid, args):
"""Updates cwd."""
assert self._tracer_pid
assert pid in self.processes
- if result.startswith('0'):
- cwd = self.RE_CHDIR.match(args).group(1)
- if not cwd.startswith('/'):
- cwd2 = os.path.join(self.processes[pid].cwd, cwd)
- logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
- else:
- logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
- cwd2 = cwd
- self.processes[pid].cwd = cwd2
+ cwd = self.RE_CHDIR.match(args).group(1)
+ if not cwd.startswith('/'):
+ cwd2 = os.path.join(self.processes[pid].cwd, cwd)
+ logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
else:
- assert False, 'Unexecpected fail: %s' % result
+ logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
+ cwd2 = cwd
+ self.processes[pid].cwd = cwd2
- def handle_open_nocancel(self, ppid, pid, function, args, result):
- return self.handle_open(ppid, pid, function, args, result)
+ def handle_open_nocancel(self, pid, args):
+ return self.handle_open(pid, args)
- def handle_open(self, _ppid, pid, function, args, result):
+ def handle_open(self, pid, args):
match = self.RE_OPEN.match(args)
- assert match, (pid, function, args, result)
- args = match.groups()
- flag = int(args[1])
+ assert match, (pid, args)
+ flag = int(match.group(2), 16)
if self.O_DIRECTORY & flag == self.O_DIRECTORY:
# Ignore directories.
return
- self._handle_file(pid, args[0], result)
+ self._handle_file(pid, match.group(1))
- def handle_rename(self, _ppid, pid, _function, args, result):
- args = self.RE_RENAME.match(args).groups()
- self._handle_file(pid, args[0], result)
- self._handle_file(pid, args[1], result)
+ def handle_rename(self, pid, args):
+ match = self.RE_RENAME.match(args)
+ assert match, (pid, args)
+ self._handle_file(pid, match.group(1))
+ self._handle_file(pid, match.group(2))
- def _handle_file(self, pid, filepath, result):
- if result.startswith(('-1', '2')):
- return
+ def _handle_file(self, pid, filepath):
if not filepath.startswith('/'):
filepath = os.path.join(self.processes[pid].cwd, filepath)
# We can get '..' in the path.
@@ -1364,8 +1455,9 @@ class Dtrace(ApiBase):
self.processes[pid].add_file(filepath)
@staticmethod
- def _handle_ignored(_ppid, pid, function, args, result):
- logging.debug('%d %s(%s) = %s' % (pid, function, args, result))
+ def _handle_ignored(pid, args):
+ """Is called for all the event traces that are not handled."""
+ assert False, '%d ??(%s)' % (pid, args)
@staticmethod
def clean_trace(logname):
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698