Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Unified Diff: tools/isolate/trace_inputs.py

Issue 10448044: Use trace_child_process.py on Windows and save information about the trace in json. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Added more comments Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/isolate/trace_child_process.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/isolate/trace_inputs.py
diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py
index d4f8549ba72fb256dcf1d508d8ae20cc82510650..bd1efddda18af3bcb475923b8309869d5e8c9171 100755
--- a/tools/isolate/trace_inputs.py
+++ b/tools/isolate/trace_inputs.py
@@ -1394,7 +1394,9 @@ class Dtrace(ApiBase):
stdout = subprocess.PIPE
stderr = subprocess.STDOUT
child_cmd = [
- sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'),
+ sys.executable,
+ os.path.join(BASE_DIR, 'trace_child_process.py'),
+ '--wait',
]
child = subprocess.Popen(
child_cmd + cmd,
@@ -1489,7 +1491,12 @@ class LogmanTrace(ApiBase):
"""Uses the native Windows ETW based tracing functionality to trace a child
process.
- Caveat: this implementations doesn't track cwd or initial_cwd.
+ Caveat: this implementations doesn't track cwd or initial_cwd. It is because
+ the Windows Kernel doesn't have a concept of 'current working directory' at
+ all. A Win32 process has a map of current directories, one per drive letter
+ and it is managed by the user mode kernel32.dll. In kernel, a file is always
+ opened relative to another file_object or as an absolute path. All the current
+ working directory logic is done in user mode.
"""
class Context(ApiBase.Context):
"""Processes a ETW log line and keeps the list of existent and non
@@ -1512,14 +1519,15 @@ class LogmanTrace(ApiBase):
# Handle file objects that succeeded.
self.file_objects = {}
- def __init__(self, blacklist):
+ def __init__(self, blacklist, tracer_pid):
super(LogmanTrace.Context, self).__init__(blacklist)
self._drive_map = DosDriveMap()
# Threads mapping to the corresponding process id.
self._threads_active = {}
# Process ID of the tracer, e.g. tracer_inputs.py
- self._tracer_pid = None
- # First process to be started by self._tracer_pid.
+ self._tracer_pid = tracer_pid
+ # First process to be started by self._tracer_pid is the executable
+ # traced.
self._initial_pid = None
self._line_number = 0
@@ -1706,33 +1714,7 @@ class LogmanTrace(ApiBase):
def handle_Process_Any(self, line):
pass
- def handle_Process_DCStart(self, line):
- """Gives historic information about the process tree.
-
- Use it to extract the pid of the trace_inputs.py parent process that
- started logman.exe.
- """
- #UNIQUE_PROCESS_KEY = 19
- #PROCESS_ID = 20
- PARENT_PID = 21
- #SESSION_ID = 22
- #EXIT_STATUS = 23
- #DIRECTORY_TABLE_BASE = 24
- #USER_SID = 25
- IMAGE_FILE_NAME = 26
- #COMMAND_LINE = 27
-
- ppid = int(line[PARENT_PID], 16)
- if line[IMAGE_FILE_NAME] == '"logman.exe"':
- # logman's parent is trace_input.py or whatever tool using it as a
- # library. Trace any other children started by it.
- assert not self._tracer_pid
- self._tracer_pid = ppid
- logging.info('Found logman\'s parent at %d' % ppid)
-
def handle_Process_End(self, line):
- # Look if it is logman terminating, if so, grab the parent's process pid
- # and inject cwd.
pid = line[self.PID]
if pid in self.processes:
logging.info('Terminated: %d' % pid)
@@ -1754,11 +1736,9 @@ class LogmanTrace(ApiBase):
pid = int(line[PROCESS_ID], 16)
if ppid == self._tracer_pid:
# Need to ignore processes we don't know about because the log is
- # system-wide.
- if line[IMAGE_FILE_NAME] == '"logman.exe"':
- # Skip the shutdown call when "logman.exe stop" is executed.
- return
- self._initial_pid = self._initial_pid or pid
+ # system-wide. self._tracer_pid shall start only one process.
+ assert not self._initial_pid
+ self._initial_pid = pid
ppid = None
elif ppid not in self.processes:
# Ignore
@@ -1854,10 +1834,9 @@ class LogmanTrace(ApiBase):
@staticmethod
def clean_trace(logname):
- if os.path.isfile(logname):
- os.remove(logname)
- if os.path.isfile(logname + '.etl'):
- os.remove(logname + '.etl')
+ for ext in ('', '.csv', '.etl', '.xml'):
+ if os.path.isfile(logname + ext):
+ os.remove(logname + ext)
@classmethod
def _start_log(cls, etl):
@@ -1914,52 +1893,31 @@ class LogmanTrace(ApiBase):
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
- @classmethod
- def gen_trace(cls, cmd, cwd, logname, output):
- """Uses logman.exe to start and stop the NT Kernel Logger while the
- executable to be traced is run.
- """
- logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))
- # Use "logman -?" for help.
-
- etl = logname + '.etl'
-
- stdout = stderr = None
- if output:
- stdout = subprocess.PIPE
- stderr = subprocess.STDOUT
+ @staticmethod
+ def _convert_log(logname, logformat, stdout, stderr):
+ """Converts the ETL trace to text representation.
- # 1. Start the log collection.
- cls._start_log(etl)
+ Normally, 'csv' is sufficient. If complex scripts are used (like eastern
+ languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
- # 2. Run the child process.
- logging.debug('Running: %s' % cmd)
- try:
- child = subprocess.Popen(
- cmd, cwd=cwd, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr)
- out = child.communicate()[0]
- finally:
- # 3. Stop the log collection.
- cls._stop_log()
+ Arguments:
+ - logname: Base filename to convert.
+ - logformat: Text format to be generated, csv, csv_utf16 or xml.
- # 4. Convert the traces to text representation.
- # Use "tracerpt -?" for help.
+ Use "tracerpt -?" for help.
+ """
LOCALE_INVARIANT = 0x7F
windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
cmd_convert = [
'tracerpt.exe',
- '-l', etl,
- '-o', logname,
+ '-l', logname + '.etl',
+ '-o', logname + '.' + logformat,
'-gmt', # Use UTC
'-y', # No prompt
# Use -of XML to get the header of each items after column 19, e.g. all
# the actual headers of 'User Data'.
]
- # Normally, 'csv' is sufficient. If complex scripts are used (like eastern
- # languages), use 'csv_unicode'. If localization gets in the way, use 'xml'.
- logformat = 'csv'
-
if logformat == 'csv':
# tracerpt localizes the 'Type' column, for major brainfuck
# entertainment. I can't imagine any sane reason to do that.
@@ -1973,9 +1931,58 @@ class LogmanTrace(ApiBase):
else:
assert False, logformat
logging.debug('Running: %s' % cmd_convert)
+ # This can takes tens of minutes for large logs.
subprocess.check_call(
cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr)
+ @classmethod
+ def gen_trace(cls, cmd, cwd, logname, output):
+ """Uses logman.exe to start and stop the NT Kernel Logger while the
+ executable to be traced is run.
+ """
+ logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))
+ # Use "logman -?" for help.
+
+ stdout = stderr = None
+ if output:
+ stdout = subprocess.PIPE
+ stderr = subprocess.STDOUT
+
+ # 1. Start the log collection.
+ cls._start_log(logname + '.etl')
+
+ # 2. Run the child process.
+ logging.debug('Running: %s' % cmd)
+ try:
+ # Use trace_child_process.py so we have a clear pid owner. Since
+ # trace_inputs.py can be used as a library and could trace mulitple
+ # processes simultaneously, it makes it more complex if the executable to
+ # be traced is executed directly here. It also solves issues related to
+ # logman.exe that needs to be executed to control the kernel trace.
+ child_cmd = [
+ sys.executable,
+ os.path.join(BASE_DIR, 'trace_child_process.py'),
+ ]
+ child = subprocess.Popen(
+ child_cmd + cmd,
+ cwd=cwd,
+ stdin=subprocess.PIPE,
+ stdout=stdout,
+ stderr=stderr)
+ logging.debug('Started child pid: %d' % child.pid)
+ out = child.communicate()[0]
+ finally:
+ # 3. Stop the log collection.
+ cls._stop_log()
+
+ # 4. Convert the traces to text representation.
+ cls._convert_log(logname, 'csv', stdout, stderr)
+
+ # 5. Save metadata.
+ json.dump({
+ 'pid': child.pid,
+ 'format': 'csv',
+ }, open(logname, 'w'))
return child.returncode, out
@classmethod
@@ -1986,20 +1993,10 @@ class LogmanTrace(ApiBase):
# All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
- # Auto-detect the log format.
- with open(filename, 'rb') as f:
- hdr = f.read(2)
- assert len(hdr) == 2
- if hdr == '<E':
- # It starts with <Events>.
- logformat = 'xml'
- elif hdr == '\xFF\xEF':
- # utf-16 BOM.
- logformat = 'csv_utf16'
- else:
- logformat = 'csv'
+ data = json.load(open(filename))
+ logformat = data['format']
- context = cls.Context(blacklist_more)
+ context = cls.Context(blacklist_more, data['pid'])
if logformat == 'csv_utf16':
def utf_8_encoder(unicode_csv_data):
@@ -2018,7 +2015,8 @@ class LogmanTrace(ApiBase):
# python internal unicode format (utf-8). Then explicitly re-encode as
# utf8 as str instances so csv can parse it fine. Then decode the utf-8
# str back into python unicode instances. This sounds about right.
- for line in unicode_csv_reader(codecs.open(filename, 'r', 'utf-16')):
+ for line in unicode_csv_reader(
+ codecs.open(filename + '.' + logformat, 'r', 'utf-16')):
# line is a list of unicode objects
context.on_csv_line(line)
@@ -2033,7 +2031,7 @@ class LogmanTrace(ApiBase):
# The fastest and smallest format but only supports 'ANSI' file paths.
# E.g. the filenames are encoding in the 'current' encoding.
- for line in ansi_csv_reader(open(filename)):
+ for line in ansi_csv_reader(open(filename + '.' + logformat)):
# line is a list of unicode objects.
context.on_csv_line(line)
« no previous file with comments | « tools/isolate/trace_child_process.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698