Index: tools/isolate/trace_inputs.py |
diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py |
index d4f8549ba72fb256dcf1d508d8ae20cc82510650..bd1efddda18af3bcb475923b8309869d5e8c9171 100755 |
--- a/tools/isolate/trace_inputs.py |
+++ b/tools/isolate/trace_inputs.py |
@@ -1394,7 +1394,9 @@ class Dtrace(ApiBase): |
stdout = subprocess.PIPE |
stderr = subprocess.STDOUT |
child_cmd = [ |
- sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'), |
+ sys.executable, |
+ os.path.join(BASE_DIR, 'trace_child_process.py'), |
+ '--wait', |
] |
child = subprocess.Popen( |
child_cmd + cmd, |
@@ -1489,7 +1491,12 @@ class LogmanTrace(ApiBase): |
"""Uses the native Windows ETW based tracing functionality to trace a child |
process. |
- Caveat: this implementations doesn't track cwd or initial_cwd. |
+ Caveat: this implementations doesn't track cwd or initial_cwd. It is because |
+ the Windows Kernel doesn't have a concept of 'current working directory' at |
+ all. A Win32 process has a map of current directories, one per drive letter |
+ and it is managed by the user mode kernel32.dll. In kernel, a file is always |
+ opened relative to another file_object or as an absolute path. All the current |
+ working directory logic is done in user mode. |
""" |
class Context(ApiBase.Context): |
"""Processes a ETW log line and keeps the list of existent and non |
@@ -1512,14 +1519,15 @@ class LogmanTrace(ApiBase): |
# Handle file objects that succeeded. |
self.file_objects = {} |
- def __init__(self, blacklist): |
+ def __init__(self, blacklist, tracer_pid): |
super(LogmanTrace.Context, self).__init__(blacklist) |
self._drive_map = DosDriveMap() |
# Threads mapping to the corresponding process id. |
self._threads_active = {} |
# Process ID of the tracer, e.g. tracer_inputs.py |
- self._tracer_pid = None |
- # First process to be started by self._tracer_pid. |
+ self._tracer_pid = tracer_pid |
+ # First process to be started by self._tracer_pid is the executable |
+ # traced. |
self._initial_pid = None |
self._line_number = 0 |
@@ -1706,33 +1714,7 @@ class LogmanTrace(ApiBase): |
def handle_Process_Any(self, line): |
pass |
- def handle_Process_DCStart(self, line): |
- """Gives historic information about the process tree. |
- |
- Use it to extract the pid of the trace_inputs.py parent process that |
- started logman.exe. |
- """ |
- #UNIQUE_PROCESS_KEY = 19 |
- #PROCESS_ID = 20 |
- PARENT_PID = 21 |
- #SESSION_ID = 22 |
- #EXIT_STATUS = 23 |
- #DIRECTORY_TABLE_BASE = 24 |
- #USER_SID = 25 |
- IMAGE_FILE_NAME = 26 |
- #COMMAND_LINE = 27 |
- |
- ppid = int(line[PARENT_PID], 16) |
- if line[IMAGE_FILE_NAME] == '"logman.exe"': |
- # logman's parent is trace_input.py or whatever tool using it as a |
- # library. Trace any other children started by it. |
- assert not self._tracer_pid |
- self._tracer_pid = ppid |
- logging.info('Found logman\'s parent at %d' % ppid) |
- |
def handle_Process_End(self, line): |
- # Look if it is logman terminating, if so, grab the parent's process pid |
- # and inject cwd. |
pid = line[self.PID] |
if pid in self.processes: |
logging.info('Terminated: %d' % pid) |
@@ -1754,11 +1736,9 @@ class LogmanTrace(ApiBase): |
pid = int(line[PROCESS_ID], 16) |
if ppid == self._tracer_pid: |
# Need to ignore processes we don't know about because the log is |
- # system-wide. |
- if line[IMAGE_FILE_NAME] == '"logman.exe"': |
- # Skip the shutdown call when "logman.exe stop" is executed. |
- return |
- self._initial_pid = self._initial_pid or pid |
+ # system-wide. self._tracer_pid shall start only one process. |
+ assert not self._initial_pid |
+ self._initial_pid = pid |
ppid = None |
elif ppid not in self.processes: |
# Ignore |
@@ -1854,10 +1834,9 @@ class LogmanTrace(ApiBase): |
@staticmethod |
def clean_trace(logname): |
- if os.path.isfile(logname): |
- os.remove(logname) |
- if os.path.isfile(logname + '.etl'): |
- os.remove(logname + '.etl') |
+ for ext in ('', '.csv', '.etl', '.xml'): |
+ if os.path.isfile(logname + ext): |
+ os.remove(logname + ext) |
@classmethod |
def _start_log(cls, etl): |
@@ -1914,52 +1893,31 @@ class LogmanTrace(ApiBase): |
stdout=subprocess.PIPE, |
stderr=subprocess.STDOUT) |
- @classmethod |
- def gen_trace(cls, cmd, cwd, logname, output): |
- """Uses logman.exe to start and stop the NT Kernel Logger while the |
- executable to be traced is run. |
- """ |
- logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) |
- # Use "logman -?" for help. |
- |
- etl = logname + '.etl' |
- |
- stdout = stderr = None |
- if output: |
- stdout = subprocess.PIPE |
- stderr = subprocess.STDOUT |
+ @staticmethod |
+ def _convert_log(logname, logformat, stdout, stderr): |
+ """Converts the ETL trace to text representation. |
- # 1. Start the log collection. |
- cls._start_log(etl) |
+ Normally, 'csv' is sufficient. If complex scripts are used (like eastern |
+ languages), use 'csv_utf16'. If localization gets in the way, use 'xml'. |
- # 2. Run the child process. |
- logging.debug('Running: %s' % cmd) |
- try: |
- child = subprocess.Popen( |
- cmd, cwd=cwd, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) |
- out = child.communicate()[0] |
- finally: |
- # 3. Stop the log collection. |
- cls._stop_log() |
+ Arguments: |
+ - logname: Base filename to convert. |
+ - logformat: Text format to be generated, csv, csv_utf16 or xml. |
- # 4. Convert the traces to text representation. |
- # Use "tracerpt -?" for help. |
+ Use "tracerpt -?" for help. |
+ """ |
LOCALE_INVARIANT = 0x7F |
windll.kernel32.SetThreadLocale(LOCALE_INVARIANT) |
cmd_convert = [ |
'tracerpt.exe', |
- '-l', etl, |
- '-o', logname, |
+ '-l', logname + '.etl', |
+ '-o', logname + '.' + logformat, |
'-gmt', # Use UTC |
'-y', # No prompt |
# Use -of XML to get the header of each items after column 19, e.g. all |
# the actual headers of 'User Data'. |
] |
- # Normally, 'csv' is sufficient. If complex scripts are used (like eastern |
- # languages), use 'csv_unicode'. If localization gets in the way, use 'xml'. |
- logformat = 'csv' |
- |
if logformat == 'csv': |
# tracerpt localizes the 'Type' column, for major brainfuck |
# entertainment. I can't imagine any sane reason to do that. |
@@ -1973,9 +1931,58 @@ class LogmanTrace(ApiBase): |
else: |
assert False, logformat |
logging.debug('Running: %s' % cmd_convert) |
+ # This can takes tens of minutes for large logs. |
subprocess.check_call( |
cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) |
+ @classmethod |
+ def gen_trace(cls, cmd, cwd, logname, output): |
+ """Uses logman.exe to start and stop the NT Kernel Logger while the |
+ executable to be traced is run. |
+ """ |
+ logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) |
+ # Use "logman -?" for help. |
+ |
+ stdout = stderr = None |
+ if output: |
+ stdout = subprocess.PIPE |
+ stderr = subprocess.STDOUT |
+ |
+ # 1. Start the log collection. |
+ cls._start_log(logname + '.etl') |
+ |
+ # 2. Run the child process. |
+ logging.debug('Running: %s' % cmd) |
+ try: |
+ # Use trace_child_process.py so we have a clear pid owner. Since |
+ # trace_inputs.py can be used as a library and could trace mulitple |
+ # processes simultaneously, it makes it more complex if the executable to |
+ # be traced is executed directly here. It also solves issues related to |
+ # logman.exe that needs to be executed to control the kernel trace. |
+ child_cmd = [ |
+ sys.executable, |
+ os.path.join(BASE_DIR, 'trace_child_process.py'), |
+ ] |
+ child = subprocess.Popen( |
+ child_cmd + cmd, |
+ cwd=cwd, |
+ stdin=subprocess.PIPE, |
+ stdout=stdout, |
+ stderr=stderr) |
+ logging.debug('Started child pid: %d' % child.pid) |
+ out = child.communicate()[0] |
+ finally: |
+ # 3. Stop the log collection. |
+ cls._stop_log() |
+ |
+ # 4. Convert the traces to text representation. |
+ cls._convert_log(logname, 'csv', stdout, stderr) |
+ |
+ # 5. Save metadata. |
+ json.dump({ |
+ 'pid': child.pid, |
+ 'format': 'csv', |
+ }, open(logname, 'w')) |
return child.returncode, out |
@classmethod |
@@ -1986,20 +1993,10 @@ class LogmanTrace(ApiBase): |
# All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) |
- # Auto-detect the log format. |
- with open(filename, 'rb') as f: |
- hdr = f.read(2) |
- assert len(hdr) == 2 |
- if hdr == '<E': |
- # It starts with <Events>. |
- logformat = 'xml' |
- elif hdr == '\xFF\xEF': |
- # utf-16 BOM. |
- logformat = 'csv_utf16' |
- else: |
- logformat = 'csv' |
+ data = json.load(open(filename)) |
+ logformat = data['format'] |
- context = cls.Context(blacklist_more) |
+ context = cls.Context(blacklist_more, data['pid']) |
if logformat == 'csv_utf16': |
def utf_8_encoder(unicode_csv_data): |
@@ -2018,7 +2015,8 @@ class LogmanTrace(ApiBase): |
# python internal unicode format (utf-8). Then explicitly re-encode as |
# utf8 as str instances so csv can parse it fine. Then decode the utf-8 |
# str back into python unicode instances. This sounds about right. |
- for line in unicode_csv_reader(codecs.open(filename, 'r', 'utf-16')): |
+ for line in unicode_csv_reader( |
+ codecs.open(filename + '.' + logformat, 'r', 'utf-16')): |
# line is a list of unicode objects |
context.on_csv_line(line) |
@@ -2033,7 +2031,7 @@ class LogmanTrace(ApiBase): |
# The fastest and smallest format but only supports 'ANSI' file paths. |
# E.g. the filenames are encoding in the 'current' encoding. |
- for line in ansi_csv_reader(open(filename)): |
+ for line in ansi_csv_reader(open(filename + '.' + logformat)): |
# line is a list of unicode objects. |
context.on_csv_line(line) |