tools/isolate/trace_inputs.py - Issue 10448044: Use trace_child_process.py on Windows and save information about the trace in json.

Unified Diff: tools/isolate/trace_inputs.py

Issue 10448044: Use trace_child_process.py on Windows and save information about the trace in json. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Added more comments Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/isolate/trace_inputs.py

diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py

index d4f8549ba72fb256dcf1d508d8ae20cc82510650..bd1efddda18af3bcb475923b8309869d5e8c9171 100755

--- a/tools/isolate/trace_inputs.py

+++ b/tools/isolate/trace_inputs.py

@@ -1394,7 +1394,9 @@ class Dtrace(ApiBase):

stdout = subprocess.PIPE

stderr = subprocess.STDOUT

child_cmd = [

- sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'),

+ sys.executable,

+ os.path.join(BASE_DIR, 'trace_child_process.py'),

+ '--wait',

]

child = subprocess.Popen(

child_cmd + cmd,

@@ -1489,7 +1491,12 @@ class LogmanTrace(ApiBase):

"""Uses the native Windows ETW based tracing functionality to trace a child

process.

- Caveat: this implementations doesn't track cwd or initial_cwd.

+ Caveat: this implementations doesn't track cwd or initial_cwd. It is because

+ the Windows Kernel doesn't have a concept of 'current working directory' at

+ all. A Win32 process has a map of current directories, one per drive letter

+ and it is managed by the user mode kernel32.dll. In kernel, a file is always

+ opened relative to another file_object or as an absolute path. All the current

+ working directory logic is done in user mode.

"""

class Context(ApiBase.Context):

"""Processes a ETW log line and keeps the list of existent and non

@@ -1512,14 +1519,15 @@ class LogmanTrace(ApiBase):

# Handle file objects that succeeded.

self.file_objects = {}

- def __init__(self, blacklist):

+ def __init__(self, blacklist, tracer_pid):

super(LogmanTrace.Context, self).__init__(blacklist)

self._drive_map = DosDriveMap()

# Threads mapping to the corresponding process id.

self._threads_active = {}

# Process ID of the tracer, e.g. tracer_inputs.py

- self._tracer_pid = None

- # First process to be started by self._tracer_pid.

+ self._tracer_pid = tracer_pid

+ # First process to be started by self._tracer_pid is the executable

+ # traced.

self._initial_pid = None

self._line_number = 0

@@ -1706,33 +1714,7 @@ class LogmanTrace(ApiBase):

def handle_Process_Any(self, line):

pass

- def handle_Process_DCStart(self, line):

- """Gives historic information about the process tree.

- Use it to extract the pid of the trace_inputs.py parent process that

- started logman.exe.

- """

- #UNIQUE_PROCESS_KEY = 19

- #PROCESS_ID = 20

- PARENT_PID = 21

- #SESSION_ID = 22

- #EXIT_STATUS = 23

- #DIRECTORY_TABLE_BASE = 24

- #USER_SID = 25

- IMAGE_FILE_NAME = 26

- #COMMAND_LINE = 27

- ppid = int(line[PARENT_PID], 16)

- if line[IMAGE_FILE_NAME] == '"logman.exe"':

- # logman's parent is trace_input.py or whatever tool using it as a

- # library. Trace any other children started by it.

- assert not self._tracer_pid

- self._tracer_pid = ppid

- logging.info('Found logman\'s parent at %d' % ppid)

def handle_Process_End(self, line):

- # Look if it is logman terminating, if so, grab the parent's process pid

- # and inject cwd.

pid = line[self.PID]

if pid in self.processes:

logging.info('Terminated: %d' % pid)

@@ -1754,11 +1736,9 @@ class LogmanTrace(ApiBase):

pid = int(line[PROCESS_ID], 16)

if ppid == self._tracer_pid:

# Need to ignore processes we don't know about because the log is

- # system-wide.

- if line[IMAGE_FILE_NAME] == '"logman.exe"':

- # Skip the shutdown call when "logman.exe stop" is executed.

- return

- self._initial_pid = self._initial_pid or pid

+ # system-wide. self._tracer_pid shall start only one process.

+ assert not self._initial_pid

+ self._initial_pid = pid

ppid = None

elif ppid not in self.processes:

# Ignore

@@ -1854,10 +1834,9 @@ class LogmanTrace(ApiBase):

@staticmethod

def clean_trace(logname):

- if os.path.isfile(logname):

- os.remove(logname)

- if os.path.isfile(logname + '.etl'):

- os.remove(logname + '.etl')

+ for ext in ('', '.csv', '.etl', '.xml'):

+ if os.path.isfile(logname + ext):

+ os.remove(logname + ext)

@classmethod

def _start_log(cls, etl):

@@ -1914,52 +1893,31 @@ class LogmanTrace(ApiBase):

stdout=subprocess.PIPE,

stderr=subprocess.STDOUT)

- @classmethod

- def gen_trace(cls, cmd, cwd, logname, output):

- """Uses logman.exe to start and stop the NT Kernel Logger while the

- executable to be traced is run.

- """

- logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))

- # Use "logman -?" for help.

- etl = logname + '.etl'

- stdout = stderr = None

- if output:

- stdout = subprocess.PIPE

- stderr = subprocess.STDOUT

+ @staticmethod

+ def _convert_log(logname, logformat, stdout, stderr):

+ """Converts the ETL trace to text representation.

- # 1. Start the log collection.

- cls._start_log(etl)

+ Normally, 'csv' is sufficient. If complex scripts are used (like eastern

+ languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.

- # 2. Run the child process.

- logging.debug('Running: %s' % cmd)

- try:

- child = subprocess.Popen(

- cmd, cwd=cwd, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr)

- out = child.communicate()[0]

- finally:

- # 3. Stop the log collection.

- cls._stop_log()

+ Arguments:

+ - logname: Base filename to convert.

+ - logformat: Text format to be generated, csv, csv_utf16 or xml.

- # 4. Convert the traces to text representation.

- # Use "tracerpt -?" for help.

+ Use "tracerpt -?" for help.

+ """

LOCALE_INVARIANT = 0x7F

windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)

cmd_convert = [

'tracerpt.exe',

- '-l', etl,

- '-o', logname,

+ '-l', logname + '.etl',

+ '-o', logname + '.' + logformat,

'-gmt', # Use UTC

'-y', # No prompt

# Use -of XML to get the header of each items after column 19, e.g. all

# the actual headers of 'User Data'.

]

- # Normally, 'csv' is sufficient. If complex scripts are used (like eastern

- # languages), use 'csv_unicode'. If localization gets in the way, use 'xml'.

- logformat = 'csv'

if logformat == 'csv':

# tracerpt localizes the 'Type' column, for major brainfuck

# entertainment. I can't imagine any sane reason to do that.

@@ -1973,9 +1931,58 @@ class LogmanTrace(ApiBase):

else:

assert False, logformat

logging.debug('Running: %s' % cmd_convert)

+ # This can takes tens of minutes for large logs.

subprocess.check_call(

cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr)

+ @classmethod

+ def gen_trace(cls, cmd, cwd, logname, output):

+ """Uses logman.exe to start and stop the NT Kernel Logger while the

+ executable to be traced is run.

+ """

+ logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))

+ # Use "logman -?" for help.

+ stdout = stderr = None

+ if output:

+ stdout = subprocess.PIPE

+ stderr = subprocess.STDOUT

+ # 1. Start the log collection.

+ cls._start_log(logname + '.etl')

+ # 2. Run the child process.

+ logging.debug('Running: %s' % cmd)

+ try:

+ # Use trace_child_process.py so we have a clear pid owner. Since

+ # trace_inputs.py can be used as a library and could trace mulitple

+ # processes simultaneously, it makes it more complex if the executable to

+ # be traced is executed directly here. It also solves issues related to

+ # logman.exe that needs to be executed to control the kernel trace.

+ child_cmd = [

+ sys.executable,

+ os.path.join(BASE_DIR, 'trace_child_process.py'),

+ ]

+ child = subprocess.Popen(

+ child_cmd + cmd,

+ cwd=cwd,

+ stdin=subprocess.PIPE,

+ stdout=stdout,

+ stderr=stderr)

+ logging.debug('Started child pid: %d' % child.pid)

+ out = child.communicate()[0]

+ finally:

+ # 3. Stop the log collection.

+ cls._stop_log()

+ # 4. Convert the traces to text representation.

+ cls._convert_log(logname, 'csv', stdout, stderr)

+ # 5. Save metadata.

+ json.dump({

+ 'pid': child.pid,

+ 'format': 'csv',

+ }, open(logname, 'w'))

return child.returncode, out

@classmethod

@@ -1986,20 +1993,10 @@ class LogmanTrace(ApiBase):

# All the NTFS metadata is in the form x:\$EXTEND or stuff like that.

return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)

- # Auto-detect the log format.

- with open(filename, 'rb') as f:

- hdr = f.read(2)

- assert len(hdr) == 2

- if hdr == '<E':

- # It starts with <Events>.

- logformat = 'xml'

- elif hdr == '\xFF\xEF':

- # utf-16 BOM.

- logformat = 'csv_utf16'

- else:

- logformat = 'csv'

+ data = json.load(open(filename))

+ logformat = data['format']

- context = cls.Context(blacklist_more)

+ context = cls.Context(blacklist_more, data['pid'])

if logformat == 'csv_utf16':

def utf_8_encoder(unicode_csv_data):

@@ -2018,7 +2015,8 @@ class LogmanTrace(ApiBase):

# python internal unicode format (utf-8). Then explicitly re-encode as

# utf8 as str instances so csv can parse it fine. Then decode the utf-8

# str back into python unicode instances. This sounds about right.

- for line in unicode_csv_reader(codecs.open(filename, 'r', 'utf-16')):

+ for line in unicode_csv_reader(

+ codecs.open(filename + '.' + logformat, 'r', 'utf-16')):

# line is a list of unicode objects

context.on_csv_line(line)

@@ -2033,7 +2031,7 @@ class LogmanTrace(ApiBase):

# The fastest and smallest format but only supports 'ANSI' file paths.

# E.g. the filenames are encoding in the 'current' encoding.

- for line in ansi_csv_reader(open(filename)):

+ for line in ansi_csv_reader(open(filename + '.' + logformat)):

# line is a list of unicode objects.

context.on_csv_line(line)

« no previous file with comments | « tools/isolate/trace_child_process.py ('k') | no next file » | no next file with comments »