tools/isolate/trace_inputs.py - Issue 10399090: Create an interface class to describe and document the interface functions.

Side by Side Diff: tools/isolate/trace_inputs.py

Issue 10399090: Create an interface class to describe and document the interface functions. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 8 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # coding=utf-8	2 # coding=utf-8

3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

4 # Use of this source code is governed by a BSD-style license that can be	4 # Use of this source code is governed by a BSD-style license that can be

5 # found in the LICENSE file.	5 # found in the LICENSE file.

6	6

7 """Traces an executable and its child processes and extract the files accessed	7 """Traces an executable and its child processes and extract the files accessed

8 by them.	8 by them.

9	9

10 The implementation uses OS-specific API. The native Kernel logger and the ETL	10 The implementation uses OS-specific API. The native Kernel logger and the ETL

(...skipping 203 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
214 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows."""	214 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows."""

215 if x:	215 if x:

216 x = x.rstrip(os.path.sep).replace(os.path.sep, '/')	216 x = x.rstrip(os.path.sep).replace(os.path.sep, '/')

217 if x == '.':	217 if x == '.':

218 x = ''	218 x = ''

219 if x:	219 if x:

220 x += '/'	220 x += '/'

221 return x	221 return x

222	222

223	223

224 class Strace(object):	224 class ApiBase(object):

	225 @staticmethod

	226 def clean_trace(logname):

	227 """Deletes the old log."""

	228 raise NotImplementedError()

	229

	230 @classmethod

	231 def gen_trace(cls, cmd, cwd, logname, output):

	232 """Runs the OS-specific trace program on an executable.

	233

	234 Since the logs are per pid, we need to log the list of the initial pid.

	235 """

	236 raise NotImplementedError(cls.__class__.__name__)

	237

	238 @classmethod

	239 def parse_log(cls, filename, blacklist):

	240 """Processes a trace log and returns the files opened and the files that do

	241 not exist.

	242

	243 It does not track directories.

	244

	245 Most of the time, files that do not exist are temporary test files that

	246 should be put in /tmp instead. See http://crbug.com/116251.

	247

	248 Returns a tuple (existing files, non existing files, nb_processes_created)

	249 """

	250 raise NotImplementedError(cls.__class__.__name__)

	251

	252

	253 class Strace(ApiBase):

225 """strace implies linux."""	254 """strace implies linux."""

226 IGNORED = (	255 IGNORED = (

227 '/bin',	256 '/bin',

228 '/dev',	257 '/dev',

229 '/etc',	258 '/etc',

230 '/lib',	259 '/lib',

231 '/proc',	260 '/proc',

232 '/sys',	261 '/sys',

233 '/tmp',	262 '/tmp',

234 '/usr',	263 '/usr',

(...skipping 240 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
475 filepath = unicode(filepath)	504 filepath = unicode(filepath)

476 if self.blacklist(filepath):	505 if self.blacklist(filepath):

477 return	506 return

478 if os.path.isfile(filepath):	507 if os.path.isfile(filepath):

479 self.files.add(filepath)	508 self.files.add(filepath)

480 else:	509 else:

481 self.non_existent.add(filepath)	510 self.non_existent.add(filepath)

482	511

483 @staticmethod	512 @staticmethod

484 def clean_trace(logname):	513 def clean_trace(logname):

485 """Deletes the old log."""

486 if os.path.isfile(logname):	514 if os.path.isfile(logname):

487 os.remove(logname)	515 os.remove(logname)

488 # Also delete any pid specific file from previous traces.	516 # Also delete any pid specific file from previous traces.

489 for i in glob.iglob(logname + '.*'):	517 for i in glob.iglob(logname + '.*'):

490 if i.rsplit('.', 1)[1].isdigit():	518 if i.rsplit('.', 1)[1].isdigit():

491 os.remove(i)	519 os.remove(i)

492	520

493 @classmethod	521 @classmethod

494 def gen_trace(cls, cmd, cwd, logname, output):	522 def gen_trace(cls, cmd, cwd, logname, output):

495 """Runs strace on an executable.	523 """Runs strace on an executable.

(...skipping 21 matching lines...) Expand all Loading...
517 json.dump(	545 json.dump(

518 {	546 {

519 'cwd': cwd,	547 'cwd': cwd,

520 'pid': child.pid,	548 'pid': child.pid,

521 },	549 },

522 f)	550 f)

523 return child.returncode, out	551 return child.returncode, out

524	552

525 @classmethod	553 @classmethod

526 def parse_log(cls, filename, blacklist):	554 def parse_log(cls, filename, blacklist):

527 """Processes a strace log and returns the files opened and the files that do

528 not exist.

529

530 It does not track directories.

531

532 Most of the time, files that do not exist are temporary test files that

533 should be put in /tmp instead. See http://crbug.com/116251.

534

535 Returns a tuple (existing files, non existing files, nb_processes_created)

536 """

537 logging.info('parse_log(%s, %s)' % (filename, blacklist))	555 logging.info('parse_log(%s, %s)' % (filename, blacklist))

538 with open(filename, 'r') as f:	556 with open(filename, 'r') as f:

539 data = json.load(f)	557 data = json.load(f)

540 context = cls.Context(blacklist, data['cwd'])	558 context = cls.Context(blacklist, data['cwd'])

541 for pidfile in glob.iglob(filename + '.*'):	559 for pidfile in glob.iglob(filename + '.*'):

542 pid = pidfile.rsplit('.', 1)[1]	560 pid = pidfile.rsplit('.', 1)[1]

543 if pid.isdigit():	561 if pid.isdigit():

544 pid = int(pid)	562 pid = int(pid)

545 # TODO(maruel): Load as utf-8	563 # TODO(maruel): Load as utf-8

546 for line in open(pidfile, 'rb'):	564 for line in open(pidfile, 'rb'):

547 context.on_line(pid, line)	565 context.on_line(pid, line)

548 context.resolve()	566 context.resolve()

549 # Resolve any symlink we hit.	567 # Resolve any symlink we hit.

550 return (	568 return (

551 set(os.path.realpath(f) for f in context.files),	569 set(os.path.realpath(f) for f in context.files),

552 set(os.path.realpath(f) for f in context.non_existent),	570 set(os.path.realpath(f) for f in context.non_existent),

553 len(context.processes))	571 len(context.processes))

554	572

555	573

556 class Dtrace(object):	574 class Dtrace(ApiBase):

557 """Uses DTrace framework through dtrace. Requires root access.	575 """Uses DTrace framework through dtrace. Requires root access.

558	576

559 Implies Mac OSX.	577 Implies Mac OSX.

560	578

561 dtruss can't be used because it has compatibility issues with python.	579 dtruss can't be used because it has compatibility issues with python.

562	580

563 Also, the pid->cwd handling needs to be done manually since OSX has no way to	581 Also, the pid->cwd handling needs to be done manually since OSX has no way to

564 get the absolute path of the 'cwd' dtrace variable from the probe.	582 get the absolute path of the 'cwd' dtrace variable from the probe.

565	583

566 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see	584 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see

(...skipping 245 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
812 self.files.add(filepath)	830 self.files.add(filepath)

813 else:	831 else:

814 self.non_existent.add(filepath)	832 self.non_existent.add(filepath)

815	833

816 @staticmethod	834 @staticmethod

817 def _handle_ignored(_ppid, pid, function, args, result):	835 def _handle_ignored(_ppid, pid, function, args, result):

818 logging.debug('%d %s(%s) = %s' % (pid, function, args, result))	836 logging.debug('%d %s(%s) = %s' % (pid, function, args, result))

819	837

820 @staticmethod	838 @staticmethod

821 def clean_trace(logname):	839 def clean_trace(logname):

822 """Deletes the old log."""

823 if os.path.isfile(logname):	840 if os.path.isfile(logname):

824 os.remove(logname)	841 os.remove(logname)

825	842

826 @classmethod	843 @classmethod

827 def gen_trace(cls, cmd, cwd, logname, output):	844 def gen_trace(cls, cmd, cwd, logname, output):

828 """Runs dtrace on an executable."""	845 """Runs dtrace on an executable.

	846

	847 This dtruss is broken when it starts the process itself or when tracing

	848 child processes, this code starts a wrapper process trace_child_process.py,

	849 which waits for dtrace to start, then trace_child_process.py starts the

	850 executable to trace.

	851 """

829 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))	852 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))

830 logging.info('Running: %s' % cmd)	853 logging.info('Running: %s' % cmd)

831 signal = 'Go!'	854 signal = 'Go!'

832 logging.debug('Our pid: %d' % os.getpid())	855 logging.debug('Our pid: %d' % os.getpid())

833	856

834 # Part 1: start the child process.	857 # Part 1: start the child process.

835 stdout = stderr = None	858 stdout = stderr = None

836 if output:	859 if output:

837 stdout = subprocess.PIPE	860 stdout = subprocess.PIPE

838 stderr = subprocess.STDOUT	861 stderr = subprocess.STDOUT

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
891 cls._sort_log(logname)	914 cls._sort_log(logname)

892 except KeyboardInterrupt:	915 except KeyboardInterrupt:

893 # Still sort when testing.	916 # Still sort when testing.

894 cls._sort_log(logname)	917 cls._sort_log(logname)

895 raise	918 raise

896	919

897 return dtrace.returncode or child.returncode, out	920 return dtrace.returncode or child.returncode, out

898	921

899 @classmethod	922 @classmethod

900 def parse_log(cls, filename, blacklist):	923 def parse_log(cls, filename, blacklist):

901 """Processes a dtrace log and returns the files opened and the files that do

902 not exist.

903

904 It does not track directories.

905

906 Most of the time, files that do not exist are temporary test files that

907 should be put in /tmp instead. See http://crbug.com/116251

908

909 Returns a tuple (existing files, non existing files, nb_processes_created)

910 """

911 logging.info('parse_log(%s, %s)' % (filename, blacklist))	924 logging.info('parse_log(%s, %s)' % (filename, blacklist))

912 context = cls.Context(blacklist)	925 context = cls.Context(blacklist)

913 for line in open(filename, 'rb'):	926 for line in open(filename, 'rb'):

914 context.on_line(line)	927 context.on_line(line)

915 # Resolve any symlink we hit.	928 # Resolve any symlink we hit.

916 return (	929 return (

917 set(os.path.realpath(f) for f in context.files),	930 set(os.path.realpath(f) for f in context.files),

918 set(os.path.realpath(f) for f in context.non_existent),	931 set(os.path.realpath(f) for f in context.non_existent),

919 context.nb_processes)	932 context.nb_processes)

920	933

921 @staticmethod	934 @staticmethod

922 def _sort_log(logname):	935 def _sort_log(logname):

923 """Sorts the log back in order when each call occured.	936 """Sorts the log back in order when each call occured.

924	937

925 dtrace doesn't save the buffer in strict order since it keeps one buffer per	938 dtrace doesn't save the buffer in strict order since it keeps one buffer per

926 CPU.	939 CPU.

927 """	940 """

928 with open(logname, 'rb') as logfile:	941 with open(logname, 'rb') as logfile:

929 lines = [f for f in logfile.readlines() if f.strip()]	942 lines = [f for f in logfile.readlines() if f.strip()]

930 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))	943 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))

931 with open(logname, 'wb') as logfile:	944 with open(logname, 'wb') as logfile:

932 logfile.write(''.join(lines))	945 logfile.write(''.join(lines))

933	946

934	947

935 class LogmanTrace(object):	948 class LogmanTrace(ApiBase):

936 """Uses the native Windows ETW based tracing functionality to trace a child	949 """Uses the native Windows ETW based tracing functionality to trace a child

937 process.	950 process.

938 """	951 """

939 class Context(object):	952 class Context(object):

940 """Processes a ETW log line and keeps the list of existent and non	953 """Processes a ETW log line and keeps the list of existent and non

941 existent files accessed.	954 existent files accessed.

942	955

943 Ignores directories.	956 Ignores directories.

944 """	957 """

945	958

(...skipping 165 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1111 filename in self.files or	1124 filename in self.files or

1112 filename in self.non_existent):	1125 filename in self.non_existent):

1113 return	1126 return

1114 logging.debug('_handle_file(%s)' % filename)	1127 logging.debug('_handle_file(%s)' % filename)

1115 if os.path.isfile(filename):	1128 if os.path.isfile(filename):

1116 self.files.add(filename)	1129 self.files.add(filename)

1117 else:	1130 else:

1118 self.non_existent.add(filename)	1131 self.non_existent.add(filename)

1119	1132

1120 def __init__(self):	1133 def __init__(self):

	1134 super(LogmanTrace, self).__init__()

1121 # Most ignores need to be determined at runtime.	1135 # Most ignores need to be determined at runtime.

1122 self.IGNORED = set([os.path.dirname(sys.executable)])	1136 self.IGNORED = set([os.path.dirname(sys.executable)])

1123 # Add many directories from environment variables.	1137 # Add many directories from environment variables.

1124 vars_to_ignore = (	1138 vars_to_ignore = (

1125 'APPDATA',	1139 'APPDATA',

1126 'LOCALAPPDATA',	1140 'LOCALAPPDATA',

1127 'ProgramData',	1141 'ProgramData',

1128 'ProgramFiles',	1142 'ProgramFiles',

1129 'ProgramFiles(x86)',	1143 'ProgramFiles(x86)',

1130 'ProgramW6432',	1144 'ProgramW6432',

1131 'SystemRoot',	1145 'SystemRoot',

1132 'TEMP',	1146 'TEMP',

1133 'TMP',	1147 'TMP',

1134 )	1148 )

1135 for i in vars_to_ignore:	1149 for i in vars_to_ignore:

1136 if os.environ.get(i):	1150 if os.environ.get(i):

1137 self.IGNORED.add(os.environ[i])	1151 self.IGNORED.add(os.environ[i])

1138	1152

1139 # Also add their short path name equivalents.	1153 # Also add their short path name equivalents.

1140 for i in list(self.IGNORED):	1154 for i in list(self.IGNORED):

1141 self.IGNORED.add(GetShortPathName(i))	1155 self.IGNORED.add(GetShortPathName(i))

1142	1156

1143 # Add this one last since it has no short path name equivalent.	1157 # Add this one last since it has no short path name equivalent.

1144 self.IGNORED.add('\\systemroot')	1158 self.IGNORED.add('\\systemroot')

1145 self.IGNORED = tuple(sorted(self.IGNORED))	1159 self.IGNORED = tuple(sorted(self.IGNORED))

1146	1160

1147 @staticmethod	1161 @staticmethod

1148 def clean_trace(logname):	1162 def clean_trace(logname):

1149 """Deletes the old log."""

1150 if os.path.isfile(logname):	1163 if os.path.isfile(logname):

1151 os.remove(logname)	1164 os.remove(logname)

1152 if os.path.isfile(logname + '.etl'):	1165 if os.path.isfile(logname + '.etl'):

1153 os.remove(logname + '.etl')	1166 os.remove(logname + '.etl')

1154	1167

1155 @classmethod	1168 @classmethod

1156 def gen_trace(cls, cmd, cwd, logname, output):	1169 def gen_trace(cls, cmd, cwd, logname, output):

	1170 """Uses logman.exe to start and stop the NT Kernel Logger while the

	1171 executable to be traced is run.

	1172 """

1157 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))	1173 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))

1158 # Use "logman -?" for help.	1174 # Use "logman -?" for help.

1159	1175

1160 etl = logname + '.etl'	1176 etl = logname + '.etl'

1161	1177

1162 stdout = stderr = None	1178 stdout = stderr = None

1163 if output:	1179 if output:

1164 stdout = subprocess.PIPE	1180 stdout = subprocess.PIPE

1165 stderr = subprocess.STDOUT	1181 stderr = subprocess.STDOUT

1166	1182

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1234 else:	1250 else:

1235 assert False, logformat	1251 assert False, logformat

1236 logging.debug('Running: %s' % cmd_convert)	1252 logging.debug('Running: %s' % cmd_convert)

1237 subprocess.check_call(	1253 subprocess.check_call(

1238 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr)	1254 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr)

1239	1255

1240 return child.returncode, out	1256 return child.returncode, out

1241	1257

1242 @classmethod	1258 @classmethod

1243 def parse_log(cls, filename, blacklist):	1259 def parse_log(cls, filename, blacklist):

1244 """Processes a ETL log and returns the files opened and the files that do

1245 not exist.

1246

1247 It does not track directories.

1248

1249 Most of the time, files that do not exist are temporary test files that

1250 should be put in /tmp instead. See http://crbug.com/116251

1251

1252 Returns a tuple (existing files, non existing files, nb_processes_created)

1253 """

1254 logging.info('parse_log(%s, %s)' % (filename, blacklist))	1260 logging.info('parse_log(%s, %s)' % (filename, blacklist))

1255	1261

1256 # Auto-detect the log format	1262 # Auto-detect the log format

1257 with open(filename, 'rb') as f:	1263 with open(filename, 'rb') as f:

1258 hdr = f.read(2)	1264 hdr = f.read(2)

1259 assert len(hdr) == 2	1265 assert len(hdr) == 2

1260 if hdr == '<E':	1266 if hdr == '<E':

1261 # It starts with <Events>	1267 # It starts with <Events>

1262 logformat = 'xml'	1268 logformat = 'xml'

1263 elif hdr == '\xFF\xEF':	1269 elif hdr == '\xFF\xEF':

(...skipping 363 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1627 os.path.abspath(options.log),	1633 os.path.abspath(options.log),

1628 args,	1634 args,

1629 options.root_dir,	1635 options.root_dir,

1630 options.cwd,	1636 options.cwd,

1631 options.product_dir,	1637 options.product_dir,

1632 options.force)	1638 options.force)

1633	1639

1634	1640

1635 if __name__ == '__main__':	1641 if __name__ == '__main__':

1636 sys.exit(main())	1642 sys.exit(main())

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »