Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(74)

Side by Side Diff: tools/isolate/trace_inputs.py

Issue 10399090: Create an interface class to describe and document the interface functions. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding=utf-8 2 # coding=utf-8
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be 4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file. 5 # found in the LICENSE file.
6 6
7 """Traces an executable and its child processes and extract the files accessed 7 """Traces an executable and its child processes and extract the files accessed
8 by them. 8 by them.
9 9
10 The implementation uses OS-specific API. The native Kernel logger and the ETL 10 The implementation uses OS-specific API. The native Kernel logger and the ETL
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
214 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows.""" 214 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows."""
215 if x: 215 if x:
216 x = x.rstrip(os.path.sep).replace(os.path.sep, '/') 216 x = x.rstrip(os.path.sep).replace(os.path.sep, '/')
217 if x == '.': 217 if x == '.':
218 x = '' 218 x = ''
219 if x: 219 if x:
220 x += '/' 220 x += '/'
221 return x 221 return x
222 222
223 223
224 class Strace(object): 224 class ApiBase(object):
225 @staticmethod
226 def clean_trace(logname):
227 """Deletes the old log."""
228 raise NotImplementedError()
229
230 @classmethod
231 def gen_trace(cls, cmd, cwd, logname, output):
232 """Runs the OS-specific trace program on an executable.
233
234 Since the logs are per pid, we need to log the list of the initial pid.
235 """
236 raise NotImplementedError(cls.__class__.__name__)
237
238 @classmethod
239 def parse_log(cls, filename, blacklist):
240 """Processes a trace log and returns the files opened and the files that do
241 not exist.
242
243 It does not track directories.
244
245 Most of the time, files that do not exist are temporary test files that
246 should be put in /tmp instead. See http://crbug.com/116251.
247
248 Returns a tuple (existing files, non existing files, nb_processes_created)
249 """
250 raise NotImplementedError(cls.__class__.__name__)
251
252
253 class Strace(ApiBase):
225 """strace implies linux.""" 254 """strace implies linux."""
226 IGNORED = ( 255 IGNORED = (
227 '/bin', 256 '/bin',
228 '/dev', 257 '/dev',
229 '/etc', 258 '/etc',
230 '/lib', 259 '/lib',
231 '/proc', 260 '/proc',
232 '/sys', 261 '/sys',
233 '/tmp', 262 '/tmp',
234 '/usr', 263 '/usr',
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after
475 filepath = unicode(filepath) 504 filepath = unicode(filepath)
476 if self.blacklist(filepath): 505 if self.blacklist(filepath):
477 return 506 return
478 if os.path.isfile(filepath): 507 if os.path.isfile(filepath):
479 self.files.add(filepath) 508 self.files.add(filepath)
480 else: 509 else:
481 self.non_existent.add(filepath) 510 self.non_existent.add(filepath)
482 511
483 @staticmethod 512 @staticmethod
484 def clean_trace(logname): 513 def clean_trace(logname):
485 """Deletes the old log."""
486 if os.path.isfile(logname): 514 if os.path.isfile(logname):
487 os.remove(logname) 515 os.remove(logname)
488 # Also delete any pid specific file from previous traces. 516 # Also delete any pid specific file from previous traces.
489 for i in glob.iglob(logname + '.*'): 517 for i in glob.iglob(logname + '.*'):
490 if i.rsplit('.', 1)[1].isdigit(): 518 if i.rsplit('.', 1)[1].isdigit():
491 os.remove(i) 519 os.remove(i)
492 520
493 @classmethod 521 @classmethod
494 def gen_trace(cls, cmd, cwd, logname, output): 522 def gen_trace(cls, cmd, cwd, logname, output):
495 """Runs strace on an executable. 523 """Runs strace on an executable.
(...skipping 21 matching lines...) Expand all
517 json.dump( 545 json.dump(
518 { 546 {
519 'cwd': cwd, 547 'cwd': cwd,
520 'pid': child.pid, 548 'pid': child.pid,
521 }, 549 },
522 f) 550 f)
523 return child.returncode, out 551 return child.returncode, out
524 552
525 @classmethod 553 @classmethod
526 def parse_log(cls, filename, blacklist): 554 def parse_log(cls, filename, blacklist):
527 """Processes a strace log and returns the files opened and the files that do
528 not exist.
529
530 It does not track directories.
531
532 Most of the time, files that do not exist are temporary test files that
533 should be put in /tmp instead. See http://crbug.com/116251.
534
535 Returns a tuple (existing files, non existing files, nb_processes_created)
536 """
537 logging.info('parse_log(%s, %s)' % (filename, blacklist)) 555 logging.info('parse_log(%s, %s)' % (filename, blacklist))
538 with open(filename, 'r') as f: 556 with open(filename, 'r') as f:
539 data = json.load(f) 557 data = json.load(f)
540 context = cls.Context(blacklist, data['cwd']) 558 context = cls.Context(blacklist, data['cwd'])
541 for pidfile in glob.iglob(filename + '.*'): 559 for pidfile in glob.iglob(filename + '.*'):
542 pid = pidfile.rsplit('.', 1)[1] 560 pid = pidfile.rsplit('.', 1)[1]
543 if pid.isdigit(): 561 if pid.isdigit():
544 pid = int(pid) 562 pid = int(pid)
545 # TODO(maruel): Load as utf-8 563 # TODO(maruel): Load as utf-8
546 for line in open(pidfile, 'rb'): 564 for line in open(pidfile, 'rb'):
547 context.on_line(pid, line) 565 context.on_line(pid, line)
548 context.resolve() 566 context.resolve()
549 # Resolve any symlink we hit. 567 # Resolve any symlink we hit.
550 return ( 568 return (
551 set(os.path.realpath(f) for f in context.files), 569 set(os.path.realpath(f) for f in context.files),
552 set(os.path.realpath(f) for f in context.non_existent), 570 set(os.path.realpath(f) for f in context.non_existent),
553 len(context.processes)) 571 len(context.processes))
554 572
555 573
556 class Dtrace(object): 574 class Dtrace(ApiBase):
557 """Uses DTrace framework through dtrace. Requires root access. 575 """Uses DTrace framework through dtrace. Requires root access.
558 576
559 Implies Mac OSX. 577 Implies Mac OSX.
560 578
561 dtruss can't be used because it has compatibility issues with python. 579 dtruss can't be used because it has compatibility issues with python.
562 580
563 Also, the pid->cwd handling needs to be done manually since OSX has no way to 581 Also, the pid->cwd handling needs to be done manually since OSX has no way to
564 get the absolute path of the 'cwd' dtrace variable from the probe. 582 get the absolute path of the 'cwd' dtrace variable from the probe.
565 583
566 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see 584 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after
812 self.files.add(filepath) 830 self.files.add(filepath)
813 else: 831 else:
814 self.non_existent.add(filepath) 832 self.non_existent.add(filepath)
815 833
816 @staticmethod 834 @staticmethod
817 def _handle_ignored(_ppid, pid, function, args, result): 835 def _handle_ignored(_ppid, pid, function, args, result):
818 logging.debug('%d %s(%s) = %s' % (pid, function, args, result)) 836 logging.debug('%d %s(%s) = %s' % (pid, function, args, result))
819 837
820 @staticmethod 838 @staticmethod
821 def clean_trace(logname): 839 def clean_trace(logname):
822 """Deletes the old log."""
823 if os.path.isfile(logname): 840 if os.path.isfile(logname):
824 os.remove(logname) 841 os.remove(logname)
825 842
826 @classmethod 843 @classmethod
827 def gen_trace(cls, cmd, cwd, logname, output): 844 def gen_trace(cls, cmd, cwd, logname, output):
828 """Runs dtrace on an executable.""" 845 """Runs dtrace on an executable.
846
847 This dtruss is broken when it starts the process itself or when tracing
848 child processes, this code starts a wrapper process trace_child_process.py,
849 which waits for dtrace to start, then trace_child_process.py starts the
850 executable to trace.
851 """
829 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) 852 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))
830 logging.info('Running: %s' % cmd) 853 logging.info('Running: %s' % cmd)
831 signal = 'Go!' 854 signal = 'Go!'
832 logging.debug('Our pid: %d' % os.getpid()) 855 logging.debug('Our pid: %d' % os.getpid())
833 856
834 # Part 1: start the child process. 857 # Part 1: start the child process.
835 stdout = stderr = None 858 stdout = stderr = None
836 if output: 859 if output:
837 stdout = subprocess.PIPE 860 stdout = subprocess.PIPE
838 stderr = subprocess.STDOUT 861 stderr = subprocess.STDOUT
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
891 cls._sort_log(logname) 914 cls._sort_log(logname)
892 except KeyboardInterrupt: 915 except KeyboardInterrupt:
893 # Still sort when testing. 916 # Still sort when testing.
894 cls._sort_log(logname) 917 cls._sort_log(logname)
895 raise 918 raise
896 919
897 return dtrace.returncode or child.returncode, out 920 return dtrace.returncode or child.returncode, out
898 921
899 @classmethod 922 @classmethod
900 def parse_log(cls, filename, blacklist): 923 def parse_log(cls, filename, blacklist):
901 """Processes a dtrace log and returns the files opened and the files that do
902 not exist.
903
904 It does not track directories.
905
906 Most of the time, files that do not exist are temporary test files that
907 should be put in /tmp instead. See http://crbug.com/116251
908
909 Returns a tuple (existing files, non existing files, nb_processes_created)
910 """
911 logging.info('parse_log(%s, %s)' % (filename, blacklist)) 924 logging.info('parse_log(%s, %s)' % (filename, blacklist))
912 context = cls.Context(blacklist) 925 context = cls.Context(blacklist)
913 for line in open(filename, 'rb'): 926 for line in open(filename, 'rb'):
914 context.on_line(line) 927 context.on_line(line)
915 # Resolve any symlink we hit. 928 # Resolve any symlink we hit.
916 return ( 929 return (
917 set(os.path.realpath(f) for f in context.files), 930 set(os.path.realpath(f) for f in context.files),
918 set(os.path.realpath(f) for f in context.non_existent), 931 set(os.path.realpath(f) for f in context.non_existent),
919 context.nb_processes) 932 context.nb_processes)
920 933
921 @staticmethod 934 @staticmethod
922 def _sort_log(logname): 935 def _sort_log(logname):
923 """Sorts the log back in order when each call occured. 936 """Sorts the log back in order when each call occured.
924 937
925 dtrace doesn't save the buffer in strict order since it keeps one buffer per 938 dtrace doesn't save the buffer in strict order since it keeps one buffer per
926 CPU. 939 CPU.
927 """ 940 """
928 with open(logname, 'rb') as logfile: 941 with open(logname, 'rb') as logfile:
929 lines = [f for f in logfile.readlines() if f.strip()] 942 lines = [f for f in logfile.readlines() if f.strip()]
930 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) 943 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
931 with open(logname, 'wb') as logfile: 944 with open(logname, 'wb') as logfile:
932 logfile.write(''.join(lines)) 945 logfile.write(''.join(lines))
933 946
934 947
935 class LogmanTrace(object): 948 class LogmanTrace(ApiBase):
936 """Uses the native Windows ETW based tracing functionality to trace a child 949 """Uses the native Windows ETW based tracing functionality to trace a child
937 process. 950 process.
938 """ 951 """
939 class Context(object): 952 class Context(object):
940 """Processes a ETW log line and keeps the list of existent and non 953 """Processes a ETW log line and keeps the list of existent and non
941 existent files accessed. 954 existent files accessed.
942 955
943 Ignores directories. 956 Ignores directories.
944 """ 957 """
945 958
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
1111 filename in self.files or 1124 filename in self.files or
1112 filename in self.non_existent): 1125 filename in self.non_existent):
1113 return 1126 return
1114 logging.debug('_handle_file(%s)' % filename) 1127 logging.debug('_handle_file(%s)' % filename)
1115 if os.path.isfile(filename): 1128 if os.path.isfile(filename):
1116 self.files.add(filename) 1129 self.files.add(filename)
1117 else: 1130 else:
1118 self.non_existent.add(filename) 1131 self.non_existent.add(filename)
1119 1132
1120 def __init__(self): 1133 def __init__(self):
1134 super(LogmanTrace, self).__init__()
1121 # Most ignores need to be determined at runtime. 1135 # Most ignores need to be determined at runtime.
1122 self.IGNORED = set([os.path.dirname(sys.executable)]) 1136 self.IGNORED = set([os.path.dirname(sys.executable)])
1123 # Add many directories from environment variables. 1137 # Add many directories from environment variables.
1124 vars_to_ignore = ( 1138 vars_to_ignore = (
1125 'APPDATA', 1139 'APPDATA',
1126 'LOCALAPPDATA', 1140 'LOCALAPPDATA',
1127 'ProgramData', 1141 'ProgramData',
1128 'ProgramFiles', 1142 'ProgramFiles',
1129 'ProgramFiles(x86)', 1143 'ProgramFiles(x86)',
1130 'ProgramW6432', 1144 'ProgramW6432',
1131 'SystemRoot', 1145 'SystemRoot',
1132 'TEMP', 1146 'TEMP',
1133 'TMP', 1147 'TMP',
1134 ) 1148 )
1135 for i in vars_to_ignore: 1149 for i in vars_to_ignore:
1136 if os.environ.get(i): 1150 if os.environ.get(i):
1137 self.IGNORED.add(os.environ[i]) 1151 self.IGNORED.add(os.environ[i])
1138 1152
1139 # Also add their short path name equivalents. 1153 # Also add their short path name equivalents.
1140 for i in list(self.IGNORED): 1154 for i in list(self.IGNORED):
1141 self.IGNORED.add(GetShortPathName(i)) 1155 self.IGNORED.add(GetShortPathName(i))
1142 1156
1143 # Add this one last since it has no short path name equivalent. 1157 # Add this one last since it has no short path name equivalent.
1144 self.IGNORED.add('\\systemroot') 1158 self.IGNORED.add('\\systemroot')
1145 self.IGNORED = tuple(sorted(self.IGNORED)) 1159 self.IGNORED = tuple(sorted(self.IGNORED))
1146 1160
1147 @staticmethod 1161 @staticmethod
1148 def clean_trace(logname): 1162 def clean_trace(logname):
1149 """Deletes the old log."""
1150 if os.path.isfile(logname): 1163 if os.path.isfile(logname):
1151 os.remove(logname) 1164 os.remove(logname)
1152 if os.path.isfile(logname + '.etl'): 1165 if os.path.isfile(logname + '.etl'):
1153 os.remove(logname + '.etl') 1166 os.remove(logname + '.etl')
1154 1167
1155 @classmethod 1168 @classmethod
1156 def gen_trace(cls, cmd, cwd, logname, output): 1169 def gen_trace(cls, cmd, cwd, logname, output):
1170 """Uses logman.exe to start and stop the NT Kernel Logger while the
1171 executable to be traced is run.
1172 """
1157 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) 1173 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output))
1158 # Use "logman -?" for help. 1174 # Use "logman -?" for help.
1159 1175
1160 etl = logname + '.etl' 1176 etl = logname + '.etl'
1161 1177
1162 stdout = stderr = None 1178 stdout = stderr = None
1163 if output: 1179 if output:
1164 stdout = subprocess.PIPE 1180 stdout = subprocess.PIPE
1165 stderr = subprocess.STDOUT 1181 stderr = subprocess.STDOUT
1166 1182
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
1234 else: 1250 else:
1235 assert False, logformat 1251 assert False, logformat
1236 logging.debug('Running: %s' % cmd_convert) 1252 logging.debug('Running: %s' % cmd_convert)
1237 subprocess.check_call( 1253 subprocess.check_call(
1238 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) 1254 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr)
1239 1255
1240 return child.returncode, out 1256 return child.returncode, out
1241 1257
1242 @classmethod 1258 @classmethod
1243 def parse_log(cls, filename, blacklist): 1259 def parse_log(cls, filename, blacklist):
1244 """Processes a ETL log and returns the files opened and the files that do
1245 not exist.
1246
1247 It does not track directories.
1248
1249 Most of the time, files that do not exist are temporary test files that
1250 should be put in /tmp instead. See http://crbug.com/116251
1251
1252 Returns a tuple (existing files, non existing files, nb_processes_created)
1253 """
1254 logging.info('parse_log(%s, %s)' % (filename, blacklist)) 1260 logging.info('parse_log(%s, %s)' % (filename, blacklist))
1255 1261
1256 # Auto-detect the log format 1262 # Auto-detect the log format
1257 with open(filename, 'rb') as f: 1263 with open(filename, 'rb') as f:
1258 hdr = f.read(2) 1264 hdr = f.read(2)
1259 assert len(hdr) == 2 1265 assert len(hdr) == 2
1260 if hdr == '<E': 1266 if hdr == '<E':
1261 # It starts with <Events> 1267 # It starts with <Events>
1262 logformat = 'xml' 1268 logformat = 'xml'
1263 elif hdr == '\xFF\xEF': 1269 elif hdr == '\xFF\xEF':
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after
1627 os.path.abspath(options.log), 1633 os.path.abspath(options.log),
1628 args, 1634 args,
1629 options.root_dir, 1635 options.root_dir,
1630 options.cwd, 1636 options.cwd,
1631 options.product_dir, 1637 options.product_dir,
1632 options.force) 1638 options.force)
1633 1639
1634 1640
1635 if __name__ == '__main__': 1641 if __name__ == '__main__':
1636 sys.exit(main()) 1642 sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698