OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
214 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows.""" | 214 """Cleans up a relative path. Converts any os.path.sep to '/' on Windows.""" |
215 if x: | 215 if x: |
216 x = x.rstrip(os.path.sep).replace(os.path.sep, '/') | 216 x = x.rstrip(os.path.sep).replace(os.path.sep, '/') |
217 if x == '.': | 217 if x == '.': |
218 x = '' | 218 x = '' |
219 if x: | 219 if x: |
220 x += '/' | 220 x += '/' |
221 return x | 221 return x |
222 | 222 |
223 | 223 |
224 class Strace(object): | 224 class ApiBase(object): |
| 225 @staticmethod |
| 226 def clean_trace(logname): |
| 227 """Deletes the old log.""" |
| 228 raise NotImplementedError() |
| 229 |
| 230 @classmethod |
| 231 def gen_trace(cls, cmd, cwd, logname, output): |
| 232 """Runs the OS-specific trace program on an executable. |
| 233 |
| 234 Since the logs are per pid, we need to log the list of the initial pid. |
| 235 """ |
| 236 raise NotImplementedError(cls.__class__.__name__) |
| 237 |
| 238 @classmethod |
| 239 def parse_log(cls, filename, blacklist): |
| 240 """Processes a trace log and returns the files opened and the files that do |
| 241 not exist. |
| 242 |
| 243 It does not track directories. |
| 244 |
| 245 Most of the time, files that do not exist are temporary test files that |
| 246 should be put in /tmp instead. See http://crbug.com/116251. |
| 247 |
| 248 Returns a tuple (existing files, non existing files, nb_processes_created) |
| 249 """ |
| 250 raise NotImplementedError(cls.__class__.__name__) |
| 251 |
| 252 |
| 253 class Strace(ApiBase): |
225 """strace implies linux.""" | 254 """strace implies linux.""" |
226 IGNORED = ( | 255 IGNORED = ( |
227 '/bin', | 256 '/bin', |
228 '/dev', | 257 '/dev', |
229 '/etc', | 258 '/etc', |
230 '/lib', | 259 '/lib', |
231 '/proc', | 260 '/proc', |
232 '/sys', | 261 '/sys', |
233 '/tmp', | 262 '/tmp', |
234 '/usr', | 263 '/usr', |
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
475 filepath = unicode(filepath) | 504 filepath = unicode(filepath) |
476 if self.blacklist(filepath): | 505 if self.blacklist(filepath): |
477 return | 506 return |
478 if os.path.isfile(filepath): | 507 if os.path.isfile(filepath): |
479 self.files.add(filepath) | 508 self.files.add(filepath) |
480 else: | 509 else: |
481 self.non_existent.add(filepath) | 510 self.non_existent.add(filepath) |
482 | 511 |
483 @staticmethod | 512 @staticmethod |
484 def clean_trace(logname): | 513 def clean_trace(logname): |
485 """Deletes the old log.""" | |
486 if os.path.isfile(logname): | 514 if os.path.isfile(logname): |
487 os.remove(logname) | 515 os.remove(logname) |
488 # Also delete any pid specific file from previous traces. | 516 # Also delete any pid specific file from previous traces. |
489 for i in glob.iglob(logname + '.*'): | 517 for i in glob.iglob(logname + '.*'): |
490 if i.rsplit('.', 1)[1].isdigit(): | 518 if i.rsplit('.', 1)[1].isdigit(): |
491 os.remove(i) | 519 os.remove(i) |
492 | 520 |
493 @classmethod | 521 @classmethod |
494 def gen_trace(cls, cmd, cwd, logname, output): | 522 def gen_trace(cls, cmd, cwd, logname, output): |
495 """Runs strace on an executable. | 523 """Runs strace on an executable. |
(...skipping 21 matching lines...) Expand all Loading... |
517 json.dump( | 545 json.dump( |
518 { | 546 { |
519 'cwd': cwd, | 547 'cwd': cwd, |
520 'pid': child.pid, | 548 'pid': child.pid, |
521 }, | 549 }, |
522 f) | 550 f) |
523 return child.returncode, out | 551 return child.returncode, out |
524 | 552 |
525 @classmethod | 553 @classmethod |
526 def parse_log(cls, filename, blacklist): | 554 def parse_log(cls, filename, blacklist): |
527 """Processes a strace log and returns the files opened and the files that do | |
528 not exist. | |
529 | |
530 It does not track directories. | |
531 | |
532 Most of the time, files that do not exist are temporary test files that | |
533 should be put in /tmp instead. See http://crbug.com/116251. | |
534 | |
535 Returns a tuple (existing files, non existing files, nb_processes_created) | |
536 """ | |
537 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 555 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
538 with open(filename, 'r') as f: | 556 with open(filename, 'r') as f: |
539 data = json.load(f) | 557 data = json.load(f) |
540 context = cls.Context(blacklist, data['cwd']) | 558 context = cls.Context(blacklist, data['cwd']) |
541 for pidfile in glob.iglob(filename + '.*'): | 559 for pidfile in glob.iglob(filename + '.*'): |
542 pid = pidfile.rsplit('.', 1)[1] | 560 pid = pidfile.rsplit('.', 1)[1] |
543 if pid.isdigit(): | 561 if pid.isdigit(): |
544 pid = int(pid) | 562 pid = int(pid) |
545 # TODO(maruel): Load as utf-8 | 563 # TODO(maruel): Load as utf-8 |
546 for line in open(pidfile, 'rb'): | 564 for line in open(pidfile, 'rb'): |
547 context.on_line(pid, line) | 565 context.on_line(pid, line) |
548 context.resolve() | 566 context.resolve() |
549 # Resolve any symlink we hit. | 567 # Resolve any symlink we hit. |
550 return ( | 568 return ( |
551 set(os.path.realpath(f) for f in context.files), | 569 set(os.path.realpath(f) for f in context.files), |
552 set(os.path.realpath(f) for f in context.non_existent), | 570 set(os.path.realpath(f) for f in context.non_existent), |
553 len(context.processes)) | 571 len(context.processes)) |
554 | 572 |
555 | 573 |
556 class Dtrace(object): | 574 class Dtrace(ApiBase): |
557 """Uses DTrace framework through dtrace. Requires root access. | 575 """Uses DTrace framework through dtrace. Requires root access. |
558 | 576 |
559 Implies Mac OSX. | 577 Implies Mac OSX. |
560 | 578 |
561 dtruss can't be used because it has compatibility issues with python. | 579 dtruss can't be used because it has compatibility issues with python. |
562 | 580 |
563 Also, the pid->cwd handling needs to be done manually since OSX has no way to | 581 Also, the pid->cwd handling needs to be done manually since OSX has no way to |
564 get the absolute path of the 'cwd' dtrace variable from the probe. | 582 get the absolute path of the 'cwd' dtrace variable from the probe. |
565 | 583 |
566 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see | 584 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see |
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
812 self.files.add(filepath) | 830 self.files.add(filepath) |
813 else: | 831 else: |
814 self.non_existent.add(filepath) | 832 self.non_existent.add(filepath) |
815 | 833 |
816 @staticmethod | 834 @staticmethod |
817 def _handle_ignored(_ppid, pid, function, args, result): | 835 def _handle_ignored(_ppid, pid, function, args, result): |
818 logging.debug('%d %s(%s) = %s' % (pid, function, args, result)) | 836 logging.debug('%d %s(%s) = %s' % (pid, function, args, result)) |
819 | 837 |
820 @staticmethod | 838 @staticmethod |
821 def clean_trace(logname): | 839 def clean_trace(logname): |
822 """Deletes the old log.""" | |
823 if os.path.isfile(logname): | 840 if os.path.isfile(logname): |
824 os.remove(logname) | 841 os.remove(logname) |
825 | 842 |
826 @classmethod | 843 @classmethod |
827 def gen_trace(cls, cmd, cwd, logname, output): | 844 def gen_trace(cls, cmd, cwd, logname, output): |
828 """Runs dtrace on an executable.""" | 845 """Runs dtrace on an executable. |
| 846 |
| 847 This dtruss is broken when it starts the process itself or when tracing |
| 848 child processes, this code starts a wrapper process trace_child_process.py, |
| 849 which waits for dtrace to start, then trace_child_process.py starts the |
| 850 executable to trace. |
| 851 """ |
829 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) | 852 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) |
830 logging.info('Running: %s' % cmd) | 853 logging.info('Running: %s' % cmd) |
831 signal = 'Go!' | 854 signal = 'Go!' |
832 logging.debug('Our pid: %d' % os.getpid()) | 855 logging.debug('Our pid: %d' % os.getpid()) |
833 | 856 |
834 # Part 1: start the child process. | 857 # Part 1: start the child process. |
835 stdout = stderr = None | 858 stdout = stderr = None |
836 if output: | 859 if output: |
837 stdout = subprocess.PIPE | 860 stdout = subprocess.PIPE |
838 stderr = subprocess.STDOUT | 861 stderr = subprocess.STDOUT |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
891 cls._sort_log(logname) | 914 cls._sort_log(logname) |
892 except KeyboardInterrupt: | 915 except KeyboardInterrupt: |
893 # Still sort when testing. | 916 # Still sort when testing. |
894 cls._sort_log(logname) | 917 cls._sort_log(logname) |
895 raise | 918 raise |
896 | 919 |
897 return dtrace.returncode or child.returncode, out | 920 return dtrace.returncode or child.returncode, out |
898 | 921 |
899 @classmethod | 922 @classmethod |
900 def parse_log(cls, filename, blacklist): | 923 def parse_log(cls, filename, blacklist): |
901 """Processes a dtrace log and returns the files opened and the files that do | |
902 not exist. | |
903 | |
904 It does not track directories. | |
905 | |
906 Most of the time, files that do not exist are temporary test files that | |
907 should be put in /tmp instead. See http://crbug.com/116251 | |
908 | |
909 Returns a tuple (existing files, non existing files, nb_processes_created) | |
910 """ | |
911 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 924 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
912 context = cls.Context(blacklist) | 925 context = cls.Context(blacklist) |
913 for line in open(filename, 'rb'): | 926 for line in open(filename, 'rb'): |
914 context.on_line(line) | 927 context.on_line(line) |
915 # Resolve any symlink we hit. | 928 # Resolve any symlink we hit. |
916 return ( | 929 return ( |
917 set(os.path.realpath(f) for f in context.files), | 930 set(os.path.realpath(f) for f in context.files), |
918 set(os.path.realpath(f) for f in context.non_existent), | 931 set(os.path.realpath(f) for f in context.non_existent), |
919 context.nb_processes) | 932 context.nb_processes) |
920 | 933 |
921 @staticmethod | 934 @staticmethod |
922 def _sort_log(logname): | 935 def _sort_log(logname): |
923 """Sorts the log back in order when each call occured. | 936 """Sorts the log back in order when each call occured. |
924 | 937 |
925 dtrace doesn't save the buffer in strict order since it keeps one buffer per | 938 dtrace doesn't save the buffer in strict order since it keeps one buffer per |
926 CPU. | 939 CPU. |
927 """ | 940 """ |
928 with open(logname, 'rb') as logfile: | 941 with open(logname, 'rb') as logfile: |
929 lines = [f for f in logfile.readlines() if f.strip()] | 942 lines = [f for f in logfile.readlines() if f.strip()] |
930 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) | 943 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) |
931 with open(logname, 'wb') as logfile: | 944 with open(logname, 'wb') as logfile: |
932 logfile.write(''.join(lines)) | 945 logfile.write(''.join(lines)) |
933 | 946 |
934 | 947 |
935 class LogmanTrace(object): | 948 class LogmanTrace(ApiBase): |
936 """Uses the native Windows ETW based tracing functionality to trace a child | 949 """Uses the native Windows ETW based tracing functionality to trace a child |
937 process. | 950 process. |
938 """ | 951 """ |
939 class Context(object): | 952 class Context(object): |
940 """Processes a ETW log line and keeps the list of existent and non | 953 """Processes a ETW log line and keeps the list of existent and non |
941 existent files accessed. | 954 existent files accessed. |
942 | 955 |
943 Ignores directories. | 956 Ignores directories. |
944 """ | 957 """ |
945 | 958 |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1111 filename in self.files or | 1124 filename in self.files or |
1112 filename in self.non_existent): | 1125 filename in self.non_existent): |
1113 return | 1126 return |
1114 logging.debug('_handle_file(%s)' % filename) | 1127 logging.debug('_handle_file(%s)' % filename) |
1115 if os.path.isfile(filename): | 1128 if os.path.isfile(filename): |
1116 self.files.add(filename) | 1129 self.files.add(filename) |
1117 else: | 1130 else: |
1118 self.non_existent.add(filename) | 1131 self.non_existent.add(filename) |
1119 | 1132 |
1120 def __init__(self): | 1133 def __init__(self): |
| 1134 super(LogmanTrace, self).__init__() |
1121 # Most ignores need to be determined at runtime. | 1135 # Most ignores need to be determined at runtime. |
1122 self.IGNORED = set([os.path.dirname(sys.executable)]) | 1136 self.IGNORED = set([os.path.dirname(sys.executable)]) |
1123 # Add many directories from environment variables. | 1137 # Add many directories from environment variables. |
1124 vars_to_ignore = ( | 1138 vars_to_ignore = ( |
1125 'APPDATA', | 1139 'APPDATA', |
1126 'LOCALAPPDATA', | 1140 'LOCALAPPDATA', |
1127 'ProgramData', | 1141 'ProgramData', |
1128 'ProgramFiles', | 1142 'ProgramFiles', |
1129 'ProgramFiles(x86)', | 1143 'ProgramFiles(x86)', |
1130 'ProgramW6432', | 1144 'ProgramW6432', |
1131 'SystemRoot', | 1145 'SystemRoot', |
1132 'TEMP', | 1146 'TEMP', |
1133 'TMP', | 1147 'TMP', |
1134 ) | 1148 ) |
1135 for i in vars_to_ignore: | 1149 for i in vars_to_ignore: |
1136 if os.environ.get(i): | 1150 if os.environ.get(i): |
1137 self.IGNORED.add(os.environ[i]) | 1151 self.IGNORED.add(os.environ[i]) |
1138 | 1152 |
1139 # Also add their short path name equivalents. | 1153 # Also add their short path name equivalents. |
1140 for i in list(self.IGNORED): | 1154 for i in list(self.IGNORED): |
1141 self.IGNORED.add(GetShortPathName(i)) | 1155 self.IGNORED.add(GetShortPathName(i)) |
1142 | 1156 |
1143 # Add this one last since it has no short path name equivalent. | 1157 # Add this one last since it has no short path name equivalent. |
1144 self.IGNORED.add('\\systemroot') | 1158 self.IGNORED.add('\\systemroot') |
1145 self.IGNORED = tuple(sorted(self.IGNORED)) | 1159 self.IGNORED = tuple(sorted(self.IGNORED)) |
1146 | 1160 |
1147 @staticmethod | 1161 @staticmethod |
1148 def clean_trace(logname): | 1162 def clean_trace(logname): |
1149 """Deletes the old log.""" | |
1150 if os.path.isfile(logname): | 1163 if os.path.isfile(logname): |
1151 os.remove(logname) | 1164 os.remove(logname) |
1152 if os.path.isfile(logname + '.etl'): | 1165 if os.path.isfile(logname + '.etl'): |
1153 os.remove(logname + '.etl') | 1166 os.remove(logname + '.etl') |
1154 | 1167 |
1155 @classmethod | 1168 @classmethod |
1156 def gen_trace(cls, cmd, cwd, logname, output): | 1169 def gen_trace(cls, cmd, cwd, logname, output): |
| 1170 """Uses logman.exe to start and stop the NT Kernel Logger while the |
| 1171 executable to be traced is run. |
| 1172 """ |
1157 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) | 1173 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) |
1158 # Use "logman -?" for help. | 1174 # Use "logman -?" for help. |
1159 | 1175 |
1160 etl = logname + '.etl' | 1176 etl = logname + '.etl' |
1161 | 1177 |
1162 stdout = stderr = None | 1178 stdout = stderr = None |
1163 if output: | 1179 if output: |
1164 stdout = subprocess.PIPE | 1180 stdout = subprocess.PIPE |
1165 stderr = subprocess.STDOUT | 1181 stderr = subprocess.STDOUT |
1166 | 1182 |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1234 else: | 1250 else: |
1235 assert False, logformat | 1251 assert False, logformat |
1236 logging.debug('Running: %s' % cmd_convert) | 1252 logging.debug('Running: %s' % cmd_convert) |
1237 subprocess.check_call( | 1253 subprocess.check_call( |
1238 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) | 1254 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) |
1239 | 1255 |
1240 return child.returncode, out | 1256 return child.returncode, out |
1241 | 1257 |
1242 @classmethod | 1258 @classmethod |
1243 def parse_log(cls, filename, blacklist): | 1259 def parse_log(cls, filename, blacklist): |
1244 """Processes a ETL log and returns the files opened and the files that do | |
1245 not exist. | |
1246 | |
1247 It does not track directories. | |
1248 | |
1249 Most of the time, files that do not exist are temporary test files that | |
1250 should be put in /tmp instead. See http://crbug.com/116251 | |
1251 | |
1252 Returns a tuple (existing files, non existing files, nb_processes_created) | |
1253 """ | |
1254 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 1260 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
1255 | 1261 |
1256 # Auto-detect the log format | 1262 # Auto-detect the log format |
1257 with open(filename, 'rb') as f: | 1263 with open(filename, 'rb') as f: |
1258 hdr = f.read(2) | 1264 hdr = f.read(2) |
1259 assert len(hdr) == 2 | 1265 assert len(hdr) == 2 |
1260 if hdr == '<E': | 1266 if hdr == '<E': |
1261 # It starts with <Events> | 1267 # It starts with <Events> |
1262 logformat = 'xml' | 1268 logformat = 'xml' |
1263 elif hdr == '\xFF\xEF': | 1269 elif hdr == '\xFF\xEF': |
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1627 os.path.abspath(options.log), | 1633 os.path.abspath(options.log), |
1628 args, | 1634 args, |
1629 options.root_dir, | 1635 options.root_dir, |
1630 options.cwd, | 1636 options.cwd, |
1631 options.product_dir, | 1637 options.product_dir, |
1632 options.force) | 1638 options.force) |
1633 | 1639 |
1634 | 1640 |
1635 if __name__ == '__main__': | 1641 if __name__ == '__main__': |
1636 sys.exit(main()) | 1642 sys.exit(main()) |
OLD | NEW |