OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
636 self._initialized = False | 636 self._initialized = False |
637 | 637 |
638 def __enter__(self): | 638 def __enter__(self): |
639 """Enables 'with' statement.""" | 639 """Enables 'with' statement.""" |
640 return self | 640 return self |
641 | 641 |
642 def __exit__(self, exc_type, exc_value, traceback): | 642 def __exit__(self, exc_type, exc_value, traceback): |
643 """Enables 'with' statement.""" | 643 """Enables 'with' statement.""" |
644 self.close() | 644 self.close() |
645 | 645 |
646 def get_tracer(self, logname=None): | 646 def get_tracer(self, logname): |
647 """Returns an ApiBase.Tracer instance. | 647 """Returns an ApiBase.Tracer instance. |
648 | 648 |
649 Initializes the tracing subsystem, which is a requirement for kernel-based | 649 Initializes the tracing subsystem, which is a requirement for kernel-based |
650 tracers. Only one tracer instance should be live at a time! | 650 tracers. Only one tracer instance should be live at a time! |
651 | 651 |
652 logname is the filepath to the json file that will contain the meta-data | 652 logname is the filepath to the json file that will contain the meta-data |
653 about the logs. | 653 about the logs. |
654 """ | 654 """ |
655 return self.Tracer(logname) | 655 return self.Tracer(logname) |
656 | 656 |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
981 | 981 |
982 @classmethod | 982 @classmethod |
983 def traces(cls): | 983 def traces(cls): |
984 """Returns the list of all handled traces to pass this as an argument to | 984 """Returns the list of all handled traces to pass this as an argument to |
985 strace. | 985 strace. |
986 """ | 986 """ |
987 prefix = 'handle_' | 987 prefix = 'handle_' |
988 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] | 988 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] |
989 | 989 |
990 class Tracer(ApiBase.Tracer): | 990 class Tracer(ApiBase.Tracer): |
991 @classmethod | 991 def trace(self, cmd, cwd, tracename, output): |
992 def trace(cls, cmd, cwd, logname, output): | 992 """Runs strace on an executable.""" |
993 """Runs strace on an executable. | 993 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
994 | |
995 Since the logs are per pid, we need to log the list of the initial pid. | |
996 """ | |
997 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) | |
998 stdout = stderr = None | 994 stdout = stderr = None |
999 if output: | 995 if output: |
1000 stdout = subprocess.PIPE | 996 stdout = subprocess.PIPE |
1001 stderr = subprocess.STDOUT | 997 stderr = subprocess.STDOUT |
1002 traces = ','.join(Strace.Context.traces()) | 998 traces = ','.join(Strace.Context.traces()) |
1003 trace_cmd = [ | 999 trace_cmd = [ |
1004 'strace', | 1000 'strace', |
1005 '-ff', | 1001 '-ff', |
1006 '-s', '256', | 1002 '-s', '256', |
1007 '-e', 'trace=%s' % traces, | 1003 '-e', 'trace=%s' % traces, |
1008 '-o', logname, | 1004 '-o', self._logname, |
1009 ] | 1005 ] |
1010 child = subprocess.Popen( | 1006 child = subprocess.Popen( |
1011 trace_cmd + cmd, | 1007 trace_cmd + cmd, |
1012 cwd=cwd, | 1008 cwd=cwd, |
1013 stdin=subprocess.PIPE, | 1009 stdin=subprocess.PIPE, |
1014 stdout=stdout, | 1010 stdout=stdout, |
1015 stderr=stderr) | 1011 stderr=stderr) |
1016 out = child.communicate()[0] | 1012 out = child.communicate()[0] |
1017 # Once it's done, write metadata into the log file to be able to follow | 1013 # Once it's done, write metadata into the log file to be able to follow |
1018 # the pid files. | 1014 # the pid files. |
1019 value = { | 1015 value = { |
1020 'cwd': cwd, | 1016 'cwd': cwd, |
1021 # The pid of strace process, not very useful. | 1017 # The pid of strace process, not very useful. |
1022 'pid': child.pid, | 1018 'pid': child.pid, |
1023 } | 1019 } |
1024 write_json(logname, value, False) | 1020 write_json(self._logname, value, False) |
1025 return child.returncode, out | 1021 return child.returncode, out |
1026 | 1022 |
1027 @staticmethod | 1023 @staticmethod |
1028 def clean_trace(logname): | 1024 def clean_trace(logname): |
1029 if os.path.isfile(logname): | 1025 if os.path.isfile(logname): |
1030 os.remove(logname) | 1026 os.remove(logname) |
1031 # Also delete any pid specific file from previous traces. | 1027 # Also delete any pid specific file from previous traces. |
1032 for i in glob.iglob(logname + '.*'): | 1028 for i in glob.iglob(logname + '.*'): |
1033 if i.rsplit('.', 1)[1].isdigit(): | 1029 if i.rsplit('.', 1)[1].isdigit(): |
1034 os.remove(i) | 1030 os.remove(i) |
(...skipping 555 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1590 - there is no way to figure out the absolute path of cwd in kernel on OSX | 1586 - there is no way to figure out the absolute path of cwd in kernel on OSX |
1591 """ | 1587 """ |
1592 cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%') | 1588 cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%') |
1593 return ( | 1589 return ( |
1594 'inline int TRACER_PID = %d;\n' | 1590 'inline int TRACER_PID = %d;\n' |
1595 'inline string CWD = "%s";\n' | 1591 'inline string CWD = "%s";\n' |
1596 '\n' | 1592 '\n' |
1597 '%s\n' | 1593 '%s\n' |
1598 '%s') % (pid, cwd, cls.D_CODE, cls.D_CODE_EXECVE) | 1594 '%s') % (pid, cwd, cls.D_CODE, cls.D_CODE_EXECVE) |
1599 | 1595 |
1600 @classmethod | 1596 def trace(self, cmd, cwd, tracename, output): |
1601 def trace(cls, cmd, cwd, logname, output): | |
1602 """Runs dtrace on an executable. | 1597 """Runs dtrace on an executable. |
1603 | 1598 |
1604 This dtruss is broken when it starts the process itself or when tracing | 1599 This dtruss is broken when it starts the process itself or when tracing |
1605 child processes, this code starts a wrapper process | 1600 child processes, this code starts a wrapper process |
1606 trace_child_process.py, which waits for dtrace to start, then | 1601 trace_child_process.py, which waits for dtrace to start, then |
1607 trace_child_process.py starts the executable to trace. | 1602 trace_child_process.py starts the executable to trace. |
1608 """ | 1603 """ |
1609 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) | 1604 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
1610 logging.info('Running: %s' % cmd) | 1605 logging.info('Running: %s' % cmd) |
1611 signal = 'Go!' | 1606 signal = 'Go!' |
1612 logging.debug('Our pid: %d' % os.getpid()) | 1607 logging.debug('Our pid: %d' % os.getpid()) |
1613 | 1608 |
1614 # Part 1: start the child process. | 1609 # Part 1: start the child process. |
1615 stdout = stderr = None | 1610 stdout = stderr = None |
1616 if output: | 1611 if output: |
1617 stdout = subprocess.PIPE | 1612 stdout = subprocess.PIPE |
1618 stderr = subprocess.STDOUT | 1613 stderr = subprocess.STDOUT |
1619 child_cmd = [ | 1614 child_cmd = [ |
(...skipping 11 matching lines...) Expand all Loading... |
1631 | 1626 |
1632 # Part 2: start dtrace process. | 1627 # Part 2: start dtrace process. |
1633 # Note: do not use the -p flag. It's useless if the initial process quits | 1628 # Note: do not use the -p flag. It's useless if the initial process quits |
1634 # too fast, resulting in missing traces from the grand-children. The D | 1629 # too fast, resulting in missing traces from the grand-children. The D |
1635 # code manages the dtrace lifetime itself. | 1630 # code manages the dtrace lifetime itself. |
1636 trace_cmd = [ | 1631 trace_cmd = [ |
1637 'sudo', | 1632 'sudo', |
1638 'dtrace', | 1633 'dtrace', |
1639 '-x', 'dynvarsize=4m', | 1634 '-x', 'dynvarsize=4m', |
1640 '-x', 'evaltime=exec', | 1635 '-x', 'evaltime=exec', |
1641 '-n', cls.code(child.pid, cwd), | 1636 '-n', self.code(child.pid, cwd), |
1642 '-o', '/dev/stderr', | 1637 '-o', '/dev/stderr', |
1643 '-q', | 1638 '-q', |
1644 ] | 1639 ] |
1645 with open(logname, 'w') as logfile: | 1640 with open(self._logname, 'w') as logfile: |
1646 dtrace = subprocess.Popen( | 1641 dtrace = subprocess.Popen( |
1647 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) | 1642 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) |
1648 logging.debug('Started dtrace pid: %d' % dtrace.pid) | 1643 logging.debug('Started dtrace pid: %d' % dtrace.pid) |
1649 | 1644 |
1650 # Part 3: Read until one line is printed, which signifies dtrace is up and | 1645 # Part 3: Read until one line is printed, which signifies dtrace is up and |
1651 # ready. | 1646 # ready. |
1652 with open(logname, 'r') as logfile: | 1647 with open(self._logname, 'r') as logfile: |
1653 while 'dtrace_BEGIN' not in logfile.readline(): | 1648 while 'dtrace_BEGIN' not in logfile.readline(): |
1654 if dtrace.poll() is not None: | 1649 if dtrace.poll() is not None: |
1655 break | 1650 break |
1656 | 1651 |
1657 try: | 1652 try: |
1658 # Part 4: We can now tell our child to go. | 1653 # Part 4: We can now tell our child to go. |
1659 # TODO(maruel): Another pipe than stdin could be used instead. This | 1654 # TODO(maruel): Another pipe than stdin could be used instead. This |
1660 # would be more consistent with the other tracing methods. | 1655 # would be more consistent with the other tracing methods. |
1661 out = child.communicate(signal)[0] | 1656 out = child.communicate(signal)[0] |
1662 | 1657 |
1663 dtrace.wait() | 1658 dtrace.wait() |
1664 if dtrace.returncode != 0: | 1659 if dtrace.returncode != 0: |
1665 print 'dtrace failure: %d' % dtrace.returncode | 1660 print 'dtrace failure: %d' % dtrace.returncode |
1666 with open(logname) as logfile: | 1661 with open(self._logname) as logfile: |
1667 print ''.join(logfile.readlines()[-100:]) | 1662 print ''.join(logfile.readlines()[-100:]) |
1668 # Find a better way. | 1663 # Find a better way. |
1669 os.remove(logname) | 1664 os.remove(self._logname) |
1670 else: | 1665 else: |
1671 # Short the log right away to simplify our life. There isn't much | 1666 # Short the log right away to simplify our life. There isn't much |
1672 # advantage in keeping it out of order. | 1667 # advantage in keeping it out of order. |
1673 cls._sort_log(logname) | 1668 self._sort_log(self._logname) |
1674 except KeyboardInterrupt: | 1669 except KeyboardInterrupt: |
1675 # Still sort when testing. | 1670 # Still sort when testing. |
1676 cls._sort_log(logname) | 1671 self._sort_log(self._logname) |
1677 raise | 1672 raise |
1678 | 1673 |
1679 return dtrace.returncode or child.returncode, out | 1674 return dtrace.returncode or child.returncode, out |
1680 | 1675 |
1681 @staticmethod | 1676 @staticmethod |
1682 def _sort_log(logname): | 1677 def _sort_log(logname): |
1683 """Sorts the log back in order when each call occured. | 1678 """Sorts the log back in order when each call occured. |
1684 | 1679 |
1685 dtrace doesn't save the buffer in strict order since it keeps one buffer | 1680 dtrace doesn't save the buffer in strict order since it keeps one buffer |
1686 per CPU. | 1681 per CPU. |
(...skipping 355 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2042 def supported_events(cls): | 2037 def supported_events(cls): |
2043 """Returns all the procesed events.""" | 2038 """Returns all the procesed events.""" |
2044 out = [] | 2039 out = [] |
2045 for member in dir(cls): | 2040 for member in dir(cls): |
2046 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member) | 2041 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member) |
2047 if match: | 2042 if match: |
2048 out.append(match.groups()) | 2043 out.append(match.groups()) |
2049 return out | 2044 return out |
2050 | 2045 |
2051 class Tracer(ApiBase.Tracer): | 2046 class Tracer(ApiBase.Tracer): |
2052 @classmethod | 2047 def trace(self, cmd, cwd, tracename, output): |
2053 def trace(cls, cmd, cwd, logname, output): | |
2054 """Uses logman.exe to start and stop the NT Kernel Logger while the | 2048 """Uses logman.exe to start and stop the NT Kernel Logger while the |
2055 executable to be traced is run. | 2049 executable to be traced is run. |
2056 """ | 2050 """ |
2057 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) | 2051 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
2058 # Use "logman -?" for help. | 2052 # Use "logman -?" for help. |
2059 | 2053 |
2060 stdout = stderr = None | 2054 stdout = stderr = None |
2061 if output: | 2055 if output: |
2062 stdout = subprocess.PIPE | 2056 stdout = subprocess.PIPE |
2063 stderr = subprocess.STDOUT | 2057 stderr = subprocess.STDOUT |
2064 | 2058 |
2065 # 1. Start the log collection. | 2059 # 1. Start the log collection. |
2066 cls._start_log(logname + '.etl') | 2060 self._start_log(self._logname + '.etl') |
2067 | 2061 |
2068 # 2. Run the child process. | 2062 # 2. Run the child process. |
2069 logging.debug('Running: %s' % cmd) | 2063 logging.debug('Running: %s' % cmd) |
2070 try: | 2064 try: |
2071 # Use trace_child_process.py so we have a clear pid owner. Since | 2065 # Use trace_child_process.py so we have a clear pid owner. Since |
2072 # trace_inputs.py can be used as a library and could trace mulitple | 2066 # trace_inputs.py can be used as a library and could trace mulitple |
2073 # processes simultaneously, it makes it more complex if the executable | 2067 # processes simultaneously, it makes it more complex if the executable |
2074 # to be traced is executed directly here. It also solves issues related | 2068 # to be traced is executed directly here. It also solves issues related |
2075 # to logman.exe that needs to be executed to control the kernel trace. | 2069 # to logman.exe that needs to be executed to control the kernel trace. |
2076 child_cmd = [ | 2070 child_cmd = [ |
2077 sys.executable, | 2071 sys.executable, |
2078 os.path.join(BASE_DIR, 'trace_child_process.py'), | 2072 os.path.join(BASE_DIR, 'trace_child_process.py'), |
2079 ] | 2073 ] |
2080 child = subprocess.Popen( | 2074 child = subprocess.Popen( |
2081 child_cmd + cmd, | 2075 child_cmd + cmd, |
2082 cwd=cwd, | 2076 cwd=cwd, |
2083 stdin=subprocess.PIPE, | 2077 stdin=subprocess.PIPE, |
2084 stdout=stdout, | 2078 stdout=stdout, |
2085 stderr=stderr) | 2079 stderr=stderr) |
2086 logging.debug('Started child pid: %d' % child.pid) | 2080 logging.debug('Started child pid: %d' % child.pid) |
2087 out = child.communicate()[0] | 2081 out = child.communicate()[0] |
2088 finally: | 2082 finally: |
2089 # 3. Stop the log collection. | 2083 # 3. Stop the log collection. |
2090 cls._stop_log() | 2084 self._stop_log() |
2091 | 2085 |
2092 # 4. Convert log | 2086 # 4. Convert log |
2093 # TODO(maruel): Temporary, remove me. | 2087 # TODO(maruel): Temporary, remove me. |
2094 LogmanTrace.process_log(logname) | 2088 LogmanTrace.process_log(self._logname) |
2095 | 2089 |
2096 # 5. Save metadata. | 2090 # 5. Save metadata. |
2097 value = { | 2091 value = { |
2098 'pid': child.pid, | 2092 'pid': child.pid, |
2099 'format': 'csv', | 2093 'format': 'csv', |
2100 } | 2094 } |
2101 write_json(logname, value, False) | 2095 write_json(self._logname, value, False) |
2102 return child.returncode, out | 2096 return child.returncode, out |
2103 | 2097 |
2104 @classmethod | 2098 @classmethod |
2105 def _start_log(cls, etl): | 2099 def _start_log(cls, etl): |
2106 """Starts the log collection. | 2100 """Starts the log collection. |
2107 | 2101 |
2108 One can get the list of potentially interesting providers with: | 2102 One can get the list of potentially interesting providers with: |
2109 "logman query providers | findstr /i file" | 2103 "logman query providers | findstr /i file" |
2110 """ | 2104 """ |
2111 cmd_start = [ | 2105 cmd_start = [ |
(...skipping 321 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2433 Arguments: | 2427 Arguments: |
2434 - logfile: file to write to. | 2428 - logfile: file to write to. |
2435 - cmd: command to run. | 2429 - cmd: command to run. |
2436 - cwd: current directory to start the process in. | 2430 - cwd: current directory to start the process in. |
2437 - api: a tracing api instance. | 2431 - api: a tracing api instance. |
2438 - output: if True, returns output, otherwise prints it at the console. | 2432 - output: if True, returns output, otherwise prints it at the console. |
2439 """ | 2433 """ |
2440 cmd = fix_python_path(cmd) | 2434 cmd = fix_python_path(cmd) |
2441 assert os.path.isabs(cmd[0]), cmd[0] | 2435 assert os.path.isabs(cmd[0]), cmd[0] |
2442 api.clean_trace(logfile) | 2436 api.clean_trace(logfile) |
2443 return api.get_tracer().trace(cmd, cwd, logfile, output) | 2437 with api.get_tracer(logfile) as tracer: |
| 2438 return tracer.trace(cmd, cwd, 'default', output) |
2444 | 2439 |
2445 | 2440 |
2446 def load_trace(logfile, root_dir, api): | 2441 def load_trace(logfile, root_dir, api): |
2447 """Loads a trace file and returns the Results instance. | 2442 """Loads a trace file and returns the Results instance. |
2448 | 2443 |
2449 Arguments: | 2444 Arguments: |
2450 - logfile: File to load. | 2445 - logfile: File to load. |
2451 - root_dir: Root directory to use to determine if a file is relevant to the | 2446 - root_dir: Root directory to use to determine if a file is relevant to the |
2452 trace or not. | 2447 trace or not. |
2453 - api: A tracing api instance. | 2448 - api: A tracing api instance. |
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2610 for fn in dir(sys.modules[__name__]) | 2605 for fn in dir(sys.modules[__name__]) |
2611 if fn.startswith('CMD'))) | 2606 if fn.startswith('CMD'))) |
2612 | 2607 |
2613 command = get_command_handler(argv[0] if argv else None) | 2608 command = get_command_handler(argv[0] if argv else None) |
2614 parser = gen_parser(command) | 2609 parser = gen_parser(command) |
2615 return command(parser, argv[1:]) | 2610 return command(parser, argv[1:]) |
2616 | 2611 |
2617 | 2612 |
2618 if __name__ == '__main__': | 2613 if __name__ == '__main__': |
2619 sys.exit(main(sys.argv[1:])) | 2614 sys.exit(main(sys.argv[1:])) |
OLD | NEW |