OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 551 matching lines...)
562 json.dump(data, f, separators=(',',':')) | 562 json.dump(data, f, separators=(',',':')) |
563 else: | 563 else: |
564 json.dump(data, f, sort_keys=True, indent=2) | 564 json.dump(data, f, sort_keys=True, indent=2) |
565 | 565 |
566 | 566 |
567 class Results(object): | 567 class Results(object): |
568 """Results of a trace session.""" | 568 """Results of a trace session.""" |
569 | 569 |
570 class _TouchedObject(object): | 570 class _TouchedObject(object): |
571 """Something, a file or a directory, that was accessed.""" | 571 """Something, a file or a directory, that was accessed.""" |
572 def __init__(self, root, path, tainted): | 572 def __init__(self, root, path, tainted, size, nb_files): |
573 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path)) | 573 logging.debug( |
| 574 '%s(%s, %s, %s, %s, %s)' % |
| 575 (self.__class__.__name__, root, path, tainted, size, nb_files)) |
574 self.root = root | 576 self.root = root |
575 self.path = path | 577 self.path = path |
576 self.tainted = tainted | 578 self.tainted = tainted |
| 579 self.nb_files = nb_files |
| 580 # Can be used as a cache or a default value, depending on context. |
| 581 self._size = size |
577 # These are cache only. | 582 # These are cache only. |
578 self._real_path = None | 583 self._real_path = None |
579 self._size = None | |
580 | 584 |
581 # Check internal consistency. | 585 # Check internal consistency. |
582 assert path, path | 586 assert path, path |
583 assert tainted or bool(root) != bool(isabs(path)), (root, path) | 587 assert tainted or bool(root) != bool(isabs(path)), (root, path) |
584 assert tainted or ( | 588 assert tainted or ( |
585 not os.path.exists(self.full_path) or | 589 not os.path.exists(self.full_path) or |
586 (self.full_path == get_native_path_case(self.full_path))), ( | 590 (self.full_path == get_native_path_case(self.full_path))), ( |
587 tainted, self.full_path, get_native_path_case(self.full_path)) | 591 tainted, self.full_path, get_native_path_case(self.full_path)) |
588 | 592 |
589 @property | 593 @property |
(...skipping 17 matching lines...)
607 def size(self): | 611 def size(self): |
608 """File's size. -1 is not existent.""" | 612 """File's size. -1 is not existent.""" |
609 if self._size is None and not self.tainted: | 613 if self._size is None and not self.tainted: |
610 try: | 614 try: |
611 self._size = os.stat(self.full_path).st_size | 615 self._size = os.stat(self.full_path).st_size |
612 except OSError: | 616 except OSError: |
613 self._size = -1 | 617 self._size = -1 |
614 return self._size | 618 return self._size |
615 | 619 |
616 def flatten(self): | 620 def flatten(self): |
617 """Returns a dict representing this object.""" | 621 """Returns a dict representing this object. |
| 622 |
| 623 A 'size' of 0 means the file was only touched and not read. |
| 624 """ |
618 return { | 625 return { |
619 'path': self.path, | 626 'path': self.path, |
620 'size': self.size, | 627 'size': self.size, |
621 } | 628 } |
622 | 629 |
623 def replace_variables(self, variables): | 630 def replace_variables(self, variables): |
624 """Replaces the root of this File with one of the variables if it matches. | 631 """Replaces the root of this File with one of the variables if it matches. |
625 | 632 |
626 If a variable replacement occurs, the cloned object becomes tainted. | 633 If a variable replacement occurs, the cloned object becomes tainted. |
627 """ | 634 """ |
(...skipping 15 matching lines...)
643 return None | 650 return None |
644 path = self.real_path | 651 path = self.real_path |
645 else: | 652 else: |
646 path = self.full_path | 653 path = self.full_path |
647 return self._clone(root, path[len(root):], self.tainted) | 654 return self._clone(root, path[len(root):], self.tainted) |
648 | 655 |
649 def _clone(self, new_root, new_path, tainted): | 656 def _clone(self, new_root, new_path, tainted): |
650 raise NotImplementedError(self.__class__.__name__) | 657 raise NotImplementedError(self.__class__.__name__) |
651 | 658 |
652 class File(_TouchedObject): | 659 class File(_TouchedObject): |
653 """A file that was accessed. | 660 """A file that was accessed. May not be present anymore. |
654 | 661 |
655 If tainted is true, it means it is not a real path anymore as a variable | 662 If tainted is true, it means it is not a real path anymore as a variable |
656 replacement occurred. | 663 replacement occurred. |
| 664 |
| 665 If touched_only is True, this means the file was probed for existence, and |
| 666 it is existent, but was never _opened_. If touched_only is True, the file |
| 667 must have existed. |
657 """ | 668 """ |
658 def __init__(self, root, path, tainted): | 669 def __init__(self, root, path, tainted, size): |
659 """Represents a file accessed. May not be present anymore.""" | 670 super(Results.File, self).__init__(root, path, tainted, size, 1) |
660 super(Results.File, self).__init__(root, path, tainted) | |
661 # For compatibility with Directory object interface. | |
662 # Shouldn't be used normally, only exists to simplify algorithms. | |
663 self.nb_files = 1 | |
664 | 671 |
665 def _clone(self, new_root, new_path, tainted): | 672 def _clone(self, new_root, new_path, tainted): |
666 """Clones itself keeping meta-data.""" | 673 """Clones itself keeping meta-data.""" |
667 out = self.__class__(new_root, new_path, tainted) | 674 # Keep the self.size and self._real_path caches for performance reason. It |
668 # Keep the cache for performance reason. It is also important when the | 675 # is also important when the file becomes tainted (with a variable instead |
669 # file becomes tainted (with a variable instead of the real path) since | 676 # of the real path) since self.path is not an on-disk path anymore so |
670 # self.path is not an on-disk path anymore so out._size cannot be updated. | 677 # out._size cannot be updated. |
671 out._size = self.size | 678 out = self.__class__(new_root, new_path, tainted, self.size) |
672 out._real_path = self._real_path | 679 out._real_path = self._real_path |
673 return out | 680 return out |
674 | 681 |
675 class Directory(_TouchedObject): | 682 class Directory(_TouchedObject): |
676 """A directory of files. Must exist.""" | 683 """A directory of files. Must exist.""" |
677 def __init__(self, root, path, tainted, size, nb_files): | 684 def __init__(self, root, path, tainted, size, nb_files): |
678 """path='.' is a valid value and must be handled appropriately.""" | 685 """path='.' is a valid value and must be handled appropriately.""" |
679 assert not path.endswith(os.path.sep), path | 686 assert not path.endswith(os.path.sep), path |
680 super(Results.Directory, self).__init__(root, path + os.path.sep, tainted) | 687 super(Results.Directory, self).__init__( |
681 self.nb_files = nb_files | 688 root, path + os.path.sep, tainted, size, nb_files) |
682 # In that case, it's not a cache, it's an actual value that is never | 689 # In that case, it's not a cache, it's an actual value that is never |
683 # modified. | 690 # modified and represents the total size of the files contained in this |
| 691 # directory. |
684 assert size | 692 assert size |
685 self._size = size | |
686 | 693 |
687 def flatten(self): | 694 def flatten(self): |
688 out = super(Results.Directory, self).flatten() | 695 out = super(Results.Directory, self).flatten() |
689 out['nb_files'] = self.nb_files | 696 out['nb_files'] = self.nb_files |
690 return out | 697 return out |
691 | 698 |
692 def _clone(self, new_root, new_path, tainted): | 699 def _clone(self, new_root, new_path, tainted): |
693 """Clones itself keeping meta-data.""" | 700 """Clones itself keeping meta-data.""" |
694 out = self.__class__( | 701 out = self.__class__( |
695 new_root, | 702 new_root, |
696 new_path.rstrip(os.path.sep), | 703 new_path.rstrip(os.path.sep), |
697 tainted, | 704 tainted, |
698 self.size, | 705 self.size, |
699 self.nb_files) | 706 self.nb_files) |
700 out._real_path = self._real_path | 707 out._real_path = self._real_path |
701 return out | 708 return out |
702 | 709 |
703 class Process(object): | 710 class Process(object): |
704 """A process that was traced. | 711 """A process that was traced. |
705 | 712 |
706 Contains references to the files accessed by this process and its children. | 713 Contains references to the files accessed by this process and its children. |
707 """ | 714 """ |
708 def __init__( | 715 def __init__(self, pid, files, executable, command, initial_cwd, children): |
709 self, pid, files, executable, command, initial_cwd, children): | |
710 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) | 716 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) |
711 self.pid = pid | 717 self.pid = pid |
712 self.files = sorted( | 718 self.files = sorted(files, key=lambda x: x.path) |
713 (Results.File(None, f, False) for f in files), key=lambda x: x.path) | |
714 self.children = children | 719 self.children = children |
715 self.executable = executable | 720 self.executable = executable |
716 self.command = command | 721 self.command = command |
717 self.initial_cwd = initial_cwd | 722 self.initial_cwd = initial_cwd |
718 | 723 |
719 # Check internal consistency. | 724 # Check internal consistency. |
720 assert len(set(f.path for f in self.files)) == len(self.files), [ | 725 assert len(set(f.path for f in self.files)) == len(self.files), sorted( |
721 f.path for f in self.files] | 726 f.path for f in self.files) |
722 assert isinstance(self.children, list) | 727 assert isinstance(self.children, list) |
723 assert isinstance(self.files, list) | 728 assert isinstance(self.files, list) |
724 | 729 |
725 @property | 730 @property |
726 def all(self): | 731 def all(self): |
727 for child in self.children: | 732 for child in self.children: |
728 for i in child.all: | 733 for i in child.all: |
729 yield i | 734 yield i |
730 yield self | 735 yield self |
731 | 736 |
732 def flatten(self): | 737 def flatten(self): |
733 return { | 738 return { |
734 'children': [c.flatten() for c in self.children], | 739 'children': [c.flatten() for c in self.children], |
735 'command': self.command, | 740 'command': self.command, |
736 'executable': self.executable, | 741 'executable': self.executable, |
737 'files': [f.flatten() for f in self.files], | 742 'files': [f.flatten() for f in self.files], |
738 'initial_cwd': self.initial_cwd, | 743 'initial_cwd': self.initial_cwd, |
739 'pid': self.pid, | 744 'pid': self.pid, |
740 } | 745 } |
741 | 746 |
742 def strip_root(self, root): | 747 def strip_root(self, root): |
743 assert isabs(root) and root.endswith(os.path.sep), root | 748 assert isabs(root) and root.endswith(os.path.sep), root |
| 749 # Loads the files after since they are constructed as objects. |
744 out = self.__class__( | 750 out = self.__class__( |
745 self.pid, | 751 self.pid, |
746 [], | 752 filter(None, (f.strip_root(root) for f in self.files)), |
747 self.executable, | 753 self.executable, |
748 self.command, | 754 self.command, |
749 self.initial_cwd, | 755 self.initial_cwd, |
750 [c.strip_root(root) for c in self.children]) | 756 [c.strip_root(root) for c in self.children]) |
751 # Override the files property. | |
752 out.files = filter(None, (f.strip_root(root) for f in self.files)) | |
753 logging.debug( | 757 logging.debug( |
754 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) | 758 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) |
755 return out | 759 return out |
756 | 760 |
757 | |
758 def __init__(self, process): | 761 def __init__(self, process): |
759 self.process = process | 762 self.process = process |
760 # Cache. | 763 # Cache. |
761 self._files = None | 764 self._files = None |
762 | 765 |
763 def flatten(self): | 766 def flatten(self): |
764 return { | 767 return { |
765 'root': self.process.flatten(), | 768 'root': self.process.flatten(), |
766 } | 769 } |
767 | 770 |
(...skipping 40 matching lines...)
808 assert isinstance(root, ApiBase.Context) | 811 assert isinstance(root, ApiBase.Context) |
809 assert isinstance(pid, int), repr(pid) | 812 assert isinstance(pid, int), repr(pid) |
810 self.root = weakref.ref(root) | 813 self.root = weakref.ref(root) |
811 self.pid = pid | 814 self.pid = pid |
812 # Children are pids. | 815 # Children are pids. |
813 self.children = [] | 816 self.children = [] |
814 self.parentid = parentid | 817 self.parentid = parentid |
815 self.initial_cwd = initial_cwd | 818 self.initial_cwd = initial_cwd |
816 self.cwd = None | 819 self.cwd = None |
817 self.files = set() | 820 self.files = set() |
| 821 self.only_touched = set() |
818 self.executable = None | 822 self.executable = None |
819 self.command = None | 823 self.command = None |
820 | 824 |
821 if parentid: | 825 if parentid: |
822 self.root().processes[parentid].children.append(pid) | 826 self.root().processes[parentid].children.append(pid) |
823 | 827 |
824 def to_results_process(self): | 828 def to_results_process(self): |
825 """Resolves file case sensitivity and/or late-bound strings.""" | 829 """Resolves file case sensitivity and/or late-bound strings.""" |
826 children = [ | 830 children = [ |
827 self.root().processes[c].to_results_process() for c in self.children | 831 self.root().processes[c].to_results_process() for c in self.children |
828 ] | 832 ] |
829 # When resolving files, it's normal to get dupe because a file could be | 833 # When resolving files, it's normal to get dupe because a file could be |
830 # opened multiple times with different case. Resolve the deduplication | 834 # opened multiple times with different case. Resolve the deduplication |
831 # here. | 835 # here. |
832 def render_to_string_and_fix_case(x): | 836 def render_to_string_and_fix_case(x): |
833 """Returns the native file path case if the file exists. | 837 """Returns the native file path case if the file exists. |
834 | 838 |
835 Converts late-bound strings. | 839 Converts late-bound strings. |
836 """ | 840 """ |
837 if not x: | 841 if not x: |
838 return x | 842 return x |
839 # TODO(maruel): Do not upconvert to unicode here, on linux we don't | 843 # TODO(maruel): Do not upconvert to unicode here, on linux we don't |
840 # know the file path encoding so they must be treated as bytes. | 844 # know the file path encoding so they must be treated as bytes. |
841 x = unicode(x) | 845 x = unicode(x) |
842 if not os.path.exists(x): | 846 if not os.path.exists(x): |
843 return x | 847 return x |
844 return get_native_path_case(x) | 848 return get_native_path_case(x) |
845 | 849 |
| 850 # Filters out directories. Some may have passed through. |
| 851 files = set(map(render_to_string_and_fix_case, self.files)) |
| 852 only_touched = set( |
| 853 map(render_to_string_and_fix_case, self.only_touched)) |
| 854 only_touched -= files |
| 855 |
846 files = [ | 856 files = [ |
847 f for f in set(map(render_to_string_and_fix_case, self.files)) | 857 Results.File(None, f, False, None) for f in files |
848 if not os.path.isdir(f) | 858 if not os.path.isdir(f) |
849 ] | 859 ] |
| 860 # Using 0 as size means the file's content is ignored since the file was |
| 861 # never opened for I/O. |
| 862 files.extend( |
| 863 Results.File(None, f, False, 0) for f in only_touched |
| 864 if not os.path.isdir(f) |
| 865 ) |
850 return Results.Process( | 866 return Results.Process( |
851 self.pid, | 867 self.pid, |
852 files, | 868 files, |
853 render_to_string_and_fix_case(self.executable), | 869 render_to_string_and_fix_case(self.executable), |
854 self.command, | 870 self.command, |
855 render_to_string_and_fix_case(self.initial_cwd), | 871 render_to_string_and_fix_case(self.initial_cwd), |
856 children) | 872 children) |
857 | 873 |
858 def add_file(self, filepath): | 874 def add_file(self, filepath): |
859 if self.root().blacklist(unicode(filepath)): | 875 if self.root().blacklist(unicode(filepath)): |
(...skipping 2229 matching lines...)
3089 return command(argv[1:]) | 3105 return command(argv[1:]) |
3090 except TracingFailure, e: | 3106 except TracingFailure, e: |
3091 sys.stderr.write('\nError: ') | 3107 sys.stderr.write('\nError: ') |
3092 sys.stderr.write(str(e)) | 3108 sys.stderr.write(str(e)) |
3093 sys.stderr.write('\n') | 3109 sys.stderr.write('\n') |
3094 return 1 | 3110 return 1 |
3095 | 3111 |
3096 | 3112 |
3097 if __name__ == '__main__': | 3113 if __name__ == '__main__': |
3098 sys.exit(main(sys.argv[1:])) | 3114 sys.exit(main(sys.argv[1:])) |
OLD | NEW |