Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(316)

Side by Side Diff: tools/isolate/trace_inputs.py

Issue 10772002: Initial refactoring to eventually support touch-only access. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/isolate/data/trace_inputs/touch_only.py ('k') | tools/isolate/trace_inputs_smoke_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding=utf-8 2 # coding=utf-8
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be 4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file. 5 # found in the LICENSE file.
6 6
7 """Traces an executable and its child processes and extracts the files accessed 7 """Traces an executable and its child processes and extracts the files accessed
8 by them. 8 by them.
9 9
10 The implementation uses OS-specific API. The native Kernel logger and the ETL 10 The implementation uses OS-specific API. The native Kernel logger and the ETL
(...skipping 551 matching lines...) Expand 10 before | Expand all | Expand 10 after
562 json.dump(data, f, separators=(',',':')) 562 json.dump(data, f, separators=(',',':'))
563 else: 563 else:
564 json.dump(data, f, sort_keys=True, indent=2) 564 json.dump(data, f, sort_keys=True, indent=2)
565 565
566 566
567 class Results(object): 567 class Results(object):
568 """Results of a trace session.""" 568 """Results of a trace session."""
569 569
570 class _TouchedObject(object): 570 class _TouchedObject(object):
571 """Something, a file or a directory, that was accessed.""" 571 """Something, a file or a directory, that was accessed."""
572 def __init__(self, root, path, tainted): 572 def __init__(self, root, path, tainted, size, nb_files):
573 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path)) 573 logging.debug(
574 '%s(%s, %s, %s, %s, %s)' %
575 (self.__class__.__name__, root, path, tainted, size, nb_files))
574 self.root = root 576 self.root = root
575 self.path = path 577 self.path = path
576 self.tainted = tainted 578 self.tainted = tainted
579 self.nb_files = nb_files
580 # Can be used as a cache or a default value, depending on context.
581 self._size = size
577 # These are cache only. 582 # These are cache only.
578 self._real_path = None 583 self._real_path = None
579 self._size = None
580 584
581 # Check internal consistency. 585 # Check internal consistency.
582 assert path, path 586 assert path, path
583 assert tainted or bool(root) != bool(isabs(path)), (root, path) 587 assert tainted or bool(root) != bool(isabs(path)), (root, path)
584 assert tainted or ( 588 assert tainted or (
585 not os.path.exists(self.full_path) or 589 not os.path.exists(self.full_path) or
586 (self.full_path == get_native_path_case(self.full_path))), ( 590 (self.full_path == get_native_path_case(self.full_path))), (
587 tainted, self.full_path, get_native_path_case(self.full_path)) 591 tainted, self.full_path, get_native_path_case(self.full_path))
588 592
589 @property 593 @property
(...skipping 17 matching lines...) Expand all
607 def size(self): 611 def size(self):
608 """File's size. -1 if the file does not exist.""" 612 """File's size. -1 if the file does not exist."""
609 if self._size is None and not self.tainted: 613 if self._size is None and not self.tainted:
610 try: 614 try:
611 self._size = os.stat(self.full_path).st_size 615 self._size = os.stat(self.full_path).st_size
612 except OSError: 616 except OSError:
613 self._size = -1 617 self._size = -1
614 return self._size 618 return self._size
615 619
616 def flatten(self): 620 def flatten(self):
617 """Returns a dict representing this object.""" 621 """Returns a dict representing this object.
622
623 A 'size' of 0 means the file was only touched and not read.
624 """
618 return { 625 return {
619 'path': self.path, 626 'path': self.path,
620 'size': self.size, 627 'size': self.size,
621 } 628 }
622 629
623 def replace_variables(self, variables): 630 def replace_variables(self, variables):
624 """Replaces the root of this File with one of the variables if it matches. 631 """Replaces the root of this File with one of the variables if it matches.
625 632
626 If a variable replacement occurs, the cloned object becomes tainted. 633 If a variable replacement occurs, the cloned object becomes tainted.
627 """ 634 """
(...skipping 15 matching lines...) Expand all
643 return None 650 return None
644 path = self.real_path 651 path = self.real_path
645 else: 652 else:
646 path = self.full_path 653 path = self.full_path
647 return self._clone(root, path[len(root):], self.tainted) 654 return self._clone(root, path[len(root):], self.tainted)
648 655
649 def _clone(self, new_root, new_path, tainted): 656 def _clone(self, new_root, new_path, tainted):
650 raise NotImplementedError(self.__class__.__name__) 657 raise NotImplementedError(self.__class__.__name__)
651 658
652 class File(_TouchedObject): 659 class File(_TouchedObject):
653 """A file that was accessed. 660 """A file that was accessed. May not be present anymore.
654 661
655 If tainted is true, it means it is not a real path anymore as a variable 662 If tainted is true, it means it is not a real path anymore as a variable
656 replacement occurred. 663 replacement occurred.
664
665 If touched_only is True, this means the file was probed for existence, and
666 it is existent, but was never _opened_. If touched_only is True, the file
667 must have existed.
657 """ 668 """
658 def __init__(self, root, path, tainted): 669 def __init__(self, root, path, tainted, size):
659 """Represents a file accessed. May not be present anymore.""" 670 super(Results.File, self).__init__(root, path, tainted, size, 1)
660 super(Results.File, self).__init__(root, path, tainted)
661 # For compatibility with Directory object interface.
662 # Shouldn't be used normally, only exists to simplify algorithms.
663 self.nb_files = 1
664 671
665 def _clone(self, new_root, new_path, tainted): 672 def _clone(self, new_root, new_path, tainted):
666 """Clones itself keeping meta-data.""" 673 """Clones itself keeping meta-data."""
667 out = self.__class__(new_root, new_path, tainted) 674 # Keep the self.size and self._real_path caches for performance reason. It
668 # Keep the cache for performance reason. It is also important when the 675 # is also important when the file becomes tainted (with a variable instead
669 # file becomes tainted (with a variable instead of the real path) since 676 # of the real path) since self.path is not an on-disk path anymore so
670 # self.path is not an on-disk path anymore so out._size cannot be updated. 677 # out._size cannot be updated.
671 out._size = self.size 678 out = self.__class__(new_root, new_path, tainted, self.size)
672 out._real_path = self._real_path 679 out._real_path = self._real_path
673 return out 680 return out
674 681
675 class Directory(_TouchedObject): 682 class Directory(_TouchedObject):
676 """A directory of files. Must exist.""" 683 """A directory of files. Must exist."""
677 def __init__(self, root, path, tainted, size, nb_files): 684 def __init__(self, root, path, tainted, size, nb_files):
678 """path='.' is a valid value and must be handled appropriately.""" 685 """path='.' is a valid value and must be handled appropriately."""
679 assert not path.endswith(os.path.sep), path 686 assert not path.endswith(os.path.sep), path
680 super(Results.Directory, self).__init__(root, path + os.path.sep, tainted) 687 super(Results.Directory, self).__init__(
681 self.nb_files = nb_files 688 root, path + os.path.sep, tainted, size, nb_files)
682 # In that case, it's not a cache, it's an actual value that is never 689 # In that case, it's not a cache, it's an actual value that is never
683 # modified. 690 # modified and represents the total size of the files contained in this
691 # directory.
684 assert size 692 assert size
685 self._size = size
686 693
687 def flatten(self): 694 def flatten(self):
688 out = super(Results.Directory, self).flatten() 695 out = super(Results.Directory, self).flatten()
689 out['nb_files'] = self.nb_files 696 out['nb_files'] = self.nb_files
690 return out 697 return out
691 698
692 def _clone(self, new_root, new_path, tainted): 699 def _clone(self, new_root, new_path, tainted):
693 """Clones itself keeping meta-data.""" 700 """Clones itself keeping meta-data."""
694 out = self.__class__( 701 out = self.__class__(
695 new_root, 702 new_root,
696 new_path.rstrip(os.path.sep), 703 new_path.rstrip(os.path.sep),
697 tainted, 704 tainted,
698 self.size, 705 self.size,
699 self.nb_files) 706 self.nb_files)
700 out._real_path = self._real_path 707 out._real_path = self._real_path
701 return out 708 return out
702 709
703 class Process(object): 710 class Process(object):
704 """A process that was traced. 711 """A process that was traced.
705 712
706 Contains references to the files accessed by this process and its children. 713 Contains references to the files accessed by this process and its children.
707 """ 714 """
708 def __init__( 715 def __init__(self, pid, files, executable, command, initial_cwd, children):
709 self, pid, files, executable, command, initial_cwd, children):
710 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) 716 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
711 self.pid = pid 717 self.pid = pid
712 self.files = sorted( 718 self.files = sorted(files, key=lambda x: x.path)
713 (Results.File(None, f, False) for f in files), key=lambda x: x.path)
714 self.children = children 719 self.children = children
715 self.executable = executable 720 self.executable = executable
716 self.command = command 721 self.command = command
717 self.initial_cwd = initial_cwd 722 self.initial_cwd = initial_cwd
718 723
719 # Check internal consistency. 724 # Check internal consistency.
720 assert len(set(f.path for f in self.files)) == len(self.files), [ 725 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
721 f.path for f in self.files] 726 f.path for f in self.files)
722 assert isinstance(self.children, list) 727 assert isinstance(self.children, list)
723 assert isinstance(self.files, list) 728 assert isinstance(self.files, list)
724 729
725 @property 730 @property
726 def all(self): 731 def all(self):
727 for child in self.children: 732 for child in self.children:
728 for i in child.all: 733 for i in child.all:
729 yield i 734 yield i
730 yield self 735 yield self
731 736
732 def flatten(self): 737 def flatten(self):
733 return { 738 return {
734 'children': [c.flatten() for c in self.children], 739 'children': [c.flatten() for c in self.children],
735 'command': self.command, 740 'command': self.command,
736 'executable': self.executable, 741 'executable': self.executable,
737 'files': [f.flatten() for f in self.files], 742 'files': [f.flatten() for f in self.files],
738 'initial_cwd': self.initial_cwd, 743 'initial_cwd': self.initial_cwd,
739 'pid': self.pid, 744 'pid': self.pid,
740 } 745 }
741 746
742 def strip_root(self, root): 747 def strip_root(self, root):
743 assert isabs(root) and root.endswith(os.path.sep), root 748 assert isabs(root) and root.endswith(os.path.sep), root
749 # Loads the files after since they are constructed as objects.
744 out = self.__class__( 750 out = self.__class__(
745 self.pid, 751 self.pid,
746 [], 752 filter(None, (f.strip_root(root) for f in self.files)),
747 self.executable, 753 self.executable,
748 self.command, 754 self.command,
749 self.initial_cwd, 755 self.initial_cwd,
750 [c.strip_root(root) for c in self.children]) 756 [c.strip_root(root) for c in self.children])
751 # Override the files property.
752 out.files = filter(None, (f.strip_root(root) for f in self.files))
753 logging.debug( 757 logging.debug(
754 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) 758 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
755 return out 759 return out
756 760
757
758 def __init__(self, process): 761 def __init__(self, process):
759 self.process = process 762 self.process = process
760 # Cache. 763 # Cache.
761 self._files = None 764 self._files = None
762 765
763 def flatten(self): 766 def flatten(self):
764 return { 767 return {
765 'root': self.process.flatten(), 768 'root': self.process.flatten(),
766 } 769 }
767 770
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
808 assert isinstance(root, ApiBase.Context) 811 assert isinstance(root, ApiBase.Context)
809 assert isinstance(pid, int), repr(pid) 812 assert isinstance(pid, int), repr(pid)
810 self.root = weakref.ref(root) 813 self.root = weakref.ref(root)
811 self.pid = pid 814 self.pid = pid
812 # Children are pids. 815 # Children are pids.
813 self.children = [] 816 self.children = []
814 self.parentid = parentid 817 self.parentid = parentid
815 self.initial_cwd = initial_cwd 818 self.initial_cwd = initial_cwd
816 self.cwd = None 819 self.cwd = None
817 self.files = set() 820 self.files = set()
821 self.only_touched = set()
818 self.executable = None 822 self.executable = None
819 self.command = None 823 self.command = None
820 824
821 if parentid: 825 if parentid:
822 self.root().processes[parentid].children.append(pid) 826 self.root().processes[parentid].children.append(pid)
823 827
824 def to_results_process(self): 828 def to_results_process(self):
825 """Resolves file case sensitivity and/or late-bound strings.""" 829 """Resolves file case sensitivity and/or late-bound strings."""
826 children = [ 830 children = [
827 self.root().processes[c].to_results_process() for c in self.children 831 self.root().processes[c].to_results_process() for c in self.children
828 ] 832 ]
829 # When resolving files, it's normal to get dupe because a file could be 833 # When resolving files, it's normal to get dupe because a file could be
830 # opened multiple times with different case. Resolve the deduplication 834 # opened multiple times with different case. Resolve the deduplication
831 # here. 835 # here.
832 def render_to_string_and_fix_case(x): 836 def render_to_string_and_fix_case(x):
833 """Returns the native file path case if the file exists. 837 """Returns the native file path case if the file exists.
834 838
835 Converts late-bound strings. 839 Converts late-bound strings.
836 """ 840 """
837 if not x: 841 if not x:
838 return x 842 return x
839 # TODO(maruel): Do not upconvert to unicode here, on linux we don't 843 # TODO(maruel): Do not upconvert to unicode here, on linux we don't
840 # know the file path encoding so they must be treated as bytes. 844 # know the file path encoding so they must be treated as bytes.
841 x = unicode(x) 845 x = unicode(x)
842 if not os.path.exists(x): 846 if not os.path.exists(x):
843 return x 847 return x
844 return get_native_path_case(x) 848 return get_native_path_case(x)
845 849
850 # Filters out directories. Some may have passed through.
851 files = set(map(render_to_string_and_fix_case, self.files))
852 only_touched = set(
853 map(render_to_string_and_fix_case, self.only_touched))
854 only_touched -= files
855
846 files = [ 856 files = [
847 f for f in set(map(render_to_string_and_fix_case, self.files)) 857 Results.File(None, f, False, None) for f in files
848 if not os.path.isdir(f) 858 if not os.path.isdir(f)
849 ] 859 ]
860 # Using 0 as size means the file's content is ignored since the file was
861 # never opened for I/O.
862 files.extend(
863 Results.File(None, f, False, 0) for f in only_touched
864 if not os.path.isdir(f)
865 )
850 return Results.Process( 866 return Results.Process(
851 self.pid, 867 self.pid,
852 files, 868 files,
853 render_to_string_and_fix_case(self.executable), 869 render_to_string_and_fix_case(self.executable),
854 self.command, 870 self.command,
855 render_to_string_and_fix_case(self.initial_cwd), 871 render_to_string_and_fix_case(self.initial_cwd),
856 children) 872 children)
857 873
858 def add_file(self, filepath): 874 def add_file(self, filepath):
859 if self.root().blacklist(unicode(filepath)): 875 if self.root().blacklist(unicode(filepath)):
(...skipping 2229 matching lines...) Expand 10 before | Expand all | Expand 10 after
3089 return command(argv[1:]) 3105 return command(argv[1:])
3090 except TracingFailure, e: 3106 except TracingFailure, e:
3091 sys.stderr.write('\nError: ') 3107 sys.stderr.write('\nError: ')
3092 sys.stderr.write(str(e)) 3108 sys.stderr.write(str(e))
3093 sys.stderr.write('\n') 3109 sys.stderr.write('\n')
3094 return 1 3110 return 1
3095 3111
3096 3112
3097 if __name__ == '__main__': 3113 if __name__ == '__main__':
3098 sys.exit(main(sys.argv[1:])) 3114 sys.exit(main(sys.argv[1:]))
OLDNEW
« no previous file with comments | « tools/isolate/data/trace_inputs/touch_only.py ('k') | tools/isolate/trace_inputs_smoke_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698