OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
376 if dense: | 376 if dense: |
377 json.dump(data, f, separators=(',',':')) | 377 json.dump(data, f, separators=(',',':')) |
378 else: | 378 else: |
379 json.dump(data, f, sort_keys=True, indent=2) | 379 json.dump(data, f, sort_keys=True, indent=2) |
380 | 380 |
381 | 381 |
382 class Results(object): | 382 class Results(object): |
383 """Results of a trace session.""" | 383 """Results of a trace session.""" |
384 | 384 |
385 class File(object): | 385 class File(object): |
386 """A file that was accessed.""" | 386 """A file that was accessed. |
387 def __init__(self, root, path): | 387 |
| 388 If tainted is true, it means it is not a real path anymore as a variable |
| 389 replacement occurred. |
| 390 """ |
| 391 def __init__(self, root, path, tainted=False): |
388 """Represents a file accessed. May not be present anymore.""" | 392 """Represents a file accessed. May not be present anymore.""" |
389 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path)) | 393 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path)) |
390 self.root = root | 394 self.root = root |
391 self.path = path | 395 self.path = path |
392 | 396 |
| 397 self.tainted = tainted |
393 self._size = None | 398 self._size = None |
394 # For compatibility with Directory object interface. | 399 # For compatibility with Directory object interface. |
395 # Shouldn't be used normally, only exists to simplify algorithms. | 400 # Shouldn't be used normally, only exists to simplify algorithms. |
396 self.nb_files = 1 | 401 self.nb_files = 1 |
397 | 402 |
398 # Check internal consistency. | 403 # Check internal consistency. |
399 assert path, path | 404 assert path, path |
400 assert bool(root) != bool(isabs(path)), (root, path) | 405 assert tainted or bool(root) != bool(isabs(path)), (root, path) |
401 assert ( | 406 assert tainted or ( |
402 not os.path.exists(self.full_path) or | 407 not os.path.exists(self.full_path) or |
403 self.full_path == get_native_path_case(self.full_path)) | 408 self.full_path == get_native_path_case(self.full_path)) |
404 | 409 |
405 @property | 410 @property |
406 def existent(self): | 411 def existent(self): |
407 return self.size != -1 | 412 return self.size != -1 |
408 | 413 |
409 @property | 414 @property |
410 def size(self): | 415 def size(self): |
411 """File's size. -1 is not existent.""" | 416 """File's size. -1 is not existent.""" |
412 if self._size is None: | 417 if self._size is None and not self.tainted: |
413 try: | 418 try: |
414 self._size = os.stat(self.full_path).st_size | 419 self._size = os.stat(self.full_path).st_size |
415 except OSError: | 420 except OSError: |
416 self._size = -1 | 421 self._size = -1 |
417 return self._size | 422 return self._size |
418 | 423 |
419 @property | 424 @property |
420 def full_path(self): | 425 def full_path(self): |
421 if self.root: | 426 if self.root: |
422 return os.path.join(self.root, self.path) | 427 return os.path.join(self.root, self.path) |
423 return self.path | 428 return self.path |
424 | 429 |
425 def flatten(self): | 430 def flatten(self): |
426 return { | 431 return { |
427 'path': self.path, | 432 'path': self.path, |
428 'size': self.size, | 433 'size': self.size, |
429 } | 434 } |
430 | 435 |
431 def strip_root(self, root): | 436 def strip_root(self, root): |
432 """Returns a clone of itself with 'root' stripped off.""" | 437 """Returns a clone of itself with 'root' stripped off.""" |
433 # Check internal consistency. | 438 # Check internal consistency. |
434 assert isabs(root) and root.endswith(os.path.sep), root | 439 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root |
435 if not self.full_path.startswith(root): | 440 if not self.full_path.startswith(root): |
436 return None | 441 return None |
437 out = self.__class__(root, self.full_path[len(root):]) | 442 return self._clone(root, self.full_path[len(root):], self.tainted) |
438 # Keep size cache. | 443 |
439 out._size = self._size | 444 def replace_variables(self, variables): |
| 445 """Replaces the root of this File with one of the variables if it matches. |
| 446 |
| 447 If a variable replacement occurs, the cloned object becomes tainted. |
| 448 """ |
| 449 for variable, root_path in variables.iteritems(): |
| 450 if self.path.startswith(root_path): |
| 451 return self._clone( |
| 452 self.root, variable + self.path[len(root_path):], True) |
| 453 # No need to clone, returns ourself. |
| 454 return self |
| 455 |
| 456 def _clone(self, new_root, new_path, tainted): |
| 457 """Clones itself keeping meta-data.""" |
| 458 out = self.__class__(new_root, new_path, tainted) |
| 459 out._size = self.size |
440 return out | 460 return out |
441 | 461 |
442 class Directory(File): | 462 class Directory(File): |
443 """A directory of files. Must exist.""" | 463 """A directory of files. Must exist.""" |
444 def __init__(self, root, path, size, nb_files): | 464 def __init__(self, root, path, tainted, size, nb_files): |
445 """path='.' is a valid value and must be handled appropriately.""" | 465 """path='.' is a valid value and must be handled appropriately.""" |
446 super(Results.Directory, self).__init__(root, path) | 466 super(Results.Directory, self).__init__(root, path, tainted) |
447 assert not self.path.endswith(os.path.sep) | 467 assert not self.path.endswith(os.path.sep), self.path |
448 self.path = self.path + os.path.sep | 468 self.path = self.path + os.path.sep |
449 self.nb_files = nb_files | 469 self.nb_files = nb_files |
450 self._size = size | 470 self._size = size |
451 | 471 |
452 def flatten(self): | 472 def flatten(self): |
453 out = super(Results.Directory, self).flatten() | 473 out = super(Results.Directory, self).flatten() |
454 out['nb_files'] = self.nb_files | 474 out['nb_files'] = self.nb_files |
455 return out | 475 return out |
456 | 476 |
| 477 def _clone(self, new_root, new_path, tainted): |
| 478 """Clones itself keeping meta-data.""" |
| 479 return self.__class__( |
| 480 new_root, |
| 481 new_path.rstrip(os.path.sep), |
| 482 tainted, |
| 483 self.size, |
| 484 self.nb_files) |
| 485 |
457 class Process(object): | 486 class Process(object): |
458 """A process that was traced. | 487 """A process that was traced. |
459 | 488 |
460 Contains references to the files accessed by this process and its children. | 489 Contains references to the files accessed by this process and its children. |
461 """ | 490 """ |
462 def __init__( | 491 def __init__( |
463 self, pid, files, executable, command, initial_cwd, children): | 492 self, pid, files, executable, command, initial_cwd, children): |
464 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) | 493 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) |
465 self.pid = pid | 494 self.pid = pid |
466 self.files = sorted( | 495 self.files = sorted( |
(...skipping 2121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2588 | 2617 |
2589 root_prefix = len(root_dir) + 1 if root_dir else 0 | 2618 root_prefix = len(root_dir) + 1 if root_dir else 0 |
2590 for directory in sorted(buckets, reverse=True): | 2619 for directory in sorted(buckets, reverse=True): |
2591 actual = set(f for f in os.listdir(directory) if not blacklist(f)) | 2620 actual = set(f for f in os.listdir(directory) if not blacklist(f)) |
2592 expected = set(buckets[directory]) | 2621 expected = set(buckets[directory]) |
2593 if not (actual - expected): | 2622 if not (actual - expected): |
2594 parent = os.path.dirname(directory) | 2623 parent = os.path.dirname(directory) |
2595 buckets[parent][os.path.basename(directory)] = Results.Directory( | 2624 buckets[parent][os.path.basename(directory)] = Results.Directory( |
2596 root_dir, | 2625 root_dir, |
2597 directory[root_prefix:], | 2626 directory[root_prefix:], |
| 2627 False, |
2598 sum(f.size for f in buckets[directory].itervalues()), | 2628 sum(f.size for f in buckets[directory].itervalues()), |
2599 sum(f.nb_files for f in buckets[directory].itervalues())) | 2629 sum(f.nb_files for f in buckets[directory].itervalues())) |
2600 # Remove the whole bucket. | 2630 # Remove the whole bucket. |
2601 del buckets[directory] | 2631 del buckets[directory] |
2602 | 2632 |
2603 # Reverse the mapping with what remains. The original instances are returned, | 2633 # Reverse the mapping with what remains. The original instances are returned, |
2604 # so the cached meta data is kept. | 2634 # so the cached meta data is kept. |
2605 return sorted( | 2635 return sorted( |
2606 sum((x.values() for x in buckets.itervalues()), []), | 2636 sum((x.values() for x in buckets.itervalues()), []), |
2607 key=lambda x: x.path) | 2637 key=lambda x: x.path) |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2689 help='Root directory to base everything off it. Anything outside of this ' | 2719 help='Root directory to base everything off it. Anything outside of this ' |
2690 'this directory will not be reported') | 2720 'this directory will not be reported') |
2691 parser.add_option( | 2721 parser.add_option( |
2692 '-j', '--json', action='store_true', | 2722 '-j', '--json', action='store_true', |
2693 help='Outputs raw result data as json') | 2723 help='Outputs raw result data as json') |
2694 options, args = parser.parse_args(args) | 2724 options, args = parser.parse_args(args) |
2695 | 2725 |
2696 if options.root_dir: | 2726 if options.root_dir: |
2697 options.root_dir = os.path.abspath(options.root_dir) | 2727 options.root_dir = os.path.abspath(options.root_dir) |
2698 | 2728 |
| 2729 variables = dict(options.variables) |
2699 api = get_api() | 2730 api = get_api() |
2700 try: | 2731 try: |
2701 results = load_trace(options.log, options.root_dir, api) | 2732 results = load_trace(options.log, options.root_dir, api) |
2702 simplified = extract_directories(options.root_dir, results.files) | 2733 simplified = extract_directories(options.root_dir, results.files) |
| 2734 simplified = [f.replace_variables(variables) for f in simplified] |
| 2735 |
2703 if options.json: | 2736 if options.json: |
2704 write_json(sys.stdout, results.flatten(), False) | 2737 write_json(sys.stdout, results.flatten(), False) |
2705 else: | 2738 else: |
2706 print('Total: %d' % len(results.files)) | 2739 print('Total: %d' % len(results.files)) |
2707 print('Non existent: %d' % len(results.non_existent)) | 2740 print('Non existent: %d' % len(results.non_existent)) |
2708 for f in results.non_existent: | 2741 for f in results.non_existent: |
2709 print(' %s' % f.path) | 2742 print(' %s' % f.path) |
2710 print( | 2743 print( |
2711 'Interesting: %d reduced to %d' % ( | 2744 'Interesting: %d reduced to %d' % ( |
2712 len(results.existent), len(simplified))) | 2745 len(results.existent), len(simplified))) |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2793 for fn in dir(sys.modules[__name__]) | 2826 for fn in dir(sys.modules[__name__]) |
2794 if fn.startswith('CMD'))) | 2827 if fn.startswith('CMD'))) |
2795 | 2828 |
2796 command = get_command_handler(argv[0] if argv else None) | 2829 command = get_command_handler(argv[0] if argv else None) |
2797 parser = gen_parser(command) | 2830 parser = gen_parser(command) |
2798 return command(parser, argv[1:]) | 2831 return command(parser, argv[1:]) |
2799 | 2832 |
2800 | 2833 |
2801 if __name__ == '__main__': | 2834 if __name__ == '__main__': |
2802 sys.exit(main(sys.argv[1:])) | 2835 sys.exit(main(sys.argv[1:])) |
OLD | NEW |