Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # coding=utf-8 | 2 # coding=utf-8 |
| 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
| 6 | 6 |
| 7 """Traces an executable and its child processes and extracts the files accessed | 7 """Traces an executable and its child processes and extracts the files accessed |
| 8 by them. | 8 by them. |
| 9 | 9 |
| 10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
| (...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 270 assert isinstance(root, ApiBase.Context) | 270 assert isinstance(root, ApiBase.Context) |
| 271 assert isinstance(pid, int), repr(pid) | 271 assert isinstance(pid, int), repr(pid) |
| 272 self.root = weakref.ref(root) | 272 self.root = weakref.ref(root) |
| 273 self.pid = pid | 273 self.pid = pid |
| 274 # Children are pids. | 274 # Children are pids. |
| 275 self.children = [] | 275 self.children = [] |
| 276 self.parentid = parentid | 276 self.parentid = parentid |
| 277 self.initial_cwd = initial_cwd | 277 self.initial_cwd = initial_cwd |
| 278 self.cwd = None | 278 self.cwd = None |
| 279 self.files = set() | 279 self.files = set() |
| 280 self.executable = None | |
| 281 self.command = None | |
| 282 | |
| 283 if parentid: | |
| 284 self.root().processes[parentid].children.append(pid) | |
| 285 | |
| 286 def to_results_process(self): | |
| 287 """Resolves file case sensitivity and or late-bound strings.""" | |
| 288 children = [ | |
| 289 self.root().processes[c].to_results_process() for c in self.children | |
|
MAD
2012/05/30 20:29:36
We usually prefer not doing in an list constructio
M-A Ruel
2012/05/30 22:34:10
Why? It's much slower. I agree for this line in pa
MAD
2012/05/31 14:00:57
OK, then... I would find it more readable, but fi
| |
| 290 ] | |
| 291 # When resolving files, it's normal to get dupe because of a file could | |
|
MAD
2012/05/30 20:29:36
"because of a" -> "because a"
M-A Ruel
2012/05/30 22:34:10
done.
| |
| 292 # be opened multiple times with different case. Resolve the | |
| 293 # deduplication here. | |
| 294 def render_to_string_and_fix_case(x): | |
| 295 """Returns the native file path case if the file exists. | |
| 296 | |
| 297 Converts late-bound strings. | |
| 298 """ | |
| 299 if not x: | |
| 300 return x | |
| 301 # TODO(maruel): Do not upconvert to unicode here, on linux we don't | |
| 302 # know the file path encoding so they must be treated as bytes. | |
| 303 x = unicode(x) | |
| 304 if not os.path.exists(x): | |
| 305 return x | |
| 306 return get_native_path_case(x) | |
| 307 | |
| 308 return Results.Process( | |
| 309 self.pid, | |
| 310 set(map(render_to_string_and_fix_case, self.files)), | |
| 311 render_to_string_and_fix_case(self.executable), | |
| 312 self.command, | |
| 313 render_to_string_and_fix_case(self.initial_cwd), | |
| 314 children) | |
| 280 | 315 |
| 281 def add_file(self, filepath): | 316 def add_file(self, filepath): |
| 282 if self.root().blacklist(unicode(filepath)): | 317 if self.root().blacklist(unicode(filepath)): |
| 283 return | 318 return |
| 284 logging.debug('add_file(%d, %s)' % (self.pid, filepath)) | 319 logging.debug('add_file(%d, %s)' % (self.pid, filepath)) |
| 285 self.files.add(filepath) | 320 self.files.add(filepath) |
| 286 | 321 |
| 287 def __init__(self, blacklist): | 322 def __init__(self, blacklist): |
| 288 self.blacklist = blacklist | 323 self.blacklist = blacklist |
| 289 self.processes = {} | 324 self.processes = {} |
| 290 | 325 |
| 291 def resolve(self): | |
| 292 """Resolve all the filenames and returns them.""" | |
| 293 files = set() | |
| 294 non_existent = set() | |
| 295 for p in self.processes.itervalues(): | |
| 296 for filepath in p.files: | |
| 297 filepath = unicode(filepath) | |
| 298 # For late-bound file paths, it could be blacklisted after all the | |
| 299 # processes are processed so it needs to be checked again. | |
| 300 if self.blacklist(filepath): | |
| 301 break | |
| 302 if os.path.isfile(filepath): | |
| 303 files.add(filepath) | |
| 304 else: | |
| 305 non_existent.add(filepath) | |
| 306 return files, non_existent | |
| 307 | |
| 308 @staticmethod | 326 @staticmethod |
| 309 def clean_trace(logname): | 327 def clean_trace(logname): |
| 310 """Deletes the old log.""" | 328 """Deletes the old log.""" |
| 311 raise NotImplementedError() | 329 raise NotImplementedError() |
| 312 | 330 |
| 313 @classmethod | 331 @classmethod |
| 314 def gen_trace(cls, cmd, cwd, logname, output): | 332 def gen_trace(cls, cmd, cwd, logname, output): |
| 315 """Runs the OS-specific trace program on an executable. | 333 """Runs the OS-specific trace program on an executable. |
| 316 | 334 |
| 317 Since the logs are per pid, we need to log the list of the initial pid. | 335 Since the logs are per pid, we need to log the list of the initial pid. |
| 318 """ | 336 """ |
| 319 raise NotImplementedError(cls.__class__.__name__) | 337 raise NotImplementedError(cls.__class__.__name__) |
| 320 | 338 |
| 321 @classmethod | 339 @classmethod |
| 322 def parse_log(cls, filename, blacklist): | 340 def parse_log(cls, filename, blacklist): |
| 323 """Processes a trace log and returns the files opened and the files that do | 341 """Processes a trace log and returns the files opened and the files that do |
| 324 not exist. | 342 not exist. |
| 325 | 343 |
| 326 It does not track directories. | 344 It does not track directories. |
| 327 | 345 |
| 328 Most of the time, files that do not exist are temporary test files that | 346 Most of the time, files that do not exist are temporary test files that |
| 329 should be put in /tmp instead. See http://crbug.com/116251. | 347 should be put in /tmp instead. See http://crbug.com/116251. |
| 330 | 348 |
| 331 Returns a tuple (existing files, non existing files, nb_processes_created) | 349 Returns a tuple (existing files, non existing files, nb_processes_created) |
| 332 """ | 350 """ |
| 333 raise NotImplementedError(cls.__class__.__name__) | 351 raise NotImplementedError(cls.__class__.__name__) |
| 334 | 352 |
| 335 | 353 |
| 354 class Results(object): | |
| 355 """Results of a trace session.""" | |
| 356 | |
| 357 class File(object): | |
| 358 """A file that was accessed.""" | |
| 359 def __init__(self, root, path): | |
| 360 """Represents a file accessed. May not be present anymore.""" | |
| 361 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path)) | |
| 362 self.root = root | |
| 363 self.path = path | |
| 364 | |
| 365 self._size = None | |
| 366 # For compatibility with Directory object interface. | |
| 367 # Shouldn't be used normally, only exists to simplify algorithms. | |
| 368 self.nb_files = 1 | |
| 369 | |
| 370 assert path, path | |
| 371 assert bool(root) != bool(isabs(path)), (root, path) | |
| 372 assert ( | |
| 373 not os.path.exists(self.full_path) or | |
| 374 self.full_path == get_native_path_case(self.full_path)) | |
| 375 | |
| 376 @property | |
| 377 def existent(self): | |
| 378 return self.size != -1 | |
| 379 | |
| 380 @property | |
| 381 def size(self): | |
| 382 """File's size. -1 is not existent.""" | |
| 383 if self._size is None: | |
| 384 try: | |
| 385 self._size = os.stat(self.full_path).st_size | |
| 386 except OSError: | |
| 387 self._size = -1 | |
| 388 return self._size | |
| 389 | |
| 390 @property | |
| 391 def full_path(self): | |
| 392 if self.root: | |
| 393 return os.path.join(self.root, self.path) | |
| 394 return self.path | |
| 395 | |
| 396 def flatten(self): | |
| 397 return { | |
| 398 'path': self.path, | |
| 399 'size': self.size, | |
| 400 } | |
| 401 | |
| 402 def strip_root(self, root): | |
| 403 """Returns a clone of itself with 'root' stripped off.""" | |
| 404 assert isabs(root) and root.endswith(os.path.sep), root | |
| 405 if not self.full_path.startswith(root): | |
| 406 return None | |
| 407 out = self.__class__(root, self.full_path[len(root):]) | |
| 408 # Keep size cache. | |
| 409 out._size = self._size | |
| 410 return out | |
| 411 | |
| 412 class Directory(File): | |
| 413 """A directory of files. Must exist.""" | |
| 414 def __init__(self, root, path, size, nb_files): | |
| 415 """path='.' is a valid value and must be handled appropriately.""" | |
| 416 super(Results.Directory, self).__init__(root, path) | |
| 417 self.path = self.path + os.path.sep | |
|
MAD
2012/05/30 20:29:36
maybe add an assert that path doesn't already ends
M-A Ruel
2012/05/30 22:34:10
done
| |
| 418 self.nb_files = nb_files | |
| 419 self._size = size | |
| 420 | |
| 421 def flatten(self): | |
| 422 out = super(Results.Directory, self).flatten() | |
| 423 out['nb_files'] = self.nb_files | |
| 424 return out | |
| 425 | |
| 426 class Process(object): | |
| 427 """A process that was traced. | |
| 428 | |
| 429 Contains references to the files accessed by this process and its children. | |
| 430 """ | |
| 431 def __init__( | |
| 432 self, pid, files, executable, command, initial_cwd, children): | |
| 433 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) | |
| 434 self.pid = pid | |
| 435 self.files = sorted( | |
| 436 (Results.File(None, f) for f in files), key=lambda x: x.path) | |
| 437 assert len(set(f.path for f in self.files)) == len(self.files), [ | |
| 438 f.path for f in self.files] | |
| 439 assert isinstance(children, list) | |
| 440 assert isinstance(self.files, list) | |
| 441 self.children = children | |
| 442 self.executable = executable | |
| 443 self.command = command | |
| 444 self.initial_cwd = initial_cwd | |
| 445 | |
| 446 @property | |
| 447 def all(self): | |
| 448 for child in self.children: | |
| 449 for i in child.all: | |
| 450 yield i | |
| 451 yield self | |
| 452 | |
| 453 def flatten(self): | |
| 454 return { | |
| 455 'children': [c.flatten() for c in self.children], | |
| 456 'command': self.command, | |
| 457 'executable': self.executable, | |
| 458 'files': [f.flatten() for f in self.files], | |
| 459 'initial_cwd': self.initial_cwd, | |
| 460 'pid': self.pid, | |
| 461 } | |
| 462 | |
| 463 def strip_root(self, root): | |
| 464 assert isabs(root) and root.endswith(os.path.sep), root | |
| 465 out = self.__class__( | |
| 466 self.pid, | |
| 467 [], | |
| 468 self.executable, | |
| 469 self.command, | |
| 470 self.initial_cwd, | |
| 471 [c.strip_root(root) for c in self.children]) | |
| 472 # Override the files property. | |
| 473 out.files = filter(None, (f.strip_root(root) for f in self.files)) | |
| 474 logging.debug( | |
| 475 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) | |
| 476 return out | |
| 477 | |
| 478 | |
| 479 def __init__(self, process): | |
| 480 self.process = process | |
| 481 # Cache. | |
| 482 self._files = None | |
| 483 | |
| 484 def flatten(self): | |
| 485 return { | |
| 486 'root': self.process.flatten(), | |
| 487 } | |
| 488 | |
| 489 @property | |
| 490 def files(self): | |
| 491 if self._files is None: | |
| 492 self._files = sorted( | |
| 493 sum((p.files for p in self.process.all), []), | |
| 494 key=lambda x: x.path) | |
| 495 return self._files | |
| 496 | |
| 497 @property | |
| 498 def existent(self): | |
| 499 return [f for f in self.files if f.existent] | |
| 500 | |
| 501 @property | |
| 502 def non_existent(self): | |
| 503 return [f for f in self.files if not f.existent] | |
| 504 | |
| 505 def strip_root(self, root): | |
| 506 """Returns a clone with all the files outside the directory |root| removed | |
| 507 and converts all the path to be relative paths. | |
| 508 """ | |
| 509 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep | |
| 510 logging.debug('strip_root(%s)' % root) | |
| 511 return Results(self.process.strip_root(root)) | |
| 512 | |
| 513 | |
| 514 def extract_directories(files): | |
| 515 """Detects if all the files in a directory are in |files| and if so, replace | |
| 516 the individual files by a Results.Directory instance. | |
| 517 | |
| 518 Takes an array of Results.File instances and returns an array of | |
| 519 Results.File and Results.Directory instances. | |
| 520 """ | |
| 521 assert not any(isinstance(f, Results.Directory) for f in files) | |
| 522 # Remove non existent files. | |
| 523 files = [f for f in files if f.existent] | |
| 524 if not files: | |
| 525 return files | |
| 526 # All files must share the same root, which can be None. | |
| 527 assert len(set(f.root for f in files)) == 1, set(f.root for f in files) | |
| 528 | |
| 529 def blacklist(f): | |
| 530 return f in ('.git', '.svn') or f.endswith('.pyc') | |
| 531 | |
| 532 # Creates a {directory: {filename: File}} mapping, up to root. | |
| 533 root = files[0].root | |
| 534 buckets = {} | |
| 535 if root: | |
| 536 buckets[root.rstrip(os.path.sep)] = {} | |
| 537 for f in files: | |
|
MAD
2012/05/30 20:29:36
I prefer for file in files:
for a one line list c
M-A Ruel
2012/05/30 22:34:10
'file' is a poor choice because it is a builtin, s
| |
| 538 path = f.full_path | |
| 539 directory = os.path.dirname(path) | |
| 540 x = buckets.setdefault(directory, {}) | |
|
MAD
2012/05/30 20:29:36
what's 'x'?
M-A Ruel
2012/05/30 22:34:10
I didn't recall, fixed.
| |
| 541 x[path[len(directory)+1:]] = f | |
|
MAD
2012/05/30 20:29:36
really readable... :-P
M-A Ruel
2012/05/30 22:34:10
Rewrote.
| |
| 542 # Add all the directories recursively up to root. | |
| 543 while True: | |
| 544 old_d = directory | |
| 545 directory = os.path.dirname(directory) | |
| 546 if directory + os.path.sep == root or directory == old_d: | |
|
MAD
2012/05/30 20:29:36
You assume root ends with a path.sep but you don't
M-A Ruel
2012/05/30 22:34:10
Agreed, added assert.
| |
| 547 break | |
| 548 buckets.setdefault(directory, {}) | |
| 549 | |
| 550 for directory in sorted(buckets, reverse=True): | |
| 551 actual = set(f for f in os.listdir(directory) if not blacklist(f)) | |
| 552 expected = set(buckets[directory]) | |
| 553 if not (actual - expected): | |
| 554 parent = os.path.dirname(directory) | |
| 555 buckets[parent][os.path.basename(directory)] = Results.Directory( | |
| 556 root, | |
| 557 directory[len(root):], | |
| 558 sum(f.size for f in buckets[directory].itervalues()), | |
| 559 sum(f.nb_files for f in buckets[directory].itervalues())) | |
| 560 # Remove the whole bucket. | |
| 561 del buckets[directory] | |
| 562 | |
| 563 # Reverse the mapping with what remains. The original instances are returned, | |
| 564 # so the cached meta data is kept. | |
| 565 return sorted( | |
| 566 sum((x.values() for x in buckets.itervalues()), []), | |
| 567 key=lambda x: x.path) | |
| 568 | |
| 569 | |
| 336 class Strace(ApiBase): | 570 class Strace(ApiBase): |
| 337 """strace implies linux.""" | 571 """strace implies linux.""" |
| 338 IGNORED = ( | 572 IGNORED = ( |
| 339 '/bin', | 573 '/bin', |
| 340 '/dev', | 574 '/dev', |
| 341 '/etc', | 575 '/etc', |
| 342 '/lib', | 576 '/lib', |
| 343 '/proc', | 577 '/proc', |
| 344 '/sys', | 578 '/sys', |
| 345 '/tmp', | 579 '/tmp', |
| (...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 556 def render(self): | 790 def render(self): |
| 557 """Returns the string value of the initial cwd of the root process. | 791 """Returns the string value of the initial cwd of the root process. |
| 558 | 792 |
| 559 Used by RelativePath. | 793 Used by RelativePath. |
| 560 """ | 794 """ |
| 561 return self.initial_cwd | 795 return self.initial_cwd |
| 562 | 796 |
| 563 def on_line(self, pid, line): | 797 def on_line(self, pid, line): |
| 564 self.get_or_set_proc(pid).on_line(line.strip()) | 798 self.get_or_set_proc(pid).on_line(line.strip()) |
| 565 | 799 |
| 800 def to_results(self): | |
| 801 """Finds back the root process and verify consistency.""" | |
| 802 # TODO(maruel): Absolutely unnecessary, fix me. | |
| 803 root = [p for p in self.processes.itervalues() if not p.parentid] | |
| 804 assert len(root) == 1 | |
| 805 process = root[0].to_results_process() | |
| 806 assert sorted(self.processes) == sorted(p.pid for p in process.all) | |
| 807 return Results(process) | |
| 808 | |
| 566 def get_or_set_proc(self, pid): | 809 def get_or_set_proc(self, pid): |
| 567 """Returns the Context.Process instance for this pid or creates a new one. | 810 """Returns the Context.Process instance for this pid or creates a new one. |
| 568 """ | 811 """ |
| 569 assert isinstance(pid, int) and pid | 812 assert isinstance(pid, int) and pid |
| 570 return self.processes.setdefault(pid, self.Process(self, pid)) | 813 return self.processes.setdefault(pid, self.Process(self, pid)) |
| 571 | 814 |
| 572 @classmethod | 815 @classmethod |
| 573 def traces(cls): | 816 def traces(cls): |
| 574 prefix = 'handle_' | 817 prefix = 'handle_' |
| 575 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] | 818 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 628 with open(filename, 'r') as f: | 871 with open(filename, 'r') as f: |
| 629 data = json.load(f) | 872 data = json.load(f) |
| 630 context = cls.Context(blacklist, data['cwd']) | 873 context = cls.Context(blacklist, data['cwd']) |
| 631 for pidfile in glob.iglob(filename + '.*'): | 874 for pidfile in glob.iglob(filename + '.*'): |
| 632 pid = pidfile.rsplit('.', 1)[1] | 875 pid = pidfile.rsplit('.', 1)[1] |
| 633 if pid.isdigit(): | 876 if pid.isdigit(): |
| 634 pid = int(pid) | 877 pid = int(pid) |
| 635 # TODO(maruel): Load as utf-8 | 878 # TODO(maruel): Load as utf-8 |
| 636 for line in open(pidfile, 'rb'): | 879 for line in open(pidfile, 'rb'): |
| 637 context.on_line(pid, line) | 880 context.on_line(pid, line) |
| 638 files, non_existent = context.resolve() | 881 |
| 639 # Resolve any symlink we hit. | 882 return context.to_results() |
| 640 return ( | |
| 641 set(os.path.realpath(f) for f in files), | |
| 642 set(os.path.realpath(f) for f in non_existent), | |
| 643 len(context.processes)) | |
| 644 | 883 |
| 645 | 884 |
| 646 class Dtrace(ApiBase): | 885 class Dtrace(ApiBase): |
| 647 """Uses DTrace framework through dtrace. Requires root access. | 886 """Uses DTrace framework through dtrace. Requires root access. |
| 648 | 887 |
| 649 Implies Mac OSX. | 888 Implies Mac OSX. |
| 650 | 889 |
| 651 dtruss can't be used because it has compatibility issues with python. | 890 dtruss can't be used because it has compatibility issues with python. |
| 652 | 891 |
| 653 Also, the pid->cwd handling needs to be done manually since OSX has no way to | 892 Also, the pid->cwd handling needs to be done manually since OSX has no way to |
| (...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 820 self, | 1059 self, |
| 821 'handle_%s' % match.group(3).replace('-', '_'), | 1060 'handle_%s' % match.group(3).replace('-', '_'), |
| 822 self._handle_ignored) | 1061 self._handle_ignored) |
| 823 return fn( | 1062 return fn( |
| 824 int(match.group(1)), | 1063 int(match.group(1)), |
| 825 int(match.group(2)), | 1064 int(match.group(2)), |
| 826 match.group(3), | 1065 match.group(3), |
| 827 match.group(4), | 1066 match.group(4), |
| 828 match.group(5)) | 1067 match.group(5)) |
| 829 | 1068 |
| 1069 def to_results(self): | |
| 1070 """Uses self._initial_pid to determine the initial process.""" | |
| 1071 process = self.processes[self._initial_pid].to_results_process() | |
| 1072 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( | |
| 1073 sorted(self.processes), sorted(p.pid for p in process.all)) | |
| 1074 return Results(process) | |
| 1075 | |
| 830 def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): | 1076 def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): |
| 831 assert not self._tracer_pid and not self._initial_pid | 1077 assert not self._tracer_pid and not self._initial_pid |
| 832 self._tracer_pid = pid | 1078 self._tracer_pid = pid |
| 833 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) | 1079 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) |
| 834 | 1080 |
| 835 def handle_proc_start(self, ppid, pid, _function, _args, result): | 1081 def handle_proc_start(self, ppid, pid, _function, _args, result): |
| 836 """Transfers cwd. | 1082 """Transfers cwd. |
| 837 | 1083 |
| 838 The dtrace script already takes care of only tracing the processes that | 1084 The dtrace script already takes care of only tracing the processes that |
| 839 are child of the traced processes so there is no need to verify the | 1085 are child of the traced processes so there is no need to verify the |
| (...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 993 raise | 1239 raise |
| 994 | 1240 |
| 995 return dtrace.returncode or child.returncode, out | 1241 return dtrace.returncode or child.returncode, out |
| 996 | 1242 |
| 997 @classmethod | 1243 @classmethod |
| 998 def parse_log(cls, filename, blacklist): | 1244 def parse_log(cls, filename, blacklist): |
| 999 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 1245 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
| 1000 context = cls.Context(blacklist) | 1246 context = cls.Context(blacklist) |
| 1001 for line in open(filename, 'rb'): | 1247 for line in open(filename, 'rb'): |
| 1002 context.on_line(line) | 1248 context.on_line(line) |
| 1003 files, non_existent = context.resolve() | 1249 return context.to_results() |
| 1004 # Resolve any symlink we hit. | |
| 1005 return ( | |
| 1006 set(os.path.realpath(f) for f in files), | |
| 1007 set(os.path.realpath(f) for f in non_existent), | |
| 1008 len(context.processes)) | |
| 1009 | 1250 |
| 1010 @staticmethod | 1251 @staticmethod |
| 1011 def _sort_log(logname): | 1252 def _sort_log(logname): |
| 1012 """Sorts the log back in order when each call occurred. | 1253 """Sorts the log back in order when each call occurred. |
| 1013 | 1254 |
| 1014 dtrace doesn't save the buffer in strict order since it keeps one buffer per | 1255 dtrace doesn't save the buffer in strict order since it keeps one buffer per |
| 1015 CPU. | 1256 CPU. |
| 1016 """ | 1257 """ |
| 1017 with open(logname, 'rb') as logfile: | 1258 with open(logname, 'rb') as logfile: |
| 1018 lines = [f for f in logfile.readlines() if f.strip()] | 1259 lines = [f for f in logfile.readlines() if f.strip()] |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1124 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), | 1365 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), |
| 1125 None) | 1366 None) |
| 1126 if not handler: | 1367 if not handler: |
| 1127 # Try to get a universal fallback | 1368 # Try to get a universal fallback |
| 1128 handler = getattr(self, 'handle_%s_Any' % line[self.EVENT_NAME], None) | 1369 handler = getattr(self, 'handle_%s_Any' % line[self.EVENT_NAME], None) |
| 1129 if handler: | 1370 if handler: |
| 1130 handler(line) | 1371 handler(line) |
| 1131 else: | 1372 else: |
| 1132 assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) | 1373 assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) |
| 1133 | 1374 |
| 1375 def to_results(self): | |
| 1376 """Uses self._initial_pid to determine the initial process.""" | |
| 1377 process = self.processes[self._initial_pid].to_results_process() | |
| 1378 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( | |
| 1379 sorted(self.processes), sorted(p.pid for p in process.all)) | |
| 1380 return Results(process) | |
| 1381 | |
| 1134 def _thread_to_process(self, tid): | 1382 def _thread_to_process(self, tid): |
| 1135 """Finds the process from the thread id.""" | 1383 """Finds the process from the thread id.""" |
| 1136 tid = int(tid, 16) | 1384 tid = int(tid, 16) |
| 1137 return self.processes.get(self._threads_active.get(tid)) | 1385 return self.processes.get(self._threads_active.get(tid)) |
| 1138 | 1386 |
| 1139 @staticmethod | 1387 @staticmethod |
| 1140 def handle_EventTrace_Header(line): | 1388 def handle_EventTrace_Header(line): |
| 1141 """Verifies no event was dropped, e.g. no buffer overrun occurred.""" | 1389 """Verifies no event was dropped, e.g. no buffer overrun occurred.""" |
| 1142 #BUFFER_SIZE = 19 | 1390 #BUFFER_SIZE = 19 |
| 1143 #VERSION = 20 | 1391 #VERSION = 20 |
| (...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1546 | 1794 |
| 1547 # The fastest and smallest format but only supports 'ANSI' file paths. | 1795 # The fastest and smallest format but only supports 'ANSI' file paths. |
| 1548 # E.g. the filenames are encoding in the 'current' encoding. | 1796 # E.g. the filenames are encoding in the 'current' encoding. |
| 1549 for line in ansi_csv_reader(open(filename)): | 1797 for line in ansi_csv_reader(open(filename)): |
| 1550 # line is a list of unicode objects. | 1798 # line is a list of unicode objects. |
| 1551 context.on_csv_line(line) | 1799 context.on_csv_line(line) |
| 1552 | 1800 |
| 1553 else: | 1801 else: |
| 1554 raise NotImplementedError('Implement %s' % logformat) | 1802 raise NotImplementedError('Implement %s' % logformat) |
| 1555 | 1803 |
| 1556 files, non_existent = context.resolve() | 1804 return context.to_results() |
| 1557 # Resolve any symlink we hit. | |
| 1558 return ( | |
| 1559 set(os.path.realpath(f) for f in files), | |
| 1560 set(os.path.realpath(f) for f in non_existent), | |
| 1561 len(context.processes)) | |
| 1562 | |
| 1563 | |
| 1564 def relevant_files(files, root): | |
| 1565 """Trims the list of files to keep the expected files and unexpected files. | |
| 1566 | |
| 1567 Unexpected files are files that are not based inside the |root| directory. | |
| 1568 """ | |
| 1569 expected = [] | |
| 1570 unexpected = [] | |
| 1571 for f in files: | |
| 1572 if f.startswith(root): | |
| 1573 f = f[len(root):] | |
| 1574 assert f | |
| 1575 expected.append(f) | |
| 1576 else: | |
| 1577 unexpected.append(f) | |
| 1578 return sorted(set(expected)), sorted(set(unexpected)) | |
| 1579 | |
| 1580 | |
| 1581 def extract_directories(files, root): | |
| 1582 """Detects if all the files in a directory were loaded and if so, replace the | |
| 1583 individual files by the directory entry. | |
| 1584 """ | |
| 1585 directories = set(os.path.dirname(f) for f in files) | |
| 1586 files = set(files) | |
| 1587 for directory in sorted(directories, reverse=True): | |
| 1588 actual = set( | |
| 1589 os.path.join(directory, f) for f in | |
| 1590 os.listdir(os.path.join(root, directory)) | |
| 1591 if not f.endswith(('.svn', '.pyc')) | |
| 1592 ) | |
| 1593 if not (actual - files): | |
| 1594 files -= actual | |
| 1595 files.add(directory + os.path.sep) | |
| 1596 return sorted(files) | |
| 1597 | 1805 |
| 1598 | 1806 |
| 1599 def pretty_print(variables, stdout): | 1807 def pretty_print(variables, stdout): |
| 1600 """Outputs a gyp compatible list from the decoded variables. | 1808 """Outputs a gyp compatible list from the decoded variables. |
| 1601 | 1809 |
| 1602 Similar to pprint.print() but with NIH syndrome. | 1810 Similar to pprint.print() but with NIH syndrome. |
| 1603 """ | 1811 """ |
| 1604 # Order the dictionary keys by these keys in priority. | 1812 # Order the dictionary keys by these keys in priority. |
| 1605 ORDER = ( | 1813 ORDER = ( |
| 1606 'variables', 'condition', 'command', 'relative_cwd', 'read_only', | 1814 'variables', 'condition', 'command', 'relative_cwd', 'read_only', |
| (...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1742 | 1950 |
| 1743 def load_trace(logfile, root_dir, api): | 1951 def load_trace(logfile, root_dir, api): |
| 1744 """Loads a trace file and returns the processed file lists. | 1952 """Loads a trace file and returns the processed file lists. |
| 1745 | 1953 |
| 1746 Arguments: | 1954 Arguments: |
| 1747 - logfile: file to load. | 1955 - logfile: file to load. |
| 1748 - root_dir: root directory to use to determine if a file is relevant to the | 1956 - root_dir: root directory to use to determine if a file is relevant to the |
| 1749 trace or not. | 1957 trace or not. |
| 1750 - api: a tracing api instance. | 1958 - api: a tracing api instance. |
| 1751 """ | 1959 """ |
| 1752 root_dir = get_native_path_case(root_dir) | 1960 results = api.parse_log(logfile, get_blacklist(api)) |
| 1753 files, non_existent, processes = api.parse_log(logfile, get_blacklist(api)) | 1961 results = results.strip_root(root_dir) |
| 1754 expected, unexpected = relevant_files( | 1962 simplified = extract_directories(results.files) |
| 1755 files, root_dir.rstrip(os.path.sep) + os.path.sep) | 1963 return results, simplified |
| 1756 # In case the file system is case insensitive. | |
| 1757 expected = sorted(set( | |
| 1758 get_native_path_case(os.path.join(root_dir, f))[len(root_dir)+1:] | |
| 1759 for f in expected)) | |
| 1760 simplified = extract_directories(expected, root_dir) | |
| 1761 return files, expected, unexpected, non_existent, simplified, processes | |
| 1762 | 1964 |
| 1763 | 1965 |
| 1764 def trace_inputs(logfile, cmd, root_dir, cwd_dir, product_dir, force_trace): | 1966 def trace_inputs(logfile, cmd, root_dir, cwd_dir, product_dir, force_trace): |
| 1765 """Tries to load the logs if available. If not, trace the test. | 1967 """Tries to load the logs if available. If not, trace the test. |
| 1766 | 1968 |
| 1767 Symlinks are not processed at all. | 1969 Symlinks are not processed at all. |
| 1768 | 1970 |
| 1769 Arguments: | 1971 Arguments: |
| 1770 - logfile: Absolute path to the OS-specific trace. | 1972 - logfile: Absolute path to the OS-specific trace. |
| 1771 - cmd: Command list to run. | 1973 - cmd: Command list to run. |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 1798 if not os.path.isfile(logfile) or force_trace: | 2000 if not os.path.isfile(logfile) or force_trace: |
| 1799 print_if('Tracing... %s' % cmd) | 2001 print_if('Tracing... %s' % cmd) |
| 1800 # Use the proper relative directory. | 2002 # Use the proper relative directory. |
| 1801 cwd = root_dir if not cwd_dir else os.path.join(root_dir, cwd_dir) | 2003 cwd = root_dir if not cwd_dir else os.path.join(root_dir, cwd_dir) |
| 1802 silent = not isEnabledFor(logging.WARNING) | 2004 silent = not isEnabledFor(logging.WARNING) |
| 1803 returncode, _ = trace(logfile, cmd, cwd, api, silent) | 2005 returncode, _ = trace(logfile, cmd, cwd, api, silent) |
| 1804 if returncode and not force_trace: | 2006 if returncode and not force_trace: |
| 1805 return returncode | 2007 return returncode |
| 1806 | 2008 |
| 1807 print_if('Loading traces... %s' % logfile) | 2009 print_if('Loading traces... %s' % logfile) |
| 1808 files, expected, unexpected, non_existent, simplified, _ = load_trace( | 2010 results, simplified = load_trace(logfile, root_dir, api) |
| 1809 logfile, root_dir, api) | |
| 1810 | 2011 |
| 1811 print_if('Total: %d' % len(files)) | 2012 print_if('Total: %d' % len(results.files)) |
| 1812 print_if('Non existent: %d' % len(non_existent)) | 2013 print_if('Non existent: %d' % len(results.non_existent)) |
| 1813 for f in non_existent: | 2014 for f in results.non_existent: |
| 1814 print_if(' %s' % f) | 2015 print_if(' %s' % f.path) |
| 1815 if unexpected: | 2016 print_if( |
| 1816 print_if('Unexpected: %d' % len(unexpected)) | 2017 'Interesting: %d reduced to %d' % ( |
| 1817 for f in unexpected: | 2018 len(results.existent), len(simplified))) |
| 1818 print_if(' %s' % f) | |
| 1819 print_if('Interesting: %d reduced to %d' % (len(expected), len(simplified))) | |
| 1820 for f in simplified: | 2019 for f in simplified: |
| 1821 print_if(' %s' % f) | 2020 print_if(' %s' % f.path) |
| 1822 | 2021 |
| 1823 if cwd_dir is not None: | 2022 if cwd_dir is not None: |
| 1824 value = { | 2023 value = { |
| 1825 'conditions': [ | 2024 'conditions': [ |
| 1826 ['OS=="%s"' % get_flavor(), { | 2025 ['OS=="%s"' % get_flavor(), { |
| 1827 'variables': generate_dict(simplified, cwd_dir, product_dir), | 2026 'variables': generate_dict( |
| 2027 [f.path for f in simplified], cwd_dir, product_dir), | |
| 1828 }], | 2028 }], |
| 1829 ], | 2029 ], |
| 1830 } | 2030 } |
| 1831 pretty_print(value, sys.stdout) | 2031 pretty_print(value, sys.stdout) |
| 1832 return 0 | 2032 return 0 |
| 1833 | 2033 |
| 1834 | 2034 |
| 1835 def main(): | 2035 def main(): |
| 1836 parser = optparse.OptionParser( | 2036 parser = optparse.OptionParser( |
| 1837 usage='%prog <options> [cmd line...]') | 2037 usage='%prog <options> [cmd line...]') |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1878 os.path.abspath(options.log), | 2078 os.path.abspath(options.log), |
| 1879 args, | 2079 args, |
| 1880 options.root_dir, | 2080 options.root_dir, |
| 1881 options.cwd, | 2081 options.cwd, |
| 1882 options.product_dir, | 2082 options.product_dir, |
| 1883 options.force) | 2083 options.force) |
| 1884 | 2084 |
| 1885 | 2085 |
| 1886 if __name__ == '__main__': | 2086 if __name__ == '__main__': |
| 1887 sys.exit(main()) | 2087 sys.exit(main()) |
| OLD | NEW |