OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extracts the files accessed | 7 """Traces an executable and its child processes and extracts the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
270 assert isinstance(root, ApiBase.Context) | 270 assert isinstance(root, ApiBase.Context) |
271 assert isinstance(pid, int), repr(pid) | 271 assert isinstance(pid, int), repr(pid) |
272 self.root = weakref.ref(root) | 272 self.root = weakref.ref(root) |
273 self.pid = pid | 273 self.pid = pid |
274 # Children are pids. | 274 # Children are pids. |
275 self.children = [] | 275 self.children = [] |
276 self.parentid = parentid | 276 self.parentid = parentid |
277 self.initial_cwd = initial_cwd | 277 self.initial_cwd = initial_cwd |
278 self.cwd = None | 278 self.cwd = None |
279 self.files = set() | 279 self.files = set() |
| 280 self.executable = None |
| 281 self.command = None |
| 282 |
| 283 if parentid: |
| 284 self.root().processes[parentid].children.append(pid) |
| 285 |
| 286 def to_results_process(self): |
| 287 """Resolves file case sensitivity and/or late-bound strings.""" |
| 288 children = [ |
| 289 self.root().processes[c].to_results_process() for c in self.children |
| 290 ] |
| 291 # When resolving files, it's normal to get dupes because a file could be |
| 292 # opened multiple times with different case. Resolve the deduplication |
| 293 # here. |
| 294 def render_to_string_and_fix_case(x): |
| 295 """Returns the native file path case if the file exists. |
| 296 |
| 297 Converts late-bound strings. |
| 298 """ |
| 299 if not x: |
| 300 return x |
| 301 # TODO(maruel): Do not upconvert to unicode here, on linux we don't |
| 302 # know the file path encoding so they must be treated as bytes. |
| 303 x = unicode(x) |
| 304 if not os.path.exists(x): |
| 305 return x |
| 306 return get_native_path_case(x) |
| 307 |
| 308 return Results.Process( |
| 309 self.pid, |
| 310 set(map(render_to_string_and_fix_case, self.files)), |
| 311 render_to_string_and_fix_case(self.executable), |
| 312 self.command, |
| 313 render_to_string_and_fix_case(self.initial_cwd), |
| 314 children) |
280 | 315 |
281 def add_file(self, filepath): | 316 def add_file(self, filepath): |
282 if self.root().blacklist(unicode(filepath)): | 317 if self.root().blacklist(unicode(filepath)): |
283 return | 318 return |
284 logging.debug('add_file(%d, %s)' % (self.pid, filepath)) | 319 logging.debug('add_file(%d, %s)' % (self.pid, filepath)) |
285 self.files.add(filepath) | 320 self.files.add(filepath) |
286 | 321 |
287 def __init__(self, blacklist): | 322 def __init__(self, blacklist): |
288 self.blacklist = blacklist | 323 self.blacklist = blacklist |
289 self.processes = {} | 324 self.processes = {} |
290 | 325 |
291 def resolve(self): | |
292 """Resolve all the filenames and returns them.""" | |
293 files = set() | |
294 non_existent = set() | |
295 for p in self.processes.itervalues(): | |
296 for filepath in p.files: | |
297 filepath = unicode(filepath) | |
298 # For late-bound file paths, it could be blacklisted after all the | |
299 # processes are processed so it needs to be checked again. | |
300 if self.blacklist(filepath): | |
301 break | |
302 if os.path.isfile(filepath): | |
303 files.add(filepath) | |
304 else: | |
305 non_existent.add(filepath) | |
306 return files, non_existent | |
307 | |
308 @staticmethod | 326 @staticmethod |
309 def clean_trace(logname): | 327 def clean_trace(logname): |
310 """Deletes the old log.""" | 328 """Deletes the old log.""" |
311 raise NotImplementedError() | 329 raise NotImplementedError() |
312 | 330 |
313 @classmethod | 331 @classmethod |
314 def gen_trace(cls, cmd, cwd, logname, output): | 332 def gen_trace(cls, cmd, cwd, logname, output): |
315 """Runs the OS-specific trace program on an executable. | 333 """Runs the OS-specific trace program on an executable. |
316 | 334 |
317 Since the logs are per pid, we need to log the list of the initial pid. | 335 Since the logs are per pid, we need to log the list of the initial pid. |
318 """ | 336 """ |
319 raise NotImplementedError(cls.__class__.__name__) | 337 raise NotImplementedError(cls.__class__.__name__) |
320 | 338 |
321 @classmethod | 339 @classmethod |
322 def parse_log(cls, filename, blacklist): | 340 def parse_log(cls, filename, blacklist): |
323 """Processes a trace log and returns the files opened and the files that do | 341 """Processes a trace log and returns the files opened and the files that do |
324 not exist. | 342 not exist. |
325 | 343 |
326 It does not track directories. | 344 It does not track directories. |
327 | 345 |
328 Most of the time, files that do not exist are temporary test files that | 346 Most of the time, files that do not exist are temporary test files that |
329 should be put in /tmp instead. See http://crbug.com/116251. | 347 should be put in /tmp instead. See http://crbug.com/116251. |
330 | 348 |
331 Returns a tuple (existing files, non existing files, nb_processes_created) | 349 Returns a tuple (existing files, non existing files, nb_processes_created) |
332 """ | 350 """ |
333 raise NotImplementedError(cls.__class__.__name__) | 351 raise NotImplementedError(cls.__class__.__name__) |
334 | 352 |
335 | 353 |
| 354 class Results(object): |
| 355 """Results of a trace session.""" |
| 356 |
| 357 class File(object): |
| 358 """A file that was accessed.""" |
| 359 def __init__(self, root, path): |
| 360 """Represents a file accessed. May not be present anymore.""" |
| 361 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path)) |
| 362 self.root = root |
| 363 self.path = path |
| 364 |
| 365 self._size = None |
| 366 # For compatibility with Directory object interface. |
| 367 # Shouldn't be used normally, only exists to simplify algorithms. |
| 368 self.nb_files = 1 |
| 369 |
| 370 assert path, path |
| 371 assert bool(root) != bool(isabs(path)), (root, path) |
| 372 assert ( |
| 373 not os.path.exists(self.full_path) or |
| 374 self.full_path == get_native_path_case(self.full_path)) |
| 375 |
| 376 @property |
| 377 def existent(self): |
| 378 return self.size != -1 |
| 379 |
| 380 @property |
| 381 def size(self): |
| 382 """File's size. -1 is not existent.""" |
| 383 if self._size is None: |
| 384 try: |
| 385 self._size = os.stat(self.full_path).st_size |
| 386 except OSError: |
| 387 self._size = -1 |
| 388 return self._size |
| 389 |
| 390 @property |
| 391 def full_path(self): |
| 392 if self.root: |
| 393 return os.path.join(self.root, self.path) |
| 394 return self.path |
| 395 |
| 396 def flatten(self): |
| 397 return { |
| 398 'path': self.path, |
| 399 'size': self.size, |
| 400 } |
| 401 |
| 402 def strip_root(self, root): |
| 403 """Returns a clone of itself with 'root' stripped off.""" |
| 404 assert isabs(root) and root.endswith(os.path.sep), root |
| 405 if not self.full_path.startswith(root): |
| 406 return None |
| 407 out = self.__class__(root, self.full_path[len(root):]) |
| 408 # Keep size cache. |
| 409 out._size = self._size |
| 410 return out |
| 411 |
| 412 class Directory(File): |
| 413 """A directory of files. Must exist.""" |
| 414 def __init__(self, root, path, size, nb_files): |
| 415 """path='.' is a valid value and must be handled appropriately.""" |
| 416 super(Results.Directory, self).__init__(root, path) |
| 417 assert not self.path.endswith(os.path.sep) |
| 418 self.path = self.path + os.path.sep |
| 419 self.nb_files = nb_files |
| 420 self._size = size |
| 421 |
| 422 def flatten(self): |
| 423 out = super(Results.Directory, self).flatten() |
| 424 out['nb_files'] = self.nb_files |
| 425 return out |
| 426 |
| 427 class Process(object): |
| 428 """A process that was traced. |
| 429 |
| 430 Contains references to the files accessed by this process and its children. |
| 431 """ |
| 432 def __init__( |
| 433 self, pid, files, executable, command, initial_cwd, children): |
| 434 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) |
| 435 self.pid = pid |
| 436 self.files = sorted( |
| 437 (Results.File(None, f) for f in files), key=lambda x: x.path) |
| 438 assert len(set(f.path for f in self.files)) == len(self.files), [ |
| 439 f.path for f in self.files] |
| 440 assert isinstance(children, list) |
| 441 assert isinstance(self.files, list) |
| 442 self.children = children |
| 443 self.executable = executable |
| 444 self.command = command |
| 445 self.initial_cwd = initial_cwd |
| 446 |
| 447 @property |
| 448 def all(self): |
| 449 for child in self.children: |
| 450 for i in child.all: |
| 451 yield i |
| 452 yield self |
| 453 |
| 454 def flatten(self): |
| 455 return { |
| 456 'children': [c.flatten() for c in self.children], |
| 457 'command': self.command, |
| 458 'executable': self.executable, |
| 459 'files': [f.flatten() for f in self.files], |
| 460 'initial_cwd': self.initial_cwd, |
| 461 'pid': self.pid, |
| 462 } |
| 463 |
| 464 def strip_root(self, root): |
| 465 assert isabs(root) and root.endswith(os.path.sep), root |
| 466 out = self.__class__( |
| 467 self.pid, |
| 468 [], |
| 469 self.executable, |
| 470 self.command, |
| 471 self.initial_cwd, |
| 472 [c.strip_root(root) for c in self.children]) |
| 473 # Override the files property. |
| 474 out.files = filter(None, (f.strip_root(root) for f in self.files)) |
| 475 logging.debug( |
| 476 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) |
| 477 return out |
| 478 |
| 479 |
| 480 def __init__(self, process): |
| 481 self.process = process |
| 482 # Cache. |
| 483 self._files = None |
| 484 |
| 485 def flatten(self): |
| 486 return { |
| 487 'root': self.process.flatten(), |
| 488 } |
| 489 |
| 490 @property |
| 491 def files(self): |
| 492 if self._files is None: |
| 493 self._files = sorted( |
| 494 sum((p.files for p in self.process.all), []), |
| 495 key=lambda x: x.path) |
| 496 return self._files |
| 497 |
| 498 @property |
| 499 def existent(self): |
| 500 return [f for f in self.files if f.existent] |
| 501 |
| 502 @property |
| 503 def non_existent(self): |
| 504 return [f for f in self.files if not f.existent] |
| 505 |
| 506 def strip_root(self, root): |
| 507 """Returns a clone with all the files outside the directory |root| removed |
| 508 and converts all the paths to relative paths. |
| 509 """ |
| 510 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep |
| 511 logging.debug('strip_root(%s)' % root) |
| 512 return Results(self.process.strip_root(root)) |
| 513 |
| 514 |
| 515 def extract_directories(files): |
| 516 """Detects if all the files in a directory are in |files| and if so, replace |
| 517 the individual files by a Results.Directory instance. |
| 518 |
| 519 Takes an array of Results.File instances and returns an array of |
| 520 Results.File and Results.Directory instances. |
| 521 """ |
| 522 assert not any(isinstance(f, Results.Directory) for f in files) |
| 523 # Remove non existent files. |
| 524 files = [f for f in files if f.existent] |
| 525 if not files: |
| 526 return files |
| 527 # All files must share the same root, which can be None. |
| 528 assert len(set(f.root for f in files)) == 1, set(f.root for f in files) |
| 529 |
| 530 def blacklist(f): |
| 531 return f in ('.git', '.svn') or f.endswith('.pyc') |
| 532 |
| 533 # Creates a {directory: {filename: File}} mapping, up to root. |
| 534 root = files[0].root |
| 535 assert root.endswith(os.path.sep) |
| 536 buckets = {} |
| 537 if root: |
| 538 buckets[root.rstrip(os.path.sep)] = {} |
| 539 for fileobj in files: |
| 540 path = fileobj.full_path |
| 541 directory = os.path.dirname(path) |
| 542 # Do not use os.path.basename() so trailing os.path.sep is kept. |
| 543 basename = path[len(directory)+1:] |
| 544 files_in_directory = buckets.setdefault(directory, {}) |
| 545 files_in_directory[basename] = fileobj |
| 546 # Add all the directories recursively up to root. |
| 547 while True: |
| 548 old_d = directory |
| 549 directory = os.path.dirname(directory) |
| 550 if directory + os.path.sep == root or directory == old_d: |
| 551 break |
| 552 buckets.setdefault(directory, {}) |
| 553 |
| 554 for directory in sorted(buckets, reverse=True): |
| 555 actual = set(f for f in os.listdir(directory) if not blacklist(f)) |
| 556 expected = set(buckets[directory]) |
| 557 if not (actual - expected): |
| 558 parent = os.path.dirname(directory) |
| 559 buckets[parent][os.path.basename(directory)] = Results.Directory( |
| 560 root, |
| 561 directory[len(root):], |
| 562 sum(f.size for f in buckets[directory].itervalues()), |
| 563 sum(f.nb_files for f in buckets[directory].itervalues())) |
| 564 # Remove the whole bucket. |
| 565 del buckets[directory] |
| 566 |
| 567 # Reverse the mapping with what remains. The original instances are returned, |
| 568 # so the cached meta data is kept. |
| 569 return sorted( |
| 570 sum((x.values() for x in buckets.itervalues()), []), |
| 571 key=lambda x: x.path) |
| 572 |
| 573 |
336 class Strace(ApiBase): | 574 class Strace(ApiBase): |
337 """strace implies linux.""" | 575 """strace implies linux.""" |
338 IGNORED = ( | 576 IGNORED = ( |
339 '/bin', | 577 '/bin', |
340 '/dev', | 578 '/dev', |
341 '/etc', | 579 '/etc', |
342 '/lib', | 580 '/lib', |
343 '/proc', | 581 '/proc', |
344 '/sys', | 582 '/sys', |
345 '/tmp', | 583 '/tmp', |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
556 def render(self): | 794 def render(self): |
557 """Returns the string value of the initial cwd of the root process. | 795 """Returns the string value of the initial cwd of the root process. |
558 | 796 |
559 Used by RelativePath. | 797 Used by RelativePath. |
560 """ | 798 """ |
561 return self.initial_cwd | 799 return self.initial_cwd |
562 | 800 |
563 def on_line(self, pid, line): | 801 def on_line(self, pid, line): |
564 self.get_or_set_proc(pid).on_line(line.strip()) | 802 self.get_or_set_proc(pid).on_line(line.strip()) |
565 | 803 |
| 804 def to_results(self): |
| 805 """Finds back the root process and verify consistency.""" |
| 806 # TODO(maruel): Absolutely unnecessary, fix me. |
| 807 root = [p for p in self.processes.itervalues() if not p.parentid] |
| 808 assert len(root) == 1 |
| 809 process = root[0].to_results_process() |
| 810 assert sorted(self.processes) == sorted(p.pid for p in process.all) |
| 811 return Results(process) |
| 812 |
566 def get_or_set_proc(self, pid): | 813 def get_or_set_proc(self, pid): |
567 """Returns the Context.Process instance for this pid or creates a new one. | 814 """Returns the Context.Process instance for this pid or creates a new one. |
568 """ | 815 """ |
569 assert isinstance(pid, int) and pid | 816 assert isinstance(pid, int) and pid |
570 return self.processes.setdefault(pid, self.Process(self, pid)) | 817 return self.processes.setdefault(pid, self.Process(self, pid)) |
571 | 818 |
572 @classmethod | 819 @classmethod |
573 def traces(cls): | 820 def traces(cls): |
574 prefix = 'handle_' | 821 prefix = 'handle_' |
575 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] | 822 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
628 with open(filename, 'r') as f: | 875 with open(filename, 'r') as f: |
629 data = json.load(f) | 876 data = json.load(f) |
630 context = cls.Context(blacklist, data['cwd']) | 877 context = cls.Context(blacklist, data['cwd']) |
631 for pidfile in glob.iglob(filename + '.*'): | 878 for pidfile in glob.iglob(filename + '.*'): |
632 pid = pidfile.rsplit('.', 1)[1] | 879 pid = pidfile.rsplit('.', 1)[1] |
633 if pid.isdigit(): | 880 if pid.isdigit(): |
634 pid = int(pid) | 881 pid = int(pid) |
635 # TODO(maruel): Load as utf-8 | 882 # TODO(maruel): Load as utf-8 |
636 for line in open(pidfile, 'rb'): | 883 for line in open(pidfile, 'rb'): |
637 context.on_line(pid, line) | 884 context.on_line(pid, line) |
638 files, non_existent = context.resolve() | 885 |
639 # Resolve any symlink we hit. | 886 return context.to_results() |
640 return ( | |
641 set(os.path.realpath(f) for f in files), | |
642 set(os.path.realpath(f) for f in non_existent), | |
643 len(context.processes)) | |
644 | 887 |
645 | 888 |
646 class Dtrace(ApiBase): | 889 class Dtrace(ApiBase): |
647 """Uses DTrace framework through dtrace. Requires root access. | 890 """Uses DTrace framework through dtrace. Requires root access. |
648 | 891 |
649 Implies Mac OSX. | 892 Implies Mac OSX. |
650 | 893 |
651 dtruss can't be used because it has compatibility issues with python. | 894 dtruss can't be used because it has compatibility issues with python. |
652 | 895 |
653 Also, the pid->cwd handling needs to be done manually since OSX has no way to | 896 Also, the pid->cwd handling needs to be done manually since OSX has no way to |
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
820 self, | 1063 self, |
821 'handle_%s' % match.group(3).replace('-', '_'), | 1064 'handle_%s' % match.group(3).replace('-', '_'), |
822 self._handle_ignored) | 1065 self._handle_ignored) |
823 return fn( | 1066 return fn( |
824 int(match.group(1)), | 1067 int(match.group(1)), |
825 int(match.group(2)), | 1068 int(match.group(2)), |
826 match.group(3), | 1069 match.group(3), |
827 match.group(4), | 1070 match.group(4), |
828 match.group(5)) | 1071 match.group(5)) |
829 | 1072 |
| 1073 def to_results(self): |
| 1074 """Uses self._initial_pid to determine the initial process.""" |
| 1075 process = self.processes[self._initial_pid].to_results_process() |
| 1076 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( |
| 1077 sorted(self.processes), sorted(p.pid for p in process.all)) |
| 1078 return Results(process) |
| 1079 |
830 def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): | 1080 def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): |
831 assert not self._tracer_pid and not self._initial_pid | 1081 assert not self._tracer_pid and not self._initial_pid |
832 self._tracer_pid = pid | 1082 self._tracer_pid = pid |
833 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) | 1083 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) |
834 | 1084 |
835 def handle_proc_start(self, ppid, pid, _function, _args, result): | 1085 def handle_proc_start(self, ppid, pid, _function, _args, result): |
836 """Transfers cwd. | 1086 """Transfers cwd. |
837 | 1087 |
838 The dtrace script already takes care of only tracing the processes that | 1088 The dtrace script already takes care of only tracing the processes that |
839 are child of the traced processes so there is no need to verify the | 1089 are child of the traced processes so there is no need to verify the |
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
993 raise | 1243 raise |
994 | 1244 |
995 return dtrace.returncode or child.returncode, out | 1245 return dtrace.returncode or child.returncode, out |
996 | 1246 |
997 @classmethod | 1247 @classmethod |
998 def parse_log(cls, filename, blacklist): | 1248 def parse_log(cls, filename, blacklist): |
999 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 1249 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
1000 context = cls.Context(blacklist) | 1250 context = cls.Context(blacklist) |
1001 for line in open(filename, 'rb'): | 1251 for line in open(filename, 'rb'): |
1002 context.on_line(line) | 1252 context.on_line(line) |
1003 files, non_existent = context.resolve() | 1253 return context.to_results() |
1004 # Resolve any symlink we hit. | |
1005 return ( | |
1006 set(os.path.realpath(f) for f in files), | |
1007 set(os.path.realpath(f) for f in non_existent), | |
1008 len(context.processes)) | |
1009 | 1254 |
1010 @staticmethod | 1255 @staticmethod |
1011 def _sort_log(logname): | 1256 def _sort_log(logname): |
1012 """Sorts the log back in order when each call occurred. | 1257 """Sorts the log back in order when each call occurred. |
1013 | 1258 |
1014 dtrace doesn't save the buffer in strict order since it keeps one buffer per | 1259 dtrace doesn't save the buffer in strict order since it keeps one buffer per |
1015 CPU. | 1260 CPU. |
1016 """ | 1261 """ |
1017 with open(logname, 'rb') as logfile: | 1262 with open(logname, 'rb') as logfile: |
1018 lines = [f for f in logfile.readlines() if f.strip()] | 1263 lines = [f for f in logfile.readlines() if f.strip()] |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1124 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), | 1369 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), |
1125 None) | 1370 None) |
1126 if not handler: | 1371 if not handler: |
1127 # Try to get an universal fallback | 1372 # Try to get an universal fallback |
1128 handler = getattr(self, 'handle_%s_Any' % line[self.EVENT_NAME], None) | 1373 handler = getattr(self, 'handle_%s_Any' % line[self.EVENT_NAME], None) |
1129 if handler: | 1374 if handler: |
1130 handler(line) | 1375 handler(line) |
1131 else: | 1376 else: |
1132 assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) | 1377 assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) |
1133 | 1378 |
| 1379 def to_results(self): |
| 1380 """Uses self._initial_pid to determine the initial process.""" |
| 1381 process = self.processes[self._initial_pid].to_results_process() |
| 1382 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( |
| 1383 sorted(self.processes), sorted(p.pid for p in process.all)) |
| 1384 return Results(process) |
| 1385 |
1134 def _thread_to_process(self, tid): | 1386 def _thread_to_process(self, tid): |
1135 """Finds the process from the thread id.""" | 1387 """Finds the process from the thread id.""" |
1136 tid = int(tid, 16) | 1388 tid = int(tid, 16) |
1137 return self.processes.get(self._threads_active.get(tid)) | 1389 return self.processes.get(self._threads_active.get(tid)) |
1138 | 1390 |
1139 @staticmethod | 1391 @staticmethod |
1140 def handle_EventTrace_Header(line): | 1392 def handle_EventTrace_Header(line): |
1141 """Verifies no event was dropped, e.g. no buffer overrun occurred.""" | 1393 """Verifies no event was dropped, e.g. no buffer overrun occurred.""" |
1142 #BUFFER_SIZE = 19 | 1394 #BUFFER_SIZE = 19 |
1143 #VERSION = 20 | 1395 #VERSION = 20 |
(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1547 | 1799 |
1548 # The fastest and smallest format but only supports 'ANSI' file paths. | 1800 # The fastest and smallest format but only supports 'ANSI' file paths. |
1549 # E.g. the filenames are encoding in the 'current' encoding. | 1801 # E.g. the filenames are encoding in the 'current' encoding. |
1550 for line in ansi_csv_reader(open(filename)): | 1802 for line in ansi_csv_reader(open(filename)): |
1551 # line is a list of unicode objects. | 1803 # line is a list of unicode objects. |
1552 context.on_csv_line(line) | 1804 context.on_csv_line(line) |
1553 | 1805 |
1554 else: | 1806 else: |
1555 raise NotImplementedError('Implement %s' % logformat) | 1807 raise NotImplementedError('Implement %s' % logformat) |
1556 | 1808 |
1557 files, non_existent = context.resolve() | 1809 return context.to_results() |
1558 # Resolve any symlink we hit. | |
1559 return ( | |
1560 set(os.path.realpath(f) for f in files), | |
1561 set(os.path.realpath(f) for f in non_existent), | |
1562 len(context.processes)) | |
1563 | |
1564 | |
1565 def relevant_files(files, root): | |
1566 """Trims the list of files to keep the expected files and unexpected files. | |
1567 | |
1568 Unexpected files are files that are not based inside the |root| directory. | |
1569 """ | |
1570 expected = [] | |
1571 unexpected = [] | |
1572 for f in files: | |
1573 if f.startswith(root): | |
1574 f = f[len(root):] | |
1575 assert f | |
1576 expected.append(f) | |
1577 else: | |
1578 unexpected.append(f) | |
1579 return sorted(set(expected)), sorted(set(unexpected)) | |
1580 | |
1581 | |
1582 def extract_directories(files, root): | |
1583 """Detects if all the files in a directory were loaded and if so, replace the | |
1584 individual files by the directory entry. | |
1585 """ | |
1586 directories = set(os.path.dirname(f) for f in files) | |
1587 files = set(files) | |
1588 for directory in sorted(directories, reverse=True): | |
1589 actual = set( | |
1590 os.path.join(directory, f) for f in | |
1591 os.listdir(os.path.join(root, directory)) | |
1592 if not f.endswith(('.svn', '.pyc')) | |
1593 ) | |
1594 if not (actual - files): | |
1595 files -= actual | |
1596 files.add(directory + os.path.sep) | |
1597 return sorted(files) | |
1598 | 1810 |
1599 | 1811 |
1600 def pretty_print(variables, stdout): | 1812 def pretty_print(variables, stdout): |
1601 """Outputs a gyp compatible list from the decoded variables. | 1813 """Outputs a gyp compatible list from the decoded variables. |
1602 | 1814 |
1603 Similar to pprint.print() but with NIH syndrome. | 1815 Similar to pprint.print() but with NIH syndrome. |
1604 """ | 1816 """ |
1605 # Order the dictionary keys by these keys in priority. | 1817 # Order the dictionary keys by these keys in priority. |
1606 ORDER = ( | 1818 ORDER = ( |
1607 'variables', 'condition', 'command', 'relative_cwd', 'read_only', | 1819 'variables', 'condition', 'command', 'relative_cwd', 'read_only', |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1743 | 1955 |
1744 def load_trace(logfile, root_dir, api): | 1956 def load_trace(logfile, root_dir, api): |
1745 """Loads a trace file and returns the processed file lists. | 1957 """Loads a trace file and returns the processed file lists. |
1746 | 1958 |
1747 Arguments: | 1959 Arguments: |
1748 - logfile: file to load. | 1960 - logfile: file to load. |
1749 - root_dir: root directory to use to determine if a file is relevant to the | 1961 - root_dir: root directory to use to determine if a file is relevant to the |
1750 trace or not. | 1962 trace or not. |
1751 - api: a tracing api instance. | 1963 - api: a tracing api instance. |
1752 """ | 1964 """ |
1753 root_dir = get_native_path_case(root_dir) | 1965 results = api.parse_log(logfile, get_blacklist(api)) |
1754 files, non_existent, processes = api.parse_log(logfile, get_blacklist(api)) | 1966 results = results.strip_root(root_dir) |
1755 expected, unexpected = relevant_files( | 1967 simplified = extract_directories(results.files) |
1756 files, root_dir.rstrip(os.path.sep) + os.path.sep) | 1968 return results, simplified |
1757 # In case the file system is case insensitive. | |
1758 expected = sorted(set( | |
1759 get_native_path_case(os.path.join(root_dir, f))[len(root_dir)+1:] | |
1760 for f in expected)) | |
1761 simplified = extract_directories(expected, root_dir) | |
1762 return files, expected, unexpected, non_existent, simplified, processes | |
1763 | 1969 |
1764 | 1970 |
1765 def trace_inputs(logfile, cmd, root_dir, cwd_dir, product_dir, force_trace): | 1971 def trace_inputs(logfile, cmd, root_dir, cwd_dir, product_dir, force_trace): |
1766 """Tries to load the logs if available. If not, trace the test. | 1972 """Tries to load the logs if available. If not, trace the test. |
1767 | 1973 |
1768 Symlinks are not processed at all. | 1974 Symlinks are not processed at all. |
1769 | 1975 |
1770 Arguments: | 1976 Arguments: |
1771 - logfile: Absolute path to the OS-specific trace. | 1977 - logfile: Absolute path to the OS-specific trace. |
1772 - cmd: Command list to run. | 1978 - cmd: Command list to run. |
(...skipping 26 matching lines...) Expand all Loading... |
1799 if not os.path.isfile(logfile) or force_trace: | 2005 if not os.path.isfile(logfile) or force_trace: |
1800 print_if('Tracing... %s' % cmd) | 2006 print_if('Tracing... %s' % cmd) |
1801 # Use the proper relative directory. | 2007 # Use the proper relative directory. |
1802 cwd = root_dir if not cwd_dir else os.path.join(root_dir, cwd_dir) | 2008 cwd = root_dir if not cwd_dir else os.path.join(root_dir, cwd_dir) |
1803 silent = not isEnabledFor(logging.WARNING) | 2009 silent = not isEnabledFor(logging.WARNING) |
1804 returncode, _ = trace(logfile, cmd, cwd, api, silent) | 2010 returncode, _ = trace(logfile, cmd, cwd, api, silent) |
1805 if returncode and not force_trace: | 2011 if returncode and not force_trace: |
1806 return returncode | 2012 return returncode |
1807 | 2013 |
1808 print_if('Loading traces... %s' % logfile) | 2014 print_if('Loading traces... %s' % logfile) |
1809 files, expected, unexpected, non_existent, simplified, _ = load_trace( | 2015 results, simplified = load_trace(logfile, root_dir, api) |
1810 logfile, root_dir, api) | |
1811 | 2016 |
1812 print_if('Total: %d' % len(files)) | 2017 print_if('Total: %d' % len(results.files)) |
1813 print_if('Non existent: %d' % len(non_existent)) | 2018 print_if('Non existent: %d' % len(results.non_existent)) |
1814 for f in non_existent: | 2019 for f in results.non_existent: |
1815 print_if(' %s' % f) | 2020 print_if(' %s' % f.path) |
1816 if unexpected: | 2021 print_if( |
1817 print_if('Unexpected: %d' % len(unexpected)) | 2022 'Interesting: %d reduced to %d' % ( |
1818 for f in unexpected: | 2023 len(results.existent), len(simplified))) |
1819 print_if(' %s' % f) | |
1820 print_if('Interesting: %d reduced to %d' % (len(expected), len(simplified))) | |
1821 for f in simplified: | 2024 for f in simplified: |
1822 print_if(' %s' % f) | 2025 print_if(' %s' % f.path) |
1823 | 2026 |
1824 if cwd_dir is not None: | 2027 if cwd_dir is not None: |
1825 value = { | 2028 value = { |
1826 'conditions': [ | 2029 'conditions': [ |
1827 ['OS=="%s"' % get_flavor(), { | 2030 ['OS=="%s"' % get_flavor(), { |
1828 'variables': generate_dict(simplified, cwd_dir, product_dir), | 2031 'variables': generate_dict( |
| 2032 [f.path for f in simplified], cwd_dir, product_dir), |
1829 }], | 2033 }], |
1830 ], | 2034 ], |
1831 } | 2035 } |
1832 pretty_print(value, sys.stdout) | 2036 pretty_print(value, sys.stdout) |
1833 return 0 | 2037 return 0 |
1834 | 2038 |
1835 | 2039 |
1836 def main(): | 2040 def main(): |
1837 parser = optparse.OptionParser( | 2041 parser = optparse.OptionParser( |
1838 usage='%prog <options> [cmd line...]') | 2042 usage='%prog <options> [cmd line...]') |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1879 os.path.abspath(options.log), | 2083 os.path.abspath(options.log), |
1880 args, | 2084 args, |
1881 options.root_dir, | 2085 options.root_dir, |
1882 options.cwd, | 2086 options.cwd, |
1883 options.product_dir, | 2087 options.product_dir, |
1884 options.force) | 2088 options.force) |
1885 | 2089 |
1886 | 2090 |
1887 if __name__ == '__main__': | 2091 if __name__ == '__main__': |
1888 sys.exit(main()) | 2092 sys.exit(main()) |
OLD | NEW |