OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extracts the files accessed | 7 """Traces an executable and its child processes and extracts the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
270 assert isinstance(root, ApiBase.Context) | 270 assert isinstance(root, ApiBase.Context) |
271 assert isinstance(pid, int), repr(pid) | 271 assert isinstance(pid, int), repr(pid) |
272 self.root = weakref.ref(root) | 272 self.root = weakref.ref(root) |
273 self.pid = pid | 273 self.pid = pid |
274 # Children are pids. | 274 # Children are pids. |
275 self.children = [] | 275 self.children = [] |
276 self.parentid = parentid | 276 self.parentid = parentid |
277 self.initial_cwd = initial_cwd | 277 self.initial_cwd = initial_cwd |
278 self.cwd = None | 278 self.cwd = None |
279 self.files = set() | 279 self.files = set() |
280 self.executable = None | |
281 self.command = None | |
282 | |
283 if parentid: | |
284 self.root().processes[parentid].children.append(pid) | |
285 | |
286 def to_results_process(self): | |
287 """Resolves file case sensitivity and/or late-bound strings.""" | |
288 children = [ | |
289 self.root().processes[c].to_results_process() for c in self.children | |
MAD
2012/05/30 20:29:36
We usually prefer not doing in an list constructio
M-A Ruel
2012/05/30 22:34:10
Why? It's much slower. I agree for this line in pa
MAD
2012/05/31 14:00:57
OK, then... I would find it more readable, but fi
| |
290 ] | |
291 # When resolving files, it's normal to get dupe because of a file could | |
MAD
2012/05/30 20:29:36
"because of a" -> "because a"
M-A Ruel
2012/05/30 22:34:10
done.
| |
292 # be opened multiple times with different case. Resolve the | |
293 # deduplication here. | |
294 def render_to_string_and_fix_case(x): | |
295 """Returns the native file path case if the file exists. | |
296 | |
297 Converts late-bound strings. | |
298 """ | |
299 if not x: | |
300 return x | |
301 # TODO(maruel): Do not upconvert to unicode here, on linux we don't | |
302 # know the file path encoding so they must be treated as bytes. | |
303 x = unicode(x) | |
304 if not os.path.exists(x): | |
305 return x | |
306 return get_native_path_case(x) | |
307 | |
308 return Results.Process( | |
309 self.pid, | |
310 set(map(render_to_string_and_fix_case, self.files)), | |
311 render_to_string_and_fix_case(self.executable), | |
312 self.command, | |
313 render_to_string_and_fix_case(self.initial_cwd), | |
314 children) | |
280 | 315 |
281 def add_file(self, filepath): | 316 def add_file(self, filepath): |
282 if self.root().blacklist(unicode(filepath)): | 317 if self.root().blacklist(unicode(filepath)): |
283 return | 318 return |
284 logging.debug('add_file(%d, %s)' % (self.pid, filepath)) | 319 logging.debug('add_file(%d, %s)' % (self.pid, filepath)) |
285 self.files.add(filepath) | 320 self.files.add(filepath) |
286 | 321 |
287 def __init__(self, blacklist): | 322 def __init__(self, blacklist): |
288 self.blacklist = blacklist | 323 self.blacklist = blacklist |
289 self.processes = {} | 324 self.processes = {} |
290 | 325 |
291 def resolve(self): | |
292 """Resolve all the filenames and returns them.""" | |
293 files = set() | |
294 non_existent = set() | |
295 for p in self.processes.itervalues(): | |
296 for filepath in p.files: | |
297 filepath = unicode(filepath) | |
298 # For late-bound file paths, it could be blacklisted after all the | |
299 # processes are processed so it needs to be checked again. | |
300 if self.blacklist(filepath): | |
301 break | |
302 if os.path.isfile(filepath): | |
303 files.add(filepath) | |
304 else: | |
305 non_existent.add(filepath) | |
306 return files, non_existent | |
307 | |
308 @staticmethod | 326 @staticmethod |
309 def clean_trace(logname): | 327 def clean_trace(logname): |
310 """Deletes the old log.""" | 328 """Deletes the old log.""" |
311 raise NotImplementedError() | 329 raise NotImplementedError() |
312 | 330 |
313 @classmethod | 331 @classmethod |
314 def gen_trace(cls, cmd, cwd, logname, output): | 332 def gen_trace(cls, cmd, cwd, logname, output): |
315 """Runs the OS-specific trace program on an executable. | 333 """Runs the OS-specific trace program on an executable. |
316 | 334 |
317 Since the logs are per pid, we need to log the list of the initial pid. | 335 Since the logs are per pid, we need to log the list of the initial pid. |
318 """ | 336 """ |
319 raise NotImplementedError(cls.__class__.__name__) | 337 raise NotImplementedError(cls.__class__.__name__) |
320 | 338 |
321 @classmethod | 339 @classmethod |
322 def parse_log(cls, filename, blacklist): | 340 def parse_log(cls, filename, blacklist): |
323 """Processes a trace log and returns the files opened and the files that do | 341 """Processes a trace log and returns the files opened and the files that do |
324 not exist. | 342 not exist. |
325 | 343 |
326 It does not track directories. | 344 It does not track directories. |
327 | 345 |
328 Most of the time, files that do not exist are temporary test files that | 346 Most of the time, files that do not exist are temporary test files that |
329 should be put in /tmp instead. See http://crbug.com/116251. | 347 should be put in /tmp instead. See http://crbug.com/116251. |
330 | 348 |
331 Returns a tuple (existing files, non existing files, nb_processes_created) | 349 Returns a tuple (existing files, non existing files, nb_processes_created) |
332 """ | 350 """ |
333 raise NotImplementedError(cls.__class__.__name__) | 351 raise NotImplementedError(cls.__class__.__name__) |
334 | 352 |
335 | 353 |
354 class Results(object): | |
355 """Results of a trace session.""" | |
356 | |
357 class File(object): | |
358 """A file that was accessed.""" | |
359 def __init__(self, root, path): | |
360 """Represents a file accessed. May not be present anymore.""" | |
361 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path)) | |
362 self.root = root | |
363 self.path = path | |
364 | |
365 self._size = None | |
366 # For compatibility with Directory object interface. | |
367 # Shouldn't be used normally, only exists to simplify algorithms. | |
368 self.nb_files = 1 | |
369 | |
370 assert path, path | |
371 assert bool(root) != bool(isabs(path)), (root, path) | |
372 assert ( | |
373 not os.path.exists(self.full_path) or | |
374 self.full_path == get_native_path_case(self.full_path)) | |
375 | |
376 @property | |
377 def existent(self): | |
378 return self.size != -1 | |
379 | |
380 @property | |
381 def size(self): | |
382 """File's size. -1 is not existent.""" | |
383 if self._size is None: | |
384 try: | |
385 self._size = os.stat(self.full_path).st_size | |
386 except OSError: | |
387 self._size = -1 | |
388 return self._size | |
389 | |
390 @property | |
391 def full_path(self): | |
392 if self.root: | |
393 return os.path.join(self.root, self.path) | |
394 return self.path | |
395 | |
396 def flatten(self): | |
397 return { | |
398 'path': self.path, | |
399 'size': self.size, | |
400 } | |
401 | |
402 def strip_root(self, root): | |
403 """Returns a clone of itself with 'root' stripped off.""" | |
404 assert isabs(root) and root.endswith(os.path.sep), root | |
405 if not self.full_path.startswith(root): | |
406 return None | |
407 out = self.__class__(root, self.full_path[len(root):]) | |
408 # Keep size cache. | |
409 out._size = self._size | |
410 return out | |
411 | |
412 class Directory(File): | |
413 """A directory of files. Must exist.""" | |
414 def __init__(self, root, path, size, nb_files): | |
415 """path='.' is a valid value and must be handled appropriately.""" | |
416 super(Results.Directory, self).__init__(root, path) | |
417 self.path = self.path + os.path.sep | |
MAD
2012/05/30 20:29:36
maybe add an assert that path doesn't already ends
M-A Ruel
2012/05/30 22:34:10
done
| |
418 self.nb_files = nb_files | |
419 self._size = size | |
420 | |
421 def flatten(self): | |
422 out = super(Results.Directory, self).flatten() | |
423 out['nb_files'] = self.nb_files | |
424 return out | |
425 | |
426 class Process(object): | |
427 """A process that was traced. | |
428 | |
429 Contains references to the files accessed by this process and its children. | |
430 """ | |
431 def __init__( | |
432 self, pid, files, executable, command, initial_cwd, children): | |
433 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) | |
434 self.pid = pid | |
435 self.files = sorted( | |
436 (Results.File(None, f) for f in files), key=lambda x: x.path) | |
437 assert len(set(f.path for f in self.files)) == len(self.files), [ | |
438 f.path for f in self.files] | |
439 assert isinstance(children, list) | |
440 assert isinstance(self.files, list) | |
441 self.children = children | |
442 self.executable = executable | |
443 self.command = command | |
444 self.initial_cwd = initial_cwd | |
445 | |
446 @property | |
447 def all(self): | |
448 for child in self.children: | |
449 for i in child.all: | |
450 yield i | |
451 yield self | |
452 | |
453 def flatten(self): | |
454 return { | |
455 'children': [c.flatten() for c in self.children], | |
456 'command': self.command, | |
457 'executable': self.executable, | |
458 'files': [f.flatten() for f in self.files], | |
459 'initial_cwd': self.initial_cwd, | |
460 'pid': self.pid, | |
461 } | |
462 | |
463 def strip_root(self, root): | |
464 assert isabs(root) and root.endswith(os.path.sep), root | |
465 out = self.__class__( | |
466 self.pid, | |
467 [], | |
468 self.executable, | |
469 self.command, | |
470 self.initial_cwd, | |
471 [c.strip_root(root) for c in self.children]) | |
472 # Override the files property. | |
473 out.files = filter(None, (f.strip_root(root) for f in self.files)) | |
474 logging.debug( | |
475 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) | |
476 return out | |
477 | |
478 | |
479 def __init__(self, process): | |
480 self.process = process | |
481 # Cache. | |
482 self._files = None | |
483 | |
484 def flatten(self): | |
485 return { | |
486 'root': self.process.flatten(), | |
487 } | |
488 | |
489 @property | |
490 def files(self): | |
491 if self._files is None: | |
492 self._files = sorted( | |
493 sum((p.files for p in self.process.all), []), | |
494 key=lambda x: x.path) | |
495 return self._files | |
496 | |
497 @property | |
498 def existent(self): | |
499 return [f for f in self.files if f.existent] | |
500 | |
501 @property | |
502 def non_existent(self): | |
503 return [f for f in self.files if not f.existent] | |
504 | |
505 def strip_root(self, root): | |
506 """Returns a clone with all the files outside the directory |root| removed | |
507 and converts all the paths to be relative paths. | |
508 """ | |
509 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep | |
510 logging.debug('strip_root(%s)' % root) | |
511 return Results(self.process.strip_root(root)) | |
512 | |
513 | |
514 def extract_directories(files): | |
515 """Detects if all the files in a directory are in |files| and if so, replaces | |
516 the individual files by a Results.Directory instance. | |
517 | |
518 Takes an array of Results.File instances and returns an array of | |
519 Results.File and Results.Directory instances. | |
520 """ | |
521 assert not any(isinstance(f, Results.Directory) for f in files) | |
522 # Remove non existent files. | |
523 files = [f for f in files if f.existent] | |
524 if not files: | |
525 return files | |
526 # All files must share the same root, which can be None. | |
527 assert len(set(f.root for f in files)) == 1, set(f.root for f in files) | |
528 | |
529 def blacklist(f): | |
530 return f in ('.git', '.svn') or f.endswith('.pyc') | |
531 | |
532 # Creates a {directory: {filename: File}} mapping, up to root. | |
533 root = files[0].root | |
534 buckets = {} | |
535 if root: | |
536 buckets[root.rstrip(os.path.sep)] = {} | |
537 for f in files: | |
MAD
2012/05/30 20:29:36
I prefer for file in files:
for a one line list c
M-A Ruel
2012/05/30 22:34:10
'file' is a poor choice because it is a builtin, s
| |
538 path = f.full_path | |
539 directory = os.path.dirname(path) | |
540 x = buckets.setdefault(directory, {}) | |
MAD
2012/05/30 20:29:36
what's 'x'?
M-A Ruel
2012/05/30 22:34:10
I didn't recall, fixed.
| |
541 x[path[len(directory)+1:]] = f | |
MAD
2012/05/30 20:29:36
really readable... :-P
M-A Ruel
2012/05/30 22:34:10
Rewrote.
| |
542 # Add all the directories recursively up to root. | |
543 while True: | |
544 old_d = directory | |
545 directory = os.path.dirname(directory) | |
546 if directory + os.path.sep == root or directory == old_d: | |
MAD
2012/05/30 20:29:36
You assume root ends with a path.sep but you don't
M-A Ruel
2012/05/30 22:34:10
Agreed, added assert.
| |
547 break | |
548 buckets.setdefault(directory, {}) | |
549 | |
550 for directory in sorted(buckets, reverse=True): | |
551 actual = set(f for f in os.listdir(directory) if not blacklist(f)) | |
552 expected = set(buckets[directory]) | |
553 if not (actual - expected): | |
554 parent = os.path.dirname(directory) | |
555 buckets[parent][os.path.basename(directory)] = Results.Directory( | |
556 root, | |
557 directory[len(root):], | |
558 sum(f.size for f in buckets[directory].itervalues()), | |
559 sum(f.nb_files for f in buckets[directory].itervalues())) | |
560 # Remove the whole bucket. | |
561 del buckets[directory] | |
562 | |
563 # Reverse the mapping with what remains. The original instances are returned, | |
564 # so the cached meta data is kept. | |
565 return sorted( | |
566 sum((x.values() for x in buckets.itervalues()), []), | |
567 key=lambda x: x.path) | |
568 | |
569 | |
336 class Strace(ApiBase): | 570 class Strace(ApiBase): |
337 """strace implies linux.""" | 571 """strace implies linux.""" |
338 IGNORED = ( | 572 IGNORED = ( |
339 '/bin', | 573 '/bin', |
340 '/dev', | 574 '/dev', |
341 '/etc', | 575 '/etc', |
342 '/lib', | 576 '/lib', |
343 '/proc', | 577 '/proc', |
344 '/sys', | 578 '/sys', |
345 '/tmp', | 579 '/tmp', |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
556 def render(self): | 790 def render(self): |
557 """Returns the string value of the initial cwd of the root process. | 791 """Returns the string value of the initial cwd of the root process. |
558 | 792 |
559 Used by RelativePath. | 793 Used by RelativePath. |
560 """ | 794 """ |
561 return self.initial_cwd | 795 return self.initial_cwd |
562 | 796 |
563 def on_line(self, pid, line): | 797 def on_line(self, pid, line): |
564 self.get_or_set_proc(pid).on_line(line.strip()) | 798 self.get_or_set_proc(pid).on_line(line.strip()) |
565 | 799 |
800 def to_results(self): | |
801 """Finds the root process and verifies consistency.""" | |
802 # TODO(maruel): Absolutely unnecessary, fix me. | |
803 root = [p for p in self.processes.itervalues() if not p.parentid] | |
804 assert len(root) == 1 | |
805 process = root[0].to_results_process() | |
806 assert sorted(self.processes) == sorted(p.pid for p in process.all) | |
807 return Results(process) | |
808 | |
566 def get_or_set_proc(self, pid): | 809 def get_or_set_proc(self, pid): |
567 """Returns the Context.Process instance for this pid or creates a new one. | 810 """Returns the Context.Process instance for this pid or creates a new one. |
568 """ | 811 """ |
569 assert isinstance(pid, int) and pid | 812 assert isinstance(pid, int) and pid |
570 return self.processes.setdefault(pid, self.Process(self, pid)) | 813 return self.processes.setdefault(pid, self.Process(self, pid)) |
571 | 814 |
572 @classmethod | 815 @classmethod |
573 def traces(cls): | 816 def traces(cls): |
574 prefix = 'handle_' | 817 prefix = 'handle_' |
575 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] | 818 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
628 with open(filename, 'r') as f: | 871 with open(filename, 'r') as f: |
629 data = json.load(f) | 872 data = json.load(f) |
630 context = cls.Context(blacklist, data['cwd']) | 873 context = cls.Context(blacklist, data['cwd']) |
631 for pidfile in glob.iglob(filename + '.*'): | 874 for pidfile in glob.iglob(filename + '.*'): |
632 pid = pidfile.rsplit('.', 1)[1] | 875 pid = pidfile.rsplit('.', 1)[1] |
633 if pid.isdigit(): | 876 if pid.isdigit(): |
634 pid = int(pid) | 877 pid = int(pid) |
635 # TODO(maruel): Load as utf-8 | 878 # TODO(maruel): Load as utf-8 |
636 for line in open(pidfile, 'rb'): | 879 for line in open(pidfile, 'rb'): |
637 context.on_line(pid, line) | 880 context.on_line(pid, line) |
638 files, non_existent = context.resolve() | 881 |
639 # Resolve any symlink we hit. | 882 return context.to_results() |
640 return ( | |
641 set(os.path.realpath(f) for f in files), | |
642 set(os.path.realpath(f) for f in non_existent), | |
643 len(context.processes)) | |
644 | 883 |
645 | 884 |
646 class Dtrace(ApiBase): | 885 class Dtrace(ApiBase): |
647 """Uses DTrace framework through dtrace. Requires root access. | 886 """Uses DTrace framework through dtrace. Requires root access. |
648 | 887 |
649 Implies Mac OSX. | 888 Implies Mac OSX. |
650 | 889 |
651 dtruss can't be used because it has compatibility issues with python. | 890 dtruss can't be used because it has compatibility issues with python. |
652 | 891 |
653 Also, the pid->cwd handling needs to be done manually since OSX has no way to | 892 Also, the pid->cwd handling needs to be done manually since OSX has no way to |
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
820 self, | 1059 self, |
821 'handle_%s' % match.group(3).replace('-', '_'), | 1060 'handle_%s' % match.group(3).replace('-', '_'), |
822 self._handle_ignored) | 1061 self._handle_ignored) |
823 return fn( | 1062 return fn( |
824 int(match.group(1)), | 1063 int(match.group(1)), |
825 int(match.group(2)), | 1064 int(match.group(2)), |
826 match.group(3), | 1065 match.group(3), |
827 match.group(4), | 1066 match.group(4), |
828 match.group(5)) | 1067 match.group(5)) |
829 | 1068 |
1069 def to_results(self): | |
1070 """Uses self._initial_pid to determine the initial process.""" | |
1071 process = self.processes[self._initial_pid].to_results_process() | |
1072 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( | |
1073 sorted(self.processes), sorted(p.pid for p in process.all)) | |
1074 return Results(process) | |
1075 | |
830 def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): | 1076 def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): |
831 assert not self._tracer_pid and not self._initial_pid | 1077 assert not self._tracer_pid and not self._initial_pid |
832 self._tracer_pid = pid | 1078 self._tracer_pid = pid |
833 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) | 1079 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) |
834 | 1080 |
835 def handle_proc_start(self, ppid, pid, _function, _args, result): | 1081 def handle_proc_start(self, ppid, pid, _function, _args, result): |
836 """Transfers cwd. | 1082 """Transfers cwd. |
837 | 1083 |
838 The dtrace script already takes care of only tracing the processes that | 1084 The dtrace script already takes care of only tracing the processes that |
839 are child of the traced processes so there is no need to verify the | 1085 are child of the traced processes so there is no need to verify the |
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
993 raise | 1239 raise |
994 | 1240 |
995 return dtrace.returncode or child.returncode, out | 1241 return dtrace.returncode or child.returncode, out |
996 | 1242 |
997 @classmethod | 1243 @classmethod |
998 def parse_log(cls, filename, blacklist): | 1244 def parse_log(cls, filename, blacklist): |
999 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 1245 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
1000 context = cls.Context(blacklist) | 1246 context = cls.Context(blacklist) |
1001 for line in open(filename, 'rb'): | 1247 for line in open(filename, 'rb'): |
1002 context.on_line(line) | 1248 context.on_line(line) |
1003 files, non_existent = context.resolve() | 1249 return context.to_results() |
1004 # Resolve any symlink we hit. | |
1005 return ( | |
1006 set(os.path.realpath(f) for f in files), | |
1007 set(os.path.realpath(f) for f in non_existent), | |
1008 len(context.processes)) | |
1009 | 1250 |
1010 @staticmethod | 1251 @staticmethod |
1011 def _sort_log(logname): | 1252 def _sort_log(logname): |
1012 """Sorts the log back in order when each call occurred. | 1253 """Sorts the log back in order when each call occurred. |
1013 | 1254 |
1014 dtrace doesn't save the buffer in strict order since it keeps one buffer per | 1255 dtrace doesn't save the buffer in strict order since it keeps one buffer per |
1015 CPU. | 1256 CPU. |
1016 """ | 1257 """ |
1017 with open(logname, 'rb') as logfile: | 1258 with open(logname, 'rb') as logfile: |
1018 lines = [f for f in logfile.readlines() if f.strip()] | 1259 lines = [f for f in logfile.readlines() if f.strip()] |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1124 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), | 1365 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), |
1125 None) | 1366 None) |
1126 if not handler: | 1367 if not handler: |
1127 # Try to get a universal fallback | 1368 # Try to get a universal fallback |
1128 handler = getattr(self, 'handle_%s_Any' % line[self.EVENT_NAME], None) | 1369 handler = getattr(self, 'handle_%s_Any' % line[self.EVENT_NAME], None) |
1129 if handler: | 1370 if handler: |
1130 handler(line) | 1371 handler(line) |
1131 else: | 1372 else: |
1132 assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) | 1373 assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) |
1133 | 1374 |
1375 def to_results(self): | |
1376 """Uses self._initial_pid to determine the initial process.""" | |
1377 process = self.processes[self._initial_pid].to_results_process() | |
1378 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( | |
1379 sorted(self.processes), sorted(p.pid for p in process.all)) | |
1380 return Results(process) | |
1381 | |
1134 def _thread_to_process(self, tid): | 1382 def _thread_to_process(self, tid): |
1135 """Finds the process from the thread id.""" | 1383 """Finds the process from the thread id.""" |
1136 tid = int(tid, 16) | 1384 tid = int(tid, 16) |
1137 return self.processes.get(self._threads_active.get(tid)) | 1385 return self.processes.get(self._threads_active.get(tid)) |
1138 | 1386 |
1139 @staticmethod | 1387 @staticmethod |
1140 def handle_EventTrace_Header(line): | 1388 def handle_EventTrace_Header(line): |
1141 """Verifies no event was dropped, e.g. no buffer overrun occurred.""" | 1389 """Verifies no event was dropped, e.g. no buffer overrun occurred.""" |
1142 #BUFFER_SIZE = 19 | 1390 #BUFFER_SIZE = 19 |
1143 #VERSION = 20 | 1391 #VERSION = 20 |
(...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1546 | 1794 |
1547 # The fastest and smallest format but only supports 'ANSI' file paths. | 1795 # The fastest and smallest format but only supports 'ANSI' file paths. |
1548 # E.g. the filenames are encoded in the 'current' encoding. | 1796 # E.g. the filenames are encoded in the 'current' encoding. |
1549 for line in ansi_csv_reader(open(filename)): | 1797 for line in ansi_csv_reader(open(filename)): |
1550 # line is a list of unicode objects. | 1798 # line is a list of unicode objects. |
1551 context.on_csv_line(line) | 1799 context.on_csv_line(line) |
1552 | 1800 |
1553 else: | 1801 else: |
1554 raise NotImplementedError('Implement %s' % logformat) | 1802 raise NotImplementedError('Implement %s' % logformat) |
1555 | 1803 |
1556 files, non_existent = context.resolve() | 1804 return context.to_results() |
1557 # Resolve any symlink we hit. | |
1558 return ( | |
1559 set(os.path.realpath(f) for f in files), | |
1560 set(os.path.realpath(f) for f in non_existent), | |
1561 len(context.processes)) | |
1562 | |
1563 | |
1564 def relevant_files(files, root): | |
1565 """Trims the list of files to keep the expected files and unexpected files. | |
1566 | |
1567 Unexpected files are files that are not based inside the |root| directory. | |
1568 """ | |
1569 expected = [] | |
1570 unexpected = [] | |
1571 for f in files: | |
1572 if f.startswith(root): | |
1573 f = f[len(root):] | |
1574 assert f | |
1575 expected.append(f) | |
1576 else: | |
1577 unexpected.append(f) | |
1578 return sorted(set(expected)), sorted(set(unexpected)) | |
1579 | |
1580 | |
1581 def extract_directories(files, root): | |
1582 """Detects if all the files in a directory were loaded and if so, replace the | |
1583 individual files by the directory entry. | |
1584 """ | |
1585 directories = set(os.path.dirname(f) for f in files) | |
1586 files = set(files) | |
1587 for directory in sorted(directories, reverse=True): | |
1588 actual = set( | |
1589 os.path.join(directory, f) for f in | |
1590 os.listdir(os.path.join(root, directory)) | |
1591 if not f.endswith(('.svn', '.pyc')) | |
1592 ) | |
1593 if not (actual - files): | |
1594 files -= actual | |
1595 files.add(directory + os.path.sep) | |
1596 return sorted(files) | |
1597 | 1805 |
1598 | 1806 |
1599 def pretty_print(variables, stdout): | 1807 def pretty_print(variables, stdout): |
1600 """Outputs a gyp compatible list from the decoded variables. | 1808 """Outputs a gyp compatible list from the decoded variables. |
1601 | 1809 |
1602 Similar to pprint.print() but with NIH syndrome. | 1810 Similar to pprint.print() but with NIH syndrome. |
1603 """ | 1811 """ |
1604 # Order the dictionary keys by these keys in priority. | 1812 # Order the dictionary keys by these keys in priority. |
1605 ORDER = ( | 1813 ORDER = ( |
1606 'variables', 'condition', 'command', 'relative_cwd', 'read_only', | 1814 'variables', 'condition', 'command', 'relative_cwd', 'read_only', |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1742 | 1950 |
1743 def load_trace(logfile, root_dir, api): | 1951 def load_trace(logfile, root_dir, api): |
1744 """Loads a trace file and returns the processed file lists. | 1952 """Loads a trace file and returns the processed file lists. |
1745 | 1953 |
1746 Arguments: | 1954 Arguments: |
1747 - logfile: file to load. | 1955 - logfile: file to load. |
1748 - root_dir: root directory to use to determine if a file is relevant to the | 1956 - root_dir: root directory to use to determine if a file is relevant to the |
1749 trace or not. | 1957 trace or not. |
1750 - api: a tracing api instance. | 1958 - api: a tracing api instance. |
1751 """ | 1959 """ |
1752 root_dir = get_native_path_case(root_dir) | 1960 results = api.parse_log(logfile, get_blacklist(api)) |
1753 files, non_existent, processes = api.parse_log(logfile, get_blacklist(api)) | 1961 results = results.strip_root(root_dir) |
1754 expected, unexpected = relevant_files( | 1962 simplified = extract_directories(results.files) |
1755 files, root_dir.rstrip(os.path.sep) + os.path.sep) | 1963 return results, simplified |
1756 # In case the file system is case insensitive. | |
1757 expected = sorted(set( | |
1758 get_native_path_case(os.path.join(root_dir, f))[len(root_dir)+1:] | |
1759 for f in expected)) | |
1760 simplified = extract_directories(expected, root_dir) | |
1761 return files, expected, unexpected, non_existent, simplified, processes | |
1762 | 1964 |
1763 | 1965 |
1764 def trace_inputs(logfile, cmd, root_dir, cwd_dir, product_dir, force_trace): | 1966 def trace_inputs(logfile, cmd, root_dir, cwd_dir, product_dir, force_trace): |
1765 """Tries to load the logs if available. If not, trace the test. | 1967 """Tries to load the logs if available. If not, trace the test. |
1766 | 1968 |
1767 Symlinks are not processed at all. | 1969 Symlinks are not processed at all. |
1768 | 1970 |
1769 Arguments: | 1971 Arguments: |
1770 - logfile: Absolute path to the OS-specific trace. | 1972 - logfile: Absolute path to the OS-specific trace. |
1771 - cmd: Command list to run. | 1973 - cmd: Command list to run. |
(...skipping 26 matching lines...) Expand all Loading... | |
1798 if not os.path.isfile(logfile) or force_trace: | 2000 if not os.path.isfile(logfile) or force_trace: |
1799 print_if('Tracing... %s' % cmd) | 2001 print_if('Tracing... %s' % cmd) |
1800 # Use the proper relative directory. | 2002 # Use the proper relative directory. |
1801 cwd = root_dir if not cwd_dir else os.path.join(root_dir, cwd_dir) | 2003 cwd = root_dir if not cwd_dir else os.path.join(root_dir, cwd_dir) |
1802 silent = not isEnabledFor(logging.WARNING) | 2004 silent = not isEnabledFor(logging.WARNING) |
1803 returncode, _ = trace(logfile, cmd, cwd, api, silent) | 2005 returncode, _ = trace(logfile, cmd, cwd, api, silent) |
1804 if returncode and not force_trace: | 2006 if returncode and not force_trace: |
1805 return returncode | 2007 return returncode |
1806 | 2008 |
1807 print_if('Loading traces... %s' % logfile) | 2009 print_if('Loading traces... %s' % logfile) |
1808 files, expected, unexpected, non_existent, simplified, _ = load_trace( | 2010 results, simplified = load_trace(logfile, root_dir, api) |
1809 logfile, root_dir, api) | |
1810 | 2011 |
1811 print_if('Total: %d' % len(files)) | 2012 print_if('Total: %d' % len(results.files)) |
1812 print_if('Non existent: %d' % len(non_existent)) | 2013 print_if('Non existent: %d' % len(results.non_existent)) |
1813 for f in non_existent: | 2014 for f in results.non_existent: |
1814 print_if(' %s' % f) | 2015 print_if(' %s' % f.path) |
1815 if unexpected: | 2016 print_if( |
1816 print_if('Unexpected: %d' % len(unexpected)) | 2017 'Interesting: %d reduced to %d' % ( |
1817 for f in unexpected: | 2018 len(results.existent), len(simplified))) |
1818 print_if(' %s' % f) | |
1819 print_if('Interesting: %d reduced to %d' % (len(expected), len(simplified))) | |
1820 for f in simplified: | 2019 for f in simplified: |
1821 print_if(' %s' % f) | 2020 print_if(' %s' % f.path) |
1822 | 2021 |
1823 if cwd_dir is not None: | 2022 if cwd_dir is not None: |
1824 value = { | 2023 value = { |
1825 'conditions': [ | 2024 'conditions': [ |
1826 ['OS=="%s"' % get_flavor(), { | 2025 ['OS=="%s"' % get_flavor(), { |
1827 'variables': generate_dict(simplified, cwd_dir, product_dir), | 2026 'variables': generate_dict( |
2027 [f.path for f in simplified], cwd_dir, product_dir), | |
1828 }], | 2028 }], |
1829 ], | 2029 ], |
1830 } | 2030 } |
1831 pretty_print(value, sys.stdout) | 2031 pretty_print(value, sys.stdout) |
1832 return 0 | 2032 return 0 |
1833 | 2033 |
1834 | 2034 |
1835 def main(): | 2035 def main(): |
1836 parser = optparse.OptionParser( | 2036 parser = optparse.OptionParser( |
1837 usage='%prog <options> [cmd line...]') | 2037 usage='%prog <options> [cmd line...]') |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1878 os.path.abspath(options.log), | 2078 os.path.abspath(options.log), |
1879 args, | 2079 args, |
1880 options.root_dir, | 2080 options.root_dir, |
1881 options.cwd, | 2081 options.cwd, |
1882 options.product_dir, | 2082 options.product_dir, |
1883 options.force) | 2083 options.force) |
1884 | 2084 |
1885 | 2085 |
1886 if __name__ == '__main__': | 2086 if __name__ == '__main__': |
1887 sys.exit(main()) | 2087 sys.exit(main()) |
OLD | NEW |