Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(221)

Side by Side Diff: tools/isolate/trace_inputs.py

Issue 9835084: Implementing stateful directory tracking for dtrace on OSX. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase against http://codereview.chromium.org/9834052 Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/isolate/isolate_test.py ('k') | tools/isolate/trace_inputs_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Runs strace or dtrace on a test and processes the logs to extract the 6 """Runs strace or dtrace on a test and processes the logs to extract the
7 dependencies from the source tree. 7 dependencies from the source tree.
8 8
9 Automatically extracts directories where all the files are used to make the 9 Automatically extracts directories where all the files are used to make the
10 dependencies list more compact. 10 dependencies list more compact.
(...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after
232 set(os.path.realpath(f) for f in context.files), 232 set(os.path.realpath(f) for f in context.files),
233 set(os.path.realpath(f) for f in context.non_existent)) 233 set(os.path.realpath(f) for f in context.non_existent))
234 234
235 235
236 class Dtrace(object): 236 class Dtrace(object):
237 """Uses DTrace framework through dtrace. Requires root access. 237 """Uses DTrace framework through dtrace. Requires root access.
238 238
239 Implies Mac OSX. 239 Implies Mac OSX.
240 240
241 dtruss can't be used because it has compatibility issues with python. 241 dtruss can't be used because it has compatibility issues with python.
242
243 Also, the pid->cwd handling needs to be done manually since OSX has no way to
244 get the absolute path of the 'cwd' dtrace variable from the probe.
245
246 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
247 https://discussions.apple.com/thread/1980539.
242 """ 248 """
243 IGNORED = ( 249 IGNORED = (
244 '/.vol', 250 '/.vol',
245 '/Library', 251 '/Library',
246 '/System', 252 '/System',
247 '/dev', 253 '/dev',
248 '/etc', 254 '/etc',
249 '/private/var', 255 '/private/var',
250 '/tmp', 256 '/tmp',
251 '/usr', 257 '/usr',
252 '/var', 258 '/var',
253 ) 259 )
254 260
255 # pylint: disable=C0301 261 # pylint: disable=C0301
256 # To understand the following code, you'll want to take a look at: 262 # To understand the following code, you'll want to take a look at:
257 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.h tml 263 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.h tml
258 # and
259 # https://wikis.oracle.com/display/DTrace/Variables 264 # https://wikis.oracle.com/display/DTrace/Variables
260 # Note that cwd doesn't keep the absolute path so we need to compute it 265 # http://docs.oracle.com/cd/E19205-01/820-4221/
261 # ourselve! 266 #
267 # The list of valid probes can be retrieved with:
268 # sudo dtrace -l -P syscall | less
262 D_CODE = """ 269 D_CODE = """
263 /* Child process tracking. 270 proc:::start /trackedpid[ppid]/ {
264 * I'm really depressed that I need to do it myself. */
265 dtrace:::BEGIN {
266 trackedpid[ppid] = 1;
267 trackedpid[pid] = 1; 271 trackedpid[pid] = 1;
268 /* Signal gen_trace() that we are ready to trace. */ 272 current_processes += 1;
269 printf("Go! 1 %d:%d %s", ppid, pid, execname); 273 printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n",
270 } 274 logindex, ppid, pid, probeprov, probename, execname,
271 275 current_processes);
272 /* Make sure all child processes are tracked. This is not very efficient 276 logindex++;
273 * but for our use case, it's fine enough. 277 }
274 * TODO(maruel): We should properly track fork, execve, vfork and friends 278 proc:::exit /trackedpid[pid] && current_processes == 1/ {
275 * instead. */
276 syscall:::entry /trackedpid[ppid]/ {
277 trackedpid[pid] = 1;
278 }
279 syscall::exit:entry /trackedpid[pid]/ {
280 trackedpid[pid] = 0; 279 trackedpid[pid] = 0;
280 current_processes -= 1;
281 printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n",
282 logindex, ppid, pid, probeprov, probename, execname,
283 current_processes);
284 logindex++;
285 exit(0);
286 }
287 proc:::exit /trackedpid[pid]/ {
288 trackedpid[pid] = 0;
289 current_processes -= 1;
290 printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n",
291 logindex, ppid, pid, probeprov, probename, execname,
292 current_processes);
293 logindex++;
281 } 294 }
282 295
283 /* Finally what we care about! */ 296 /* Finally what we care about! */
284 syscall::open*:entry /trackedpid[pid]/ { 297 syscall::open*:entry /trackedpid[pid]/ {
285 self->arg0 = copyinstr(arg0); 298 self->arg0 = arg0;
286 self->arg1 = arg1; 299 self->arg1 = arg1;
287 self->arg2 = arg2; 300 self->arg2 = arg2;
288 self->cwd = cwd; 301 }
289 } 302 syscall::open*:return /trackedpid[pid] && errno == 0/ {
290 syscall::open*:return /trackedpid[pid]/ { 303 printf("%d %d:%d %s(\\"%s\\", %d, %d) = %d\\n",
291 printf("%d:%d \\"%s\\"; \\"%s\\"; \\"%d\\"; \\"%d\\" = %d", 304 logindex, ppid, pid, probefunc, copyinstr(self->arg0),
292 ppid, pid, execname, self->arg0, self->arg1, self->arg2, errno); 305 self->arg1, self->arg2, errno);
306 logindex++;
293 self->arg0 = 0; 307 self->arg0 = 0;
294 self->arg1 = 0; 308 self->arg1 = 0;
295 self->arg2 = 0; 309 self->arg2 = 0;
296 } 310 }
311
312 syscall::rename:entry /trackedpid[pid]/ {
313 self->arg0 = arg0;
314 self->arg1 = arg1;
315 }
316 syscall::rename:return /trackedpid[pid]/ {
317 printf("%d %d:%d %s(\\"%s\\", \\"%s\\") = %d\\n",
318 logindex, ppid, pid, probefunc, copyinstr(self->arg0),
319 copyinstr(self->arg1), errno);
320 logindex++;
321 self->arg0 = 0;
322 self->arg1 = 0;
323 }
324
297 /* Track chdir, it's painful because it is only receiving relative path */ 325 /* Track chdir, it's painful because it is only receiving relative path */
298 syscall::chdir:entry /trackedpid[pid]/ { 326 syscall::chdir:entry /trackedpid[pid]/ {
299 self->arg0 = copyinstr(arg0); 327 self->arg0 = arg0;
300 } 328 }
301 syscall::chdir:return /trackedpid[pid] && errno == 0/ { 329 syscall::chdir:return /trackedpid[pid] && errno == 0/ {
302 printf("%d:%d \\"%s\\"; \\"%s\\" = %d", 330 printf("%d %d:%d %s(\\"%s\\") = %d\\n",
303 ppid, pid, execname, self->arg0, errno); 331 logindex, ppid, pid, probefunc, copyinstr(self->arg0), errno);
304 self->cwd = self->arg0; 332 logindex++;
305 } 333 self->arg0 = 0;
334 }
335 /* TODO(maruel): *stat* functions and friends
336 syscall::access:return,
337 syscall::chdir:return,
338 syscall::chflags:return,
339 syscall::chown:return,
340 syscall::chroot:return,
341 syscall::getattrlist:return,
342 syscall::getxattr:return,
343 syscall::lchown:return,
344 syscall::lstat64:return,
345 syscall::lstat:return,
346 syscall::mkdir:return,
347 syscall::pathconf:return,
348 syscall::readlink:return,
349 syscall::removexattr:return,
350 syscall::setxattr:return,
351 syscall::stat64:return,
352 syscall::stat:return,
353 syscall::truncate:return,
354 syscall::unlink:return,
355 syscall::utimes:return,
356 */
306 """ 357 """
307 358
308 @classmethod 359 @classmethod
360 def code(cls, pid, cwd):
361 """Setups the D code to implement child process tracking.
362
363 Injects a fake chdir() trace to simplify parsing. The reason is that the
364 child process is already running at that point so:
365 - no proc_start() is logged for it.
366 - there is no way to figure out the absolute path of cwd in kernel on OSX
367
368 Since the child process is already started, initialize current_processes to
369 1.
370 """
371 pid = str(pid)
372 cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%')
373 return (
374 'dtrace:::BEGIN {\n'
375 ' current_processes = 1;\n'
376 ' logindex = 0;\n'
377 ' trackedpid[') + pid + ('] = 1;\n'
378 ' printf("%d %d:%d chdir(\\"' + cwd + '\\") = 0\\n",\n'
379 ' logindex, 1, ' + pid + ');\n'
380 ' logindex++;\n'
381 ' printf("%d %d:%d %s_%s() = 0\\n",\n'
382 ' logindex, ppid, pid, probeprov, probename);\n'
383 ' logindex++;\n'
384 '}\n') + cls.D_CODE
385
386 class _Context(object):
387 """Processes a dtrace log line and keeps the list of existent and non
388 existent files accessed.
389
390 Ignores directories.
391 """
392 # This is the most common format. index pid function(args) = result
393 RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$')
394
395 # Arguments parsing.
396 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
397 RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (\d+)$')
398 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
399
400 O_DIRECTORY = 0x100000
401
402 def __init__(self, blacklist):
403 # TODO(maruel): Handling chdir() and cwd in general on OSX is tricky
404 # because OSX only keeps relative directory names. In addition, cwd is a
405 # process local variable so forks need to be properly traced and cwd
406 # saved.
407 self._cwd = {}
408 self.blacklist = blacklist
409 self.files = set()
410 self.non_existent = set()
411
412 def on_line(self, line):
413 m = self.RE_HEADER.match(line)
414 assert m, line
415 fn = getattr(
416 self,
417 'handle_%s' % m.group(3).replace('-', '_'),
418 self._handle_ignored)
419 return fn(
420 int(m.group(1)),
421 int(m.group(2)),
422 m.group(3),
423 m.group(4),
424 m.group(5))
425
426 def handle_dtrace_BEGIN(self, _ppid, _pid, _function, args, _result):
427 pass
428
429 def handle_proc_start(self, ppid, pid, _function, _args, result):
430 """Transfers cwd."""
431 assert result == '0'
432 self._cwd[pid] = self._cwd[ppid]
433
434 def handle_proc_exit(self, _ppid, pid, _function, _args, _result):
435 """Removes cwd."""
436 del self._cwd[pid]
437
438 def handle_chdir(self, _ppid, pid, _function, args, result):
439 """Updates cwd."""
440 if result.startswith('0'):
441 cwd = self.RE_CHDIR.match(args).group(1)
442 if not cwd.startswith('/'):
443 cwd2 = os.path.join(self._cwd[pid], cwd)
444 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
445 self._cwd[pid] = cwd2
446 else:
447 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
448 self._cwd[pid] = cwd
449 else:
450 assert False, 'Unexecpected fail: %s' % result
451
452 def handle_open_nocancel(self, ppid, pid, function, args, result):
453 return self.handle_open(ppid, pid, function, args, result)
454
455 def handle_open(self, _ppid, pid, _function, args, result):
456 args = self.RE_OPEN.match(args).groups()
457 flag = int(args[1])
458 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
459 # Ignore directories.
460 return
461 self._handle_file(pid, args[0], result)
462
463 def handle_rename(self, _ppid, pid, _function, args, result):
464 args = self.RE_RENAME.match(args).groups()
465 self._handle_file(pid, args[0], result)
466 self._handle_file(pid, args[1], result)
467
468 def _handle_file(self, pid, filepath, result):
469 if result.startswith(('-1', '2')):
470 return
471 orig_filepath = filepath
472 if not filepath.startswith('/'):
473 filepath = os.path.join(self._cwd[pid], filepath)
474 filepath = os.path.normpath(filepath)
475 if self.blacklist(filepath):
476 return
477 # Sadly, still need to filter out directories here;
478 # saw open_nocancel(".", 0, 0) = 0 lines.
479 if (filepath not in self.files and
480 filepath not in self.non_existent and
481 not os.path.isdir(filepath)):
482 if orig_filepath:
483 logging.debug(
484 '_handle_file(%d, %s) -> %s' % (pid, orig_filepath, filepath))
485 else:
486 logging.debug('_handle_file(%d, %s)' % (pid, filepath))
487 if os.path.isfile(filepath):
488 self.files.add(filepath)
489 else:
490 self.non_existent.add(filepath)
491
492 @staticmethod
493 def _handle_ignored(_ppid, pid, function, args, result):
494 logging.debug('%d %s(%s) = %s' % (pid, function, args, result))
495
496 @classmethod
309 def gen_trace(cls, cmd, cwd, logname): 497 def gen_trace(cls, cmd, cwd, logname):
310 """Runs dtrace on an executable.""" 498 """Runs dtrace on an executable."""
311 logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname)) 499 logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname))
312 silent = not isEnabledFor(logging.INFO) 500 silent = not isEnabledFor(logging.INFO)
313 logging.info('Running: %s' % cmd) 501 logging.info('Running: %s' % cmd)
314 signal = 'Go!' 502 signal = 'Go!'
315 logging.debug('Our pid: %d' % os.getpid()) 503 logging.debug('Our pid: %d' % os.getpid())
316 504
317 # Part 1: start the child process. 505 # Part 1: start the child process.
318 stdout = stderr = None 506 stdout = stderr = None
319 if silent: 507 if silent:
320 stdout = subprocess.PIPE 508 stdout = subprocess.PIPE
321 stderr = subprocess.PIPE 509 stderr = subprocess.PIPE
322 child_cmd = [ 510 child_cmd = [
323 sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'), 511 sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'),
324 ] 512 ]
325 child = subprocess.Popen( 513 child = subprocess.Popen(
326 child_cmd + cmd, 514 child_cmd + cmd,
327 stdin=subprocess.PIPE, 515 stdin=subprocess.PIPE,
328 stdout=stdout, 516 stdout=stdout,
329 stderr=stderr, 517 stderr=stderr,
330 cwd=cwd) 518 cwd=cwd)
331 logging.debug('Started child pid: %d' % child.pid) 519 logging.debug('Started child pid: %d' % child.pid)
332 520
333 # Part 2: start dtrace process. 521 # Part 2: start dtrace process.
522 # Note: do not use the -p flag. It's useless if the initial process quits
523 # too fast, resulting in missing traces from the grand-children. The D code
524 # manages the dtrace lifetime itself.
334 trace_cmd = [ 525 trace_cmd = [
335 'sudo', 526 'sudo',
336 'dtrace', 527 'dtrace',
337 '-x', 'dynvarsize=4m', 528 '-x', 'dynvarsize=4m',
338 '-x', 'evaltime=exec', 529 '-x', 'evaltime=exec',
339 '-n', cls.D_CODE, 530 '-n', cls.code(child.pid, cwd),
340 '-o', '/dev/stderr', 531 '-o', '/dev/stderr',
341 '-p', str(child.pid), 532 '-q',
342 ] 533 ]
343 with open(logname, 'w') as logfile: 534 with open(logname, 'w') as logfile:
344 # TODO(maruel): Inject a chdir() call with the absolute path (!) of cwd to
345 # be able to reconstruct the full paths.
346 #logfile.write('0 chdir("%s") = 0\n' % cwd)
347 #logfile.flush()
348 dtrace = subprocess.Popen( 535 dtrace = subprocess.Popen(
349 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) 536 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
350 logging.debug('Started dtrace pid: %d' % dtrace.pid) 537 logging.debug('Started dtrace pid: %d' % dtrace.pid)
351 538
352 # Part 3: Read until the Go! signal is sent. 539 # Part 3: Read until one line is printed, which signifies dtrace is up and
540 # ready.
353 with open(logname, 'r') as logfile: 541 with open(logname, 'r') as logfile:
354 while True: 542 while 'dtrace_BEGIN' not in logfile.readline():
355 x = logfile.readline() 543 if dtrace.poll() is not None:
356 if signal in x:
357 break 544 break
358 545
359 # Part 4: We can now tell our child to go. 546 try:
360 child.communicate(signal) 547 # Part 4: We can now tell our child to go.
548 child.communicate(signal)
361 549
362 dtrace.wait() 550 dtrace.wait()
363 if dtrace.returncode != 0: 551 if dtrace.returncode != 0:
364 print 'Failure: %d' % dtrace.returncode 552 print 'Failure: %d' % dtrace.returncode
365 with open(logname) as logfile: 553 with open(logname) as logfile:
366 print ''.join(logfile.readlines()[-100:]) 554 print ''.join(logfile.readlines()[-100:])
367 # Find a better way. 555 # Find a better way.
368 os.remove(logname) 556 os.remove(logname)
557 else:
558 # Short the log right away to simplify our life. There isn't much
559 # advantage in keeping it out of order.
560 cls._sort_log(logname)
561 except KeyboardInterrupt:
562 # Still sort when testing.
563 cls._sort_log(logname)
564 raise
565
566 # Note that we ignore the child result code.
369 return dtrace.returncode 567 return dtrace.returncode
370 568
371 @staticmethod 569 @classmethod
372 def parse_log(filename, blacklist): 570 def parse_log(cls, filename, blacklist):
373 """Processes a dtrace log and returns the files opened and the files that do 571 """Processes a dtrace log and returns the files opened and the files that do
374 not exist. 572 not exist.
375 573
574 It does not track directories.
575
376 Most of the time, files that do not exist are temporary test files that 576 Most of the time, files that do not exist are temporary test files that
377 should be put in /tmp instead. See http://crbug.com/116251 577 should be put in /tmp instead. See http://crbug.com/116251
378
379 TODO(maruel): Process chdir() calls so relative paths can be processed.
380 """ 578 """
381 logging.info('parse_log(%s, %s)' % (filename, blacklist)) 579 logging.info('parse_log(%s, %s)' % (filename, blacklist))
382 files = set() 580 context = cls._Context(blacklist)
383 non_existent = set() 581 for line in open(filename, 'rb'):
384 ignored = set() 582 context.on_line(line)
385 # 1=filepath, 2=flags, 3=mode, 4=cwd 5=result 583 # Resolve any symlink we hit.
386 re_open = re.compile( 584 return (
387 # CPU ID PROBE PPID PID EXECNAME 585 set(os.path.realpath(f) for f in context.files),
388 r'^\s+\d+\s+\d+\s+open[^\:]+:return \d+:\d+ \"[^\"]+\"; ' 586 set(os.path.realpath(f) for f in context.non_existent))
389 # PATH FLAGS MODE RESULT 587
390 r'\"([^\"]+)\"; \"([^\"]+)\"; \"([^\"]+)\" \= (.+)$') 588 @staticmethod
391 # TODO(maruel): Handling chdir() and cwd in general on OSX is complex 589 def _sort_log(logname):
392 # because OSX only keeps relative directory names. In addition, cwd is a 590 """Sorts the log back in order when each call occured.
393 # process local variable so forks need to be properly traced and cwd saved. 591
394 for line in open(filename): 592 dtrace doesn't save the buffer in strict order since it keeps one buffer per
395 m = re_open.match(line) 593 CPU.
396 if not m: 594 """
397 continue 595 with open(logname, 'rb') as logfile:
398 if not m.group(4).startswith('0'): 596 lines = [f for f in logfile.readlines() if f.strip()]
399 # Called failed. 597 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
400 continue 598 with open(logname, 'wb') as logfile:
401 filepath = m.group(1) 599 logfile.write(''.join(lines))
402 if not filepath.startswith('/'):
403 # TODO(maruel): This is wrong since the cwd dtrace variable on OSX is
404 # not an absolute path.
405 # filepath = os.path.join(m.group(cwd), filepath)
406 pass
407 if blacklist(filepath):
408 continue
409 if (filepath not in files and
410 filepath not in non_existent and
411 filepath not in ignored):
412 if os.path.isfile(filepath):
413 files.add(filepath)
414 elif not os.path.isdir(filepath):
415 # Silently ignore directories.
416 non_existent.add(filepath)
417 else:
418 ignored.add(filepath)
419 return files, non_existent
420 600
421 601
422 def relevant_files(files, root): 602 def relevant_files(files, root):
423 """Trims the list of files to keep the expected files and unexpected files. 603 """Trims the list of files to keep the expected files and unexpected files.
424 604
425 Unexpected files are files that are not based inside the |root| directory. 605 Unexpected files are files that are not based inside the |root| directory.
426 """ 606 """
427 expected = [] 607 expected = []
428 unexpected = [] 608 unexpected = []
429 for f in files: 609 for f in files:
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after
605 os.path.abspath(options.log), 785 os.path.abspath(options.log),
606 args, 786 args,
607 options.root_dir, 787 options.root_dir,
608 options.gyp, 788 options.gyp,
609 options.product_dir, 789 options.product_dir,
610 options.force) 790 options.force)
611 791
612 792
613 if __name__ == '__main__': 793 if __name__ == '__main__':
614 sys.exit(main()) 794 sys.exit(main())
OLDNEW
« no previous file with comments | « tools/isolate/isolate_test.py ('k') | tools/isolate/trace_inputs_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698