Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(153)

Side by Side Diff: tools/isolate/trace_inputs.py

Issue 9834052: [strace] Add support for interrupted calls and proper chdir handling. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix test again Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/isolate/isolate_test.py ('k') | tools/isolate/trace_inputs_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Runs strace or dtrace on a test and processes the logs to extract the 6 """Runs strace or dtrace on a test and processes the logs to extract the
7 dependencies from the source tree. 7 dependencies from the source tree.
8 8
9 Automatically extracts directories where all the files are used to make the 9 Automatically extracts directories where all the files are used to make the
10 dependencies list more compact. 10 dependencies list more compact.
(...skipping 11 matching lines...) Expand all
22 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) 22 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
23 23
24 24
25 def isEnabledFor(level): 25 def isEnabledFor(level):
26 return logging.getLogger().isEnabledFor(level) 26 return logging.getLogger().isEnabledFor(level)
27 27
28 28
29 class Strace(object): 29 class Strace(object):
30 """strace implies linux.""" 30 """strace implies linux."""
31 IGNORED = ( 31 IGNORED = (
32 '/bin',
32 '/dev', 33 '/dev',
33 '/etc', 34 '/etc',
34 '/lib', 35 '/lib',
35 '/proc', 36 '/proc',
36 '/sys', 37 '/sys',
37 '/tmp', 38 '/tmp',
38 '/usr', 39 '/usr',
39 '/var', 40 '/var',
40 ) 41 )
41 42
42 @staticmethod 43 class _Context(object):
43 def gen_trace(cmd, cwd, logname): 44 """Processes a strace log line and keeps the list of existent and non
45 existent files accessed.
46
47 Ignores directories.
48 """
49 # This is the most common format. pid function(args) = result
50 RE_HEADER = re.compile(r'^(\d+)\s+([^\(]+)\((.+?)\)\s+= (.+)$')
51 # An interrupted function call, only grab the minimal header.
52 RE_UNFINISHED = re.compile(r'^(\d+)\s+([^\(]+).*$')
53 UNFINISHED = ' <unfinished ...>'
54 # A resumed function call.
55 RE_RESUMED = re.compile(r'^(\d+)\s+<\.\.\. ([^ ]+) resumed> (.+)$')
56 # A process received a signal.
57 RE_SIGNAL = re.compile(r'^\d+\s+--- SIG[A-Z]+ .+ ---')
58 # A process didn't handle a signal.
59 RE_KILLED = re.compile(r'^(\d+) \+\+\+ killed by ([A-Z]+) \+\+\+$')
60
61 # Arguments parsing.
62 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
63 RE_EXECVE = re.compile(r'^\"(.+?)\", \[.+?\], \[.+?\]$')
64 RE_OPEN2 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+)$')
65 RE_OPEN3 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+), (\d+)$')
66 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
67
68 def __init__(self, blacklist):
69 self._cwd = {}
70 self.blacklist = blacklist
71 self.files = set()
72 self.non_existent = set()
73 # Key is a tuple(pid, function name)
74 self._pending_calls = {}
75
76 @classmethod
77 def traces(cls):
78 prefix = 'handle_'
79 return [i[len(prefix):] for i in dir(cls) if i.startswith(prefix)]
80
81 def on_line(self, line):
82 line = line.strip()
83 if self.RE_SIGNAL.match(line):
84 # Ignore signals.
85 return
86
87 m = self.RE_KILLED.match(line)
88 if m:
89 self.handle_exit_group(int(m.group(1)), m.group(2), None, None)
90 return
91
92 if line.endswith(self.UNFINISHED):
93 line = line[:-len(self.UNFINISHED)]
94 m = self.RE_UNFINISHED.match(line)
95 assert m, line
96 self._pending_calls[(m.group(1), m.group(2))] = line
97 return
98
99 m = self.RE_RESUMED.match(line)
100 if m:
101 pending = self._pending_calls.pop((m.group(1), m.group(2)))
102 # Reconstruct the line.
103 line = pending + m.group(3)
104
105 m = self.RE_HEADER.match(line)
106 assert m, line
107 return getattr(self, 'handle_%s' % m.group(2))(
108 int(m.group(1)),
109 m.group(2),
110 m.group(3),
111 m.group(4))
112
113 def handle_chdir(self, pid, _function, args, result):
114 """Updates cwd."""
115 if result.startswith('0'):
116 cwd = self.RE_CHDIR.match(args).group(1)
117 if not cwd.startswith('/'):
118 cwd2 = os.path.join(self._cwd[pid], cwd)
119 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
120 self._cwd[pid] = cwd2
121 else:
122 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
123 self._cwd[pid] = cwd
124 else:
125 assert False, 'Unexecpected fail: %s' % result
126
127 def handle_clone(self, pid, _function, _args, result):
128 """Transfers cwd."""
129 if result == '? ERESTARTNOINTR (To be restarted)':
130 return
131 self._cwd[int(result)] = self._cwd[pid]
132
133 def handle_execve(self, pid, _function, args, result):
134 self._handle_file(pid, self.RE_EXECVE.match(args).group(1), result)
135
136 def handle_exit_group(self, pid, _function, _args, _result):
137 """Removes cwd."""
138 del self._cwd[pid]
139
140 @staticmethod
141 def handle_fork(_pid, _function, args, result):
142 assert False, (args, result)
143
144 def handle_open(self, pid, _function, args, result):
145 args = (self.RE_OPEN3.match(args) or self.RE_OPEN2.match(args)).groups()
146 if 'O_DIRECTORY' in args[1]:
147 return
148 self._handle_file(pid, args[0], result)
149
150 def handle_rename(self, pid, _function, args, result):
151 args = self.RE_RENAME.match(args).groups()
152 self._handle_file(pid, args[0], result)
153 self._handle_file(pid, args[1], result)
154
155 @staticmethod
156 def handle_stat64(_pid, _function, args, result):
157 assert False, (args, result)
158
159 @staticmethod
160 def handle_vfork(_pid, _function, args, result):
161 assert False, (args, result)
162
163 def _handle_file(self, pid, filepath, result):
164 if result.startswith('-1'):
165 return
166 if not filepath.startswith('/'):
167 filepath2 = os.path.join(self._cwd[pid], filepath)
168 logging.debug('_handle_file(%d, %s) -> %s' % (pid, filepath, filepath2))
169 filepath = filepath2
170 else:
171 logging.debug('_handle_file(%d, %s)' % (pid, filepath))
172
173 if self.blacklist(filepath):
174 return
175 if filepath not in self.files and filepath not in self.non_existent:
176 if os.path.isfile(filepath):
177 self.files.add(filepath)
178 else:
179 self.non_existent.add(filepath)
180
181 @classmethod
182 def gen_trace(cls, cmd, cwd, logname):
44 """Runs strace on an executable.""" 183 """Runs strace on an executable."""
45 logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname)) 184 logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname))
46 silent = not isEnabledFor(logging.INFO) 185 silent = not isEnabledFor(logging.INFO)
47 stdout = stderr = None 186 stdout = stderr = None
48 if silent: 187 if silent:
49 stdout = subprocess.PIPE 188 stdout = subprocess.PIPE
50 stderr = subprocess.PIPE 189 stderr = subprocess.PIPE
51 trace_cmd = ['strace', '-f', '-e', 'trace=open,chdir', '-o', logname] 190 traces = ','.join(cls._Context.traces())
191 trace_cmd = ['strace', '-f', '-e', 'trace=%s' % traces, '-o', logname]
52 p = subprocess.Popen( 192 p = subprocess.Popen(
53 trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr) 193 trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr)
54 out, err = p.communicate() 194 out, err = p.communicate()
55 # Once it's done, inject a chdir() call to cwd to be able to reconstruct 195 # Once it's done, inject a chdir() call to cwd to be able to reconstruct
56 # the full paths. 196 # the full paths.
57 # TODO(maruel): cwd should be saved at each process creation, so forks needs 197 # TODO(maruel): cwd should be saved at each process creation, so forks needs
58 # to be traced properly. 198 # to be traced properly.
59 if os.path.isfile(logname): 199 if os.path.isfile(logname):
60 with open(logname) as f: 200 with open(logname) as f:
61 content = f.read() 201 content = f.read()
62 with open(logname, 'w') as f: 202 with open(logname, 'w') as f:
63 f.write('0 chdir("%s") = 0\n' % cwd) 203 pid = content.split(' ', 1)[0]
204 f.write('%s chdir("%s") = 0\n' % (pid, cwd))
64 f.write(content) 205 f.write(content)
65 206
66 if p.returncode != 0: 207 if p.returncode != 0:
67 print 'Failure: %d' % p.returncode 208 print 'Failure: %d' % p.returncode
68 # pylint: disable=E1103 209 # pylint: disable=E1103
69 if out: 210 if out:
70 print ''.join(out.splitlines(True)[-100:]) 211 print ''.join(out.splitlines(True)[-100:])
71 if err: 212 if err:
72 print ''.join(err.splitlines(True)[-100:]) 213 print ''.join(err.splitlines(True)[-100:])
73 return p.returncode 214 return p.returncode
74 215
75 @staticmethod 216 @classmethod
76 def parse_log(filename, blacklist): 217 def parse_log(cls, filename, blacklist):
77 """Processes a strace log and returns the files opened and the files that do 218 """Processes a strace log and returns the files opened and the files that do
78 not exist. 219 not exist.
79 220
221 It does not track directories.
222
80 Most of the time, files that do not exist are temporary test files that 223 Most of the time, files that do not exist are temporary test files that
81 should be put in /tmp instead. See http://crbug.com/116251 224 should be put in /tmp instead. See http://crbug.com/116251
82 """ 225 """
83 logging.info('parse_log(%s, %s)' % (filename, blacklist)) 226 logging.info('parse_log(%s, %s)' % (filename, blacklist))
84 files = set() 227 context = cls._Context(blacklist)
85 non_existent = set()
86 # 1=pid, 2=filepath, 3=mode, 4=result
87 re_open = re.compile(
88 # PID open(PATH, MODE) = RESULT
89 r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$')
90 # 1=pid 2=path 3=result
91 re_chdir = re.compile(
92 # PID chdir(PATH) = RESULT
93 r'^(\d+)\s+chdir\("([^"]+)"\)\s+= (.+)$')
94
95 # TODO(maruel): This code is totally wrong. cwd is a process local variable
96 # so this needs to be a dict with key = pid.
97 cwd = None
98 for line in open(filename): 228 for line in open(filename):
99 m = re_open.match(line) 229 context.on_line(line)
100 if m: 230 # Resolve any symlink we hit.
101 if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3): 231 return (
102 # Not present or a directory. 232 set(os.path.realpath(f) for f in context.files),
103 continue 233 set(os.path.realpath(f) for f in context.non_existent))
104 filepath = m.group(2)
105 if not filepath.startswith('/'):
106 filepath = os.path.join(cwd, filepath)
107 if blacklist(filepath):
108 continue
109 if filepath not in files and filepath not in non_existent:
110 if os.path.isfile(filepath):
111 files.add(filepath)
112 else:
113 non_existent.add(filepath)
114 m = re_chdir.match(line)
115 if m:
116 if m.group(3).startswith('0'):
117 cwd = m.group(2)
118 else:
119 assert False, 'Unexecpected fail: %s' % line
120
121 return files, non_existent
122 234
123 235
124 class Dtrace(object): 236 class Dtrace(object):
125 """Uses DTrace framework through dtrace. Requires root access. 237 """Uses DTrace framework through dtrace. Requires root access.
126 238
127 Implies Mac OSX. 239 Implies Mac OSX.
128 240
129 dtruss can't be used because it has compatibility issues with python. 241 dtruss can't be used because it has compatibility issues with python.
130 """ 242 """
131 IGNORED = ( 243 IGNORED = (
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
309 421
310 def relevant_files(files, root): 422 def relevant_files(files, root):
311 """Trims the list of files to keep the expected files and unexpected files. 423 """Trims the list of files to keep the expected files and unexpected files.
312 424
313 Unexpected files are files that are not based inside the |root| directory. 425 Unexpected files are files that are not based inside the |root| directory.
314 """ 426 """
315 expected = [] 427 expected = []
316 unexpected = [] 428 unexpected = []
317 for f in files: 429 for f in files:
318 if f.startswith(root): 430 if f.startswith(root):
319 expected.append(f[len(root):]) 431 f = f[len(root):]
432 assert f
433 expected.append(f)
320 else: 434 else:
321 unexpected.append(f) 435 unexpected.append(f)
322 return sorted(set(expected)), sorted(set(unexpected)) 436 return sorted(set(expected)), sorted(set(unexpected))
323 437
324 438
325 def extract_directories(files, root): 439 def extract_directories(files, root):
326 """Detects if all the files in a directory were loaded and if so, replace the 440 """Detects if all the files in a directory were loaded and if so, replace the
327 individual files by the directory entry. 441 individual files by the directory entry.
328 """ 442 """
329 directories = set(os.path.dirname(f) for f in files) 443 directories = set(os.path.dirname(f) for f in files)
(...skipping 10 matching lines...) Expand all
340 return sorted(files) 454 return sorted(files)
341 455
342 456
343 def trace_inputs( 457 def trace_inputs(
344 logfile, cmd, root_dir, gyp_proj_dir, product_dir, force_trace): 458 logfile, cmd, root_dir, gyp_proj_dir, product_dir, force_trace):
345 """Tries to load the logs if available. If not, trace the test. 459 """Tries to load the logs if available. If not, trace the test.
346 460
347 Symlinks are not processed at all. 461 Symlinks are not processed at all.
348 """ 462 """
349 logging.debug( 463 logging.debug(
350 'trace_inputs(%s, %s, %s, %s, %s)' % ( 464 'trace_inputs(%s, %s, %s, %s, %s, %s)' % (
351 logfile, cmd, root_dir, gyp_proj_dir, product_dir)) 465 logfile, cmd, root_dir, gyp_proj_dir, product_dir, force_trace))
352 466
353 # It is important to have unambiguous path. 467 # It is important to have unambiguous path.
354 assert os.path.isabs(root_dir), root_dir 468 assert os.path.isabs(root_dir), root_dir
355 assert os.path.isabs(logfile), logfile 469 assert os.path.isabs(logfile), logfile
356 assert os.path.isabs(cmd[0]), cmd[0] 470 assert (
471 (os.path.isfile(logfile) and not force_trace) or os.path.isabs(cmd[0])
472 ), cmd[0]
473 # Resolve any symlink
474 root_dir = os.path.realpath(root_dir)
357 475
358 def print_if(txt): 476 def print_if(txt):
359 if gyp_proj_dir is None: 477 if gyp_proj_dir is None:
360 print(txt) 478 print(txt)
361 479
362 if sys.platform == 'linux2': 480 if sys.platform == 'linux2':
363 api = Strace() 481 api = Strace()
364 elif sys.platform == 'darwin': 482 elif sys.platform == 'darwin':
365 api = Dtrace() 483 api = Dtrace()
366 else: 484 else:
367 print >> sys.stderr, 'Unsupported platform' 485 print >> sys.stderr, 'Unsupported platform'
368 return 1 486 return 1
369 487
370 if not os.path.isfile(logfile) or force_trace: 488 if not os.path.isfile(logfile) or force_trace:
371 if os.path.isfile(logfile): 489 if os.path.isfile(logfile):
372 os.remove(logfile) 490 os.remove(logfile)
373 print_if('Tracing... %s' % cmd) 491 print_if('Tracing... %s' % cmd)
374 returncode = api.gen_trace(cmd, root_dir, logfile) 492 cwd = root_dir
493 # TODO(maruel): If --gyp is specified, use it as the cwd.
494 #if gyp_proj_dir:
495 # cwd = os.path.join(cwd, gyp_proj_dir)
496 returncode = api.gen_trace(cmd, cwd, logfile)
375 if returncode and not force_trace: 497 if returncode and not force_trace:
376 return returncode 498 return returncode
377 499
378 def blacklist(f): 500 def blacklist(f):
379 """Strips ignored paths.""" 501 """Strips ignored paths."""
380 return f.startswith(api.IGNORED) or f.endswith('.pyc') 502 return f.startswith(api.IGNORED) or f.endswith('.pyc')
381 503
382 print_if('Loading traces... %s' % logfile) 504 print_if('Loading traces... %s' % logfile)
383 files, non_existent = api.parse_log(logfile, blacklist) 505 files, non_existent = api.parse_log(logfile, blacklist)
384 506
385 print_if('Total: %d' % len(files)) 507 print_if('Total: %d' % len(files))
386 print_if('Non existent: %d' % len(non_existent)) 508 print_if('Non existent: %d' % len(non_existent))
387 for f in non_existent: 509 for f in non_existent:
388 print_if(' %s' % f) 510 print_if(' %s' % f)
389 511
390 expected, unexpected = relevant_files(files, root_dir.rstrip('/') + '/') 512 expected, unexpected = relevant_files(files, root_dir.rstrip('/') + '/')
391 if unexpected: 513 if unexpected:
392 print_if('Unexpected: %d' % len(unexpected)) 514 print_if('Unexpected: %d' % len(unexpected))
393 for f in unexpected: 515 for f in unexpected:
394 print_if(' %s' % f) 516 print_if(' %s' % f)
395 517
396 simplified = extract_directories(expected, root_dir) 518 simplified = extract_directories(expected, root_dir)
397 print_if('Interesting: %d reduced to %d' % (len(expected), len(simplified))) 519 print_if('Interesting: %d reduced to %d' % (len(expected), len(simplified)))
398 for f in simplified: 520 for f in simplified:
399 print_if(' %s' % f) 521 print_if(' %s' % f)
400 522
401 if gyp_proj_dir is not None: 523 if gyp_proj_dir is not None:
402 def cleanuppath(x): 524 def cleanuppath(x):
525 """Cleans up a relative path."""
403 if x: 526 if x:
404 x = x.rstrip('/') 527 x = x.rstrip('/')
405 if x == '.': 528 if x == '.':
406 x = '' 529 x = ''
407 if x: 530 if x:
408 x += '/' 531 x += '/'
409 return x 532 return x
410 533
411 gyp_proj_dir = cleanuppath(gyp_proj_dir) 534 gyp_proj_dir = cleanuppath(gyp_proj_dir)
412 product_dir = cleanuppath(product_dir) 535 product_dir = cleanuppath(product_dir)
413 536
414 def fix(f): 537 def fix(f):
415 """Bases the file on the most restrictive variable.""" 538 """Bases the file on the most restrictive variable."""
539 logging.debug('fix(%s)' % f)
416 if product_dir and f.startswith(product_dir): 540 if product_dir and f.startswith(product_dir):
417 return '<(PRODUCT_DIR)/%s' % f[len(product_dir):] 541 return '<(PRODUCT_DIR)/%s' % f[len(product_dir):]
418 elif gyp_proj_dir and f.startswith(gyp_proj_dir): 542 elif gyp_proj_dir and f.startswith(gyp_proj_dir):
419 return f[len(gyp_proj_dir):] 543 # May be empty if the whole directory containing the gyp file is needed.
544 return f[len(gyp_proj_dir):] or './'
420 else: 545 else:
421 return '<(DEPTH)/%s' % f 546 return '<(DEPTH)/%s' % f
422 547
423 corrected = [fix(f) for f in simplified] 548 corrected = [fix(f) for f in simplified]
424 files = [f for f in corrected if not f.endswith('/')] 549 files = [f for f in corrected if not f.endswith('/')]
425 dirs = [f for f in corrected if f.endswith('/')] 550 dirs = [f for f in corrected if f.endswith('/')]
426 # Constructs the python code manually. 551 # Constructs the python code manually.
427 print( 552 print(
428 '{\n' 553 '{\n'
429 ' \'variables\': {\n' 554 ' \'variables\': {\n'
(...skipping 19 matching lines...) Expand all
449 '-g', '--gyp', 574 '-g', '--gyp',
450 help='When specified, outputs the inputs files in a way compatible for ' 575 help='When specified, outputs the inputs files in a way compatible for '
451 'gyp processing. Should be set to the relative path containing the ' 576 'gyp processing. Should be set to the relative path containing the '
452 'gyp file, e.g. \'chrome\' or \'net\'') 577 'gyp file, e.g. \'chrome\' or \'net\'')
453 parser.add_option( 578 parser.add_option(
454 '-p', '--product-dir', default='out/Release', 579 '-p', '--product-dir', default='out/Release',
455 help='Directory for PRODUCT_DIR. Default: %default') 580 help='Directory for PRODUCT_DIR. Default: %default')
456 parser.add_option( 581 parser.add_option(
457 '--root-dir', default=ROOT_DIR, 582 '--root-dir', default=ROOT_DIR,
458 help='Root directory to base everything off. Default: %default') 583 help='Root directory to base everything off. Default: %default')
459 parser.add_option('-f', '--force', help='Force to retrace the file') 584 parser.add_option(
585 '-f', '--force', action='store_true', help='Force to retrace the file')
460 586
461 options, args = parser.parse_args() 587 options, args = parser.parse_args()
462 level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)] 588 level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]
463 logging.basicConfig( 589 logging.basicConfig(
464 level=level, 590 level=level,
465 format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s') 591 format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s')
466 592
467 if not args:
468 parser.error('Must supply a command to run')
469 if not options.log: 593 if not options.log:
470 parser.error('Must supply a log file with -l') 594 parser.error('Must supply a log file with -l')
595 if not args:
596 if not os.path.isfile(options.log) or options.force:
597 parser.error('Must supply a command to run')
598 else:
599 args[0] = os.path.abspath(args[0])
471 600
472 args[0] = os.path.abspath(args[0]) 601 if options.root_dir:
602 options.root_dir = os.path.abspath(options.root_dir)
603
473 return trace_inputs( 604 return trace_inputs(
474 os.path.abspath(options.log), 605 os.path.abspath(options.log),
475 args, 606 args,
476 options.root_dir, 607 options.root_dir,
477 options.gyp, 608 options.gyp,
478 options.product_dir, 609 options.product_dir,
479 options.force) 610 options.force)
480 611
481 612
482 if __name__ == '__main__': 613 if __name__ == '__main__':
483 sys.exit(main()) 614 sys.exit(main())
OLDNEW
« no previous file with comments | « tools/isolate/isolate_test.py ('k') | tools/isolate/trace_inputs_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698