Chromium Code Reviews

Side by Side Diff: tools/isolate/trace_inputs.py

Issue 10442068: Add more consistent exception generation and handling (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: fix small bug Created 8 years, 6 months ago
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding=utf-8 2 # coding=utf-8
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be 4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file. 5 # found in the LICENSE file.
6 6
7 """Traces an executable and its child processes and extract the files accessed 7 """Traces an executable and its child processes and extract the files accessed
8 by them. 8 by them.
9 9
10 The implementation uses OS-specific APIs. The native Kernel logger and the ETL 10 The implementation uses OS-specific APIs. The native Kernel logger and the ETL
(...skipping 30 matching lines...)
41 import Carbon.File # pylint: disable=F0401 41 import Carbon.File # pylint: disable=F0401
42 42
43 43
44 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 44 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
45 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) 45 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
46 46
47 KEY_TRACKED = 'isolate_dependency_tracked' 47 KEY_TRACKED = 'isolate_dependency_tracked'
48 KEY_UNTRACKED = 'isolate_dependency_untracked' 48 KEY_UNTRACKED = 'isolate_dependency_untracked'
49 49
50 50
51 class TracingFailure(Exception):
52 """An exception occured during tracing."""
53 def __init__(self, description, pid, line_number, line, *args):
54 super(TracingFailure, self).__init__(
55 description, pid, line_number, line, *args)
56 self.description = description
57 self.pid = pid
58 self.line_number = line_number
59 self.line = line
60 self.extra = args
61
62
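A minimal usage sketch, not part of the patch, of the pattern the parsers below follow with this exception: a handler raises it with only a description, and the caller patches in pid, line and line_number before re-raising.

# Hypothetical illustration; the values are made up.
try:
  raise TracingFailure('Failed to parse arguments', None, None, None)
except TracingFailure, e:
  e.pid = 1234                  # filled in by the caller, as in on_line()
  e.line_number = 42
  e.line = '<raw trace line>'
  raise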
51 ## OS-specific functions 63 ## OS-specific functions
52 64
53 if sys.platform == 'win32': 65 if sys.platform == 'win32':
54 def QueryDosDevice(drive_letter): 66 def QueryDosDevice(drive_letter):
55 """Returns the Windows 'native' path for a DOS drive letter.""" 67 """Returns the Windows 'native' path for a DOS drive letter."""
56 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter 68 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
57 # Guesswork. QueryDosDeviceW never returns the required number of bytes. 69 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
58 chars = 1024 70 chars = 1024
59 drive_letter = unicode(drive_letter) 71 drive_letter = unicode(drive_letter)
60 p = create_unicode_buffer(chars) 72 p = create_unicode_buffer(chars)
(...skipping 56 matching lines...)
117 """Returns the 'ANSI' code page associated to the process.""" 129 """Returns the 'ANSI' code page associated to the process."""
118 return 'cp%d' % int(windll.kernel32.GetACP()) 130 return 'cp%d' % int(windll.kernel32.GetACP())
119 131
120 132
121 class DosDriveMap(object): 133 class DosDriveMap(object):
122 """Maps \Device\HarddiskVolumeN to N: on Windows.""" 134 """Maps \Device\HarddiskVolumeN to N: on Windows."""
123 # Keep one global cache. 135 # Keep one global cache.
124 _MAPPING = {} 136 _MAPPING = {}
125 137
126 def __init__(self): 138 def __init__(self):
139 """Lazy loads the cache."""
127 if not self._MAPPING: 140 if not self._MAPPING:
128 # This is related to UNC resolver on windows. Ignore that. 141 # This is related to UNC resolver on windows. Ignore that.
129 self._MAPPING['\\Device\\Mup'] = None 142 self._MAPPING['\\Device\\Mup'] = None
130 143
131 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)): 144 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
132 try: 145 try:
133 letter = '%s:' % letter 146 letter = '%s:' % letter
134 mapped = QueryDosDevice(letter) 147 mapped = QueryDosDevice(letter)
135 # It can happen. Assert until we see it happens in the wild. In 148 if mapped in self._MAPPING:
136 # practice, prefer the lower drive letter. 149 logging.warn(
137 assert mapped not in self._MAPPING 150 ('Two drives: \'%s\' and \'%s\' are mapped to the same disk'
138 if mapped not in self._MAPPING: 151 '. Drive letters are a user-mode concept and the kernel '
152 'traces only have NT paths, so all accesses will be '
153 'associated with the first drive letter, independent of the '
154 'actual letter used by the code') % (
155 self._MAPPING[mapped], letter))
156 else:
139 self._MAPPING[mapped] = letter 157 self._MAPPING[mapped] = letter
140 except WindowsError: # pylint: disable=E0602 158 except WindowsError: # pylint: disable=E0602
141 pass 159 pass
142 160
143 def to_dos(self, path): 161 def to_win32(self, path):
144 """Converts a native NT path to DOS path.""" 162 """Converts a native NT path to Win32/DOS compatible path."""
145 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path) 163 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
146 assert match, path 164 if not match:
165 raise ValueError(
166 'Can\'t convert %s into a Win32 compatible path' % path,
167 path)
147 if not match.group(1) in self._MAPPING: 168 if not match.group(1) in self._MAPPING:
148 # Unmapped partitions may be accessed by windows for the 169 # Unmapped partitions may be accessed by windows for the
149 # fun of it while the test is running. Discard these. 170 # fun of it while the test is running. Discard these.
150 return None 171 return None
151 drive = self._MAPPING[match.group(1)] 172 drive = self._MAPPING[match.group(1)]
152 if not drive or not match.group(2): 173 if not drive or not match.group(2):
153 return drive 174 return drive
154 return drive + match.group(2) 175 return drive + match.group(2)
155 176
156 177
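A rough usage sketch of to_win32(); the drive letter below is hypothetical, since the real mapping is discovered at runtime through QueryDosDevice().

# If \Device\HarddiskVolume1 happens to be mapped to C:, the NT path comes
# back as a Win32/DOS path; paths on unmapped devices return None.
drive_map = DosDriveMap()
print drive_map.to_win32(u'\\Device\\HarddiskVolume1\\Windows\\notepad.exe')
# -> 'C:\\Windows\\notepad.exe' under that assumed mapping.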
157 def isabs(path): 178 def isabs(path):
158 """Accepts X: as an absolute path, unlike python's os.path.isabs().""" 179 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
159 return os.path.isabs(path) or len(path) == 2 and path[1] == ':' 180 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
160 181
161 182
162 def get_native_path_case(path): 183 def get_native_path_case(path):
163 """Returns the native path case for an existing file. 184 """Returns the native path case for an existing file.
164 185
165 On Windows, removes any leading '\\?\'. 186 On Windows, removes any leading '\\?\'.
166 """ 187 """
167 assert isabs(path), path 188 if not isabs(path):
189 raise ValueError(
190 'Can\'t get native path case for a non-absolute path: %s' % path,
191 path)
168 # Windows used to have an option to turn on case sensitivity on non Win32 192 # Windows used to have an option to turn on case sensitivity on non Win32
169 # subsystem but that's out of scope here and isn't supported anymore. 193 # subsystem but that's out of scope here and isn't supported anymore.
170 # Go figure why GetShortPathName() is needed. 194 # Go figure why GetShortPathName() is needed.
171 path = GetLongPathName(GetShortPathName(path)) 195 path = GetLongPathName(GetShortPathName(path))
172 if path.startswith('\\\\?\\'): 196 if path.startswith('\\\\?\\'):
173 return path[4:] 197 return path[4:]
174 return path 198 return path
175 199
176 200
177 def CommandLineToArgvW(command_line): 201 def CommandLineToArgvW(command_line):
178 """Splits a commandline into argv using CommandLineToArgvW().""" 202 """Splits a commandline into argv using CommandLineToArgvW()."""
179 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx 203 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
180 size = c_int() 204 size = c_int()
181 ptr = windll.shell32.CommandLineToArgvW(unicode(command_line), byref(size)) 205 ptr = windll.shell32.CommandLineToArgvW(unicode(command_line), byref(size))
182 try: 206 try:
183 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)] 207 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
184 finally: 208 finally:
185 windll.kernel32.LocalFree(ptr) 209 windll.kernel32.LocalFree(ptr)
186 210
187 211
188 elif sys.platform == 'darwin': 212 elif sys.platform == 'darwin':
189 213
190 214
191 # On non-windows, keep the stdlib behavior. 215 # On non-windows, keep the stdlib behavior.
192 isabs = os.path.isabs 216 isabs = os.path.isabs
193 217
194 218
195 def get_native_path_case(path): 219 def get_native_path_case(path):
196 """Returns the native path case for an existing file.""" 220 """Returns the native path case for an existing file."""
197 assert isabs(path), path 221 if not isabs(path):
222 raise ValueError(
223 'Can\'t get native path case for a non-absolute path: %s' % path,
224 path)
198 # Technically, it's only HFS+ on OSX that is case insensitive. It's the 225 # Technically, it's only HFS+ on OSX that is case insensitive. It's the
199 # default setting on HFS+ but can be changed. 226 # default setting on HFS+ but can be changed.
200 rel_ref, _ = Carbon.File.FSPathMakeRef(path) 227 rel_ref, _ = Carbon.File.FSPathMakeRef(path)
201 return rel_ref.FSRefMakePath() 228 return rel_ref.FSRefMakePath()
202 229
203 230
204 else: # OSes other than Windows and OSX. 231 else: # OSes other than Windows and OSX.
205 232
206 233
207 # On non-windows, keep the stdlib behavior. 234 # On non-windows, keep the stdlib behavior.
208 isabs = os.path.isabs 235 isabs = os.path.isabs
209 236
210 237
211 def get_native_path_case(path): 238 def get_native_path_case(path):
212 """Returns the native path case for an existing file. 239 """Returns the native path case for an existing file.
213 240
214 On OSes other than OSX and Windows, assume the file system is 241 On OSes other than OSX and Windows, assume the file system is
215 case-sensitive. 242 case-sensitive.
216 243
217 TODO(maruel): This is not strictly true. Implement if necessary. 244 TODO(maruel): This is not strictly true. Implement if necessary.
218 """ 245 """
219 assert isabs(path), path 246 if not isabs(path):
247 raise ValueError(
248 'Can\'t get native path case for a non-absolute path: %s' % path,
249 path)
220 # Give up on cygwin, as GetLongPathName() can't be called. 250 # Give up on cygwin, as GetLongPathName() can't be called.
221 return path 251 return path
222 252
223 253
224 def get_flavor(): 254 def get_flavor():
225 """Returns the system default flavor. Copied from gyp/pylib/gyp/common.py.""" 255 """Returns the system default flavor. Copied from gyp/pylib/gyp/common.py."""
226 flavors = { 256 flavors = {
227 'cygwin': 'win', 257 'cygwin': 'win',
228 'win32': 'win', 258 'win32': 'win',
229 'darwin': 'mac', 259 'darwin': 'mac',
(...skipping 55 matching lines...)
285 # a series of 3 dots or a comma. 315 # a series of 3 dots or a comma.
286 NEED_SPACE, # Right after a comma 316 NEED_SPACE, # Right after a comma
287 NEED_DOT_2, # Found a dot, need a second one. 317 NEED_DOT_2, # Found a dot, need a second one.
288 NEED_DOT_3, # Found second dot, need a third one. 318 NEED_DOT_3, # Found second dot, need a third one.
289 NEED_COMMA, # Found third dot, need a comma. 319 NEED_COMMA, # Found third dot, need a comma.
290 ) = range(7) 320 ) = range(7)
291 321
292 state = NEED_QUOTE 322 state = NEED_QUOTE
293 current_argument = '' 323 current_argument = ''
294 out = [] 324 out = []
295 for i in text: 325 for index, char in enumerate(text):
296 if i == '"': 326 if char == '"':
297 if state == NEED_QUOTE: 327 if state == NEED_QUOTE:
298 state = INSIDE_STRING 328 state = INSIDE_STRING
299 elif state == INSIDE_STRING: 329 elif state == INSIDE_STRING:
300 # The argument is now closed. 330 # The argument is now closed.
301 out.append(current_argument) 331 out.append(current_argument)
302 current_argument = '' 332 current_argument = ''
303 state = NEED_COMMA_OR_DOT 333 state = NEED_COMMA_OR_DOT
304 else: 334 else:
305 assert False, text 335 raise ValueError(
306 elif i == ',': 336 'Can\'t process char at column %d for: %r' % (index, text),
337 index,
338 text)
339 elif char == ',':
307 if state in (NEED_COMMA_OR_DOT, NEED_COMMA): 340 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
308 state = NEED_SPACE 341 state = NEED_SPACE
309 else: 342 else:
310 assert False, text 343 raise ValueError(
311 elif i == ' ': 344 'Can\'t process char at column %d for: %r' % (index, text),
345 index,
346 text)
347 elif char == ' ':
312 if state == NEED_SPACE: 348 if state == NEED_SPACE:
313 state = NEED_QUOTE 349 state = NEED_QUOTE
314 if state == INSIDE_STRING: 350 if state == INSIDE_STRING:
315 current_argument += i 351 current_argument += char
316 elif i == '.': 352 elif char == '.':
317 if state == NEED_COMMA_OR_DOT: 353 if state == NEED_COMMA_OR_DOT:
318 # The string is incomplete, this means the strace -s flag should be 354 # The string is incomplete, this means the strace -s flag should be
319 # increased. 355 # increased.
320 state = NEED_DOT_2 356 state = NEED_DOT_2
321 elif state == NEED_DOT_2: 357 elif state == NEED_DOT_2:
322 state = NEED_DOT_3 358 state = NEED_DOT_3
323 elif state == NEED_DOT_3: 359 elif state == NEED_DOT_3:
324 state = NEED_COMMA 360 state = NEED_COMMA
325 elif state == INSIDE_STRING: 361 elif state == INSIDE_STRING:
326 current_argument += i 362 current_argument += char
327 else: 363 else:
328 assert False, text 364 raise ValueError(
365 'Can\'t process char at column %d for: %r' % (index, text),
366 index,
367 text)
329 else: 368 else:
330 if state == INSIDE_STRING: 369 if state == INSIDE_STRING:
331 current_argument += i 370 current_argument += char
332 else: 371 else:
333 assert False, text 372 raise ValueError(
334 assert state in (NEED_COMMA, NEED_COMMA_OR_DOT) 373 'Can\'t process char at column %d for: %r' % (index, text),
374 index,
375 text)
376 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
377 raise ValueError(
378 'String is incorrectly terminated: %r' % text,
379 text)
335 return out 380 return out
336 381
337 382
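A small sketch of the expected input and output for process_quoted_arguments(), assuming an strace-style argument list (the strings are illustrative only):

# strace prints execve() arguments as a comma-separated list of quoted
# strings; the state machine above turns that back into a list.
assert process_quoted_arguments('"/bin/ls", "-l", "/tmp"') == [
    '/bin/ls', '-l', '/tmp']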
338 class ApiBase(object): 383 class ApiBase(object):
339 """OS-agnostic API to trace a process and its children.""" 384 """OS-agnostic API to trace a process and its children."""
340 class Context(object): 385 class Context(object):
341 """Processes one log line at a time and keeps the list of traced processes. 386 """Processes one log line at a time and keeps the list of traced processes.
342 """ 387 """
343 class Process(object): 388 class Process(object):
344 """Keeps context for one traced child process. 389 """Keeps context for one traced child process.
345 390
346 Logs all the files this process touched. Ignores directories. 391 Logs all the files this process touched. Ignores directories.
347 """ 392 """
348 def __init__(self, root, pid, initial_cwd, parentid): 393 def __init__(self, root, pid, initial_cwd, parentid):
349 """root is a reference to the Context.""" 394 """root is a reference to the Context."""
395 # Check internal consistency.
350 assert isinstance(root, ApiBase.Context) 396 assert isinstance(root, ApiBase.Context)
351 assert isinstance(pid, int), repr(pid) 397 assert isinstance(pid, int), repr(pid)
352 self.root = weakref.ref(root) 398 self.root = weakref.ref(root)
353 self.pid = pid 399 self.pid = pid
354 # Children are pids. 400 # Children are pids.
355 self.children = [] 401 self.children = []
356 self.parentid = parentid 402 self.parentid = parentid
357 self.initial_cwd = initial_cwd 403 self.initial_cwd = initial_cwd
358 self.cwd = None 404 self.cwd = None
359 self.files = set() 405 self.files = set()
(...skipping 80 matching lines...)
440 """Represents a file accessed. May not be present anymore.""" 486 """Represents a file accessed. May not be present anymore."""
441 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path)) 487 logging.debug('%s(%s, %s)' % (self.__class__.__name__, root, path))
442 self.root = root 488 self.root = root
443 self.path = path 489 self.path = path
444 490
445 self._size = None 491 self._size = None
446 # For compatibility with Directory object interface. 492 # For compatibility with Directory object interface.
447 # Shouldn't be used normally, only exists to simplify algorithms. 493 # Shouldn't be used normally, only exists to simplify algorithms.
448 self.nb_files = 1 494 self.nb_files = 1
449 495
496 # Check internal consistency.
450 assert path, path 497 assert path, path
451 assert bool(root) != bool(isabs(path)), (root, path) 498 assert bool(root) != bool(isabs(path)), (root, path)
452 assert ( 499 assert (
453 not os.path.exists(self.full_path) or 500 not os.path.exists(self.full_path) or
454 self.full_path == get_native_path_case(self.full_path)) 501 self.full_path == get_native_path_case(self.full_path))
455 502
456 @property 503 @property
457 def existent(self): 504 def existent(self):
458 return self.size != -1 505 return self.size != -1
459 506
(...skipping 14 matching lines...)
474 return self.path 521 return self.path
475 522
476 def flatten(self): 523 def flatten(self):
477 return { 524 return {
478 'path': self.path, 525 'path': self.path,
479 'size': self.size, 526 'size': self.size,
480 } 527 }
481 528
482 def strip_root(self, root): 529 def strip_root(self, root):
483 """Returns a clone of itself with 'root' stripped off.""" 530 """Returns a clone of itself with 'root' stripped off."""
531 # Check internal consistency.
484 assert isabs(root) and root.endswith(os.path.sep), root 532 assert isabs(root) and root.endswith(os.path.sep), root
485 if not self.full_path.startswith(root): 533 if not self.full_path.startswith(root):
486 return None 534 return None
487 out = self.__class__(root, self.full_path[len(root):]) 535 out = self.__class__(root, self.full_path[len(root):])
488 # Keep size cache. 536 # Keep size cache.
489 out._size = self._size 537 out._size = self._size
490 return out 538 return out
491 539
492 class Directory(File): 540 class Directory(File):
493 """A directory of files. Must exist.""" 541 """A directory of files. Must exist."""
(...skipping 14 matching lines...)
508 """A process that was traced. 556 """A process that was traced.
509 557
510 Contains references to the files accessed by this process and its children. 558 Contains references to the files accessed by this process and its children.
511 """ 559 """
512 def __init__( 560 def __init__(
513 self, pid, files, executable, command, initial_cwd, children): 561 self, pid, files, executable, command, initial_cwd, children):
514 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) 562 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
515 self.pid = pid 563 self.pid = pid
516 self.files = sorted( 564 self.files = sorted(
517 (Results.File(None, f) for f in files), key=lambda x: x.path) 565 (Results.File(None, f) for f in files), key=lambda x: x.path)
518 assert len(set(f.path for f in self.files)) == len(self.files), [
519 f.path for f in self.files]
520 assert isinstance(children, list)
521 assert isinstance(self.files, list)
522 self.children = children 566 self.children = children
523 self.executable = executable 567 self.executable = executable
524 self.command = command 568 self.command = command
525 self.initial_cwd = initial_cwd 569 self.initial_cwd = initial_cwd
526 570
571 # Check internal consistency.
572 assert len(set(f.path for f in self.files)) == len(self.files), [
573 f.path for f in self.files]
574 assert isinstance(self.children, list)
575 assert isinstance(self.files, list)
576
527 @property 577 @property
528 def all(self): 578 def all(self):
529 for child in self.children: 579 for child in self.children:
530 for i in child.all: 580 for i in child.all:
531 yield i 581 yield i
532 yield self 582 yield self
533 583
534 def flatten(self): 584 def flatten(self):
535 return { 585 return {
536 'children': [c.flatten() for c in self.children], 586 'children': [c.flatten() for c in self.children],
(...skipping 218 matching lines...)
755 The validity is only guaranteed once all the logs are processed. 805 The validity is only guaranteed once all the logs are processed.
756 """ 806 """
757 return self.initial_cwd.render() 807 return self.initial_cwd.render()
758 808
759 def on_line(self, line): 809 def on_line(self, line):
760 self._line_number += 1 810 self._line_number += 1
761 if self.RE_SIGNAL.match(line): 811 if self.RE_SIGNAL.match(line):
762 # Ignore signals. 812 # Ignore signals.
763 return 813 return
764 814
765 match = self.RE_KILLED.match(line) 815 try:
766 if match: 816 match = self.RE_KILLED.match(line)
767 self.handle_exit_group(match.group(1), None, None) 817 if match:
818 # Converts a '+++ killed by Foo +++' trace into an exit_group().
819 self.handle_exit_group(match.group(1), None)
820 return
821
822 match = self.RE_UNFINISHED.match(line)
823 if match:
824 if match.group(1) in self._pending_calls:
825 raise TracingFailure(
826 'Found two unfinished calls for the same function',
827 None, None, None,
828 self._pending_calls)
829 self._pending_calls[match.group(1)] = (
830 match.group(1) + match.group(2))
831 return
832
833 match = self.RE_UNAVAILABLE.match(line)
834 if match:
835 # This usually means a process was killed and a pending call was
836 # canceled.
837 # TODO(maruel): Look up the last exit_group() trace just above and
838 # make sure any self._pending_calls[anything] is properly flushed.
839 return
840
841 match = self.RE_RESUMED.match(line)
842 if match:
843 if match.group(1) not in self._pending_calls:
844 raise TracingFailure(
845 'Found a resumed call that was not logged as unfinished',
846 None, None, None,
847 self._pending_calls)
848 pending = self._pending_calls.pop(match.group(1))
849 # Reconstruct the line.
850 line = pending + match.group(2)
851
852 match = self.RE_HEADER.match(line)
853 if not match:
854 raise TracingFailure(
855 'Found an invalid line: %s' % line,
856 None, None, None)
857 if match.group(1) == self.UNNAMED_FUNCTION:
858 return
859 handler = getattr(self, 'handle_%s' % match.group(1), None)
860 if not handler:
861 raise TracingFailure(
862 'Found an unhandled trace: %s' % match.group(1),
863 None, None, None)
864
865 return handler(match.group(2), match.group(3))
866 except TracingFailure, e:
867 # Hack in the values since the handler could be a static function.
868 e.pid = self.pid
869 e.line = line
870 e.line_number = self._line_number
871 # Re-raise the modified exception.
872 raise
873 except (KeyError, NotImplementedError, ValueError), e:
874 raise TracingFailure(
875 'Trace generated a %s exception: %s' % (
876 e.__class__.__name__, str(e)),
877 self.pid,
878 self._line_number,
879 line,
880 e)
881
882 def handle_chdir(self, args, result):
883 """Updates cwd."""
884 if not result.startswith('0'):
768 return 885 return
769
770 match = self.RE_UNFINISHED.match(line)
771 if match:
772 assert match.group(1) not in self._pending_calls
773 self._pending_calls[match.group(1)] = match.group(1) + match.group(2)
774 return
775
776 match = self.RE_UNAVAILABLE.match(line)
777 if match:
778 # This usually means a process was killed and a pending call was
779 # canceled.
780 # TODO(maruel): Look up the last exit_group() trace just above and
781 # make sure any self._pending_calls[anything] is properly flushed.
782 return
783
784 match = self.RE_RESUMED.match(line)
785 if match:
786 assert match.group(1) in self._pending_calls, self._pending_calls
787 pending = self._pending_calls.pop(match.group(1))
788 # Reconstruct the line.
789 line = pending + match.group(2)
790
791 match = self.RE_HEADER.match(line)
792 assert match, (self.pid, self._line_number, line)
793 if match.group(1) == self.UNNAMED_FUNCTION:
794 return
795 handler = getattr(self, 'handle_%s' % match.group(1), None)
796 assert handler, (self.pid, self._line_number, line)
797 try:
798 return handler(
799 match.group(1),
800 match.group(2),
801 match.group(3))
802 except Exception:
803 print >> sys.stderr, (self.pid, self._line_number, line)
804 raise
805
806 def handle_chdir(self, _function, args, result):
807 """Updates cwd."""
808 assert result.startswith('0'), 'Unexecpected fail: %s' % result
809 cwd = self.RE_CHDIR.match(args).group(1) 886 cwd = self.RE_CHDIR.match(args).group(1)
810 self.cwd = self.RelativePath(self, cwd) 887 self.cwd = self.RelativePath(self, cwd)
811 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd)) 888 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
812 889
813 def handle_clone(self, _function, _args, result): 890 def handle_clone(self, _args, result):
814 """Transfers cwd.""" 891 """Transfers cwd."""
815 if result == '? ERESTARTNOINTR (To be restarted)': 892 if result == '? ERESTARTNOINTR (To be restarted)':
816 return 893 return
817 # Update the other process right away. 894 # Update the other process right away.
818 childpid = int(result) 895 childpid = int(result)
819 child = self.root().get_or_set_proc(childpid) 896 child = self.root().get_or_set_proc(childpid)
897 if child.parentid is not None or childpid in self.children:
898 raise TracingFailure(
899 'Found internal inconsistency in process lifetime detection',
900 None, None, None)
901
820 # Copy the cwd object. 902 # Copy the cwd object.
821 child.initial_cwd = self.get_cwd() 903 child.initial_cwd = self.get_cwd()
822 assert child.parentid is None
823 child.parentid = self.pid 904 child.parentid = self.pid
824 # It is necessary because the logs are processed out of order. 905 # It is necessary because the logs are processed out of order.
825 assert childpid not in self.children
826 self.children.append(childpid) 906 self.children.append(childpid)
827 907
828 def handle_close(self, _function, _args, _result): 908 def handle_close(self, _args, _result):
829 pass 909 pass
830 910
831 def handle_execve(self, _function, args, result): 911 def handle_execve(self, args, result):
832 if result != '0': 912 if result != '0':
833 return 913 return
834 m = self.RE_EXECVE.match(args) 914 m = self.RE_EXECVE.match(args)
835 filepath = m.group(1) 915 filepath = m.group(1)
836 self._handle_file(filepath, result) 916 self._handle_file(filepath)
837 self.executable = self.RelativePath(self.get_cwd(), filepath) 917 self.executable = self.RelativePath(self.get_cwd(), filepath)
838 self.command = process_quoted_arguments(m.group(2)) 918 self.command = process_quoted_arguments(m.group(2))
839 919
840 def handle_exit_group(self, _function, _args, _result): 920 def handle_exit_group(self, _args, _result):
841 """Removes cwd.""" 921 """Removes cwd."""
842 self.cwd = None 922 self.cwd = None
843 923
844 @staticmethod 924 @staticmethod
845 def handle_fork(_function, args, result): 925 def handle_fork(_args, _result):
846 assert False, (args, result) 926 raise NotImplementedError('Unexpected/unimplemented trace fork()')
847 927
848 def handle_open(self, _function, args, result): 928 def handle_open(self, args, result):
929 if result.startswith('-1'):
930 return
849 args = (self.RE_OPEN3.match(args) or self.RE_OPEN2.match(args)).groups() 931 args = (self.RE_OPEN3.match(args) or self.RE_OPEN2.match(args)).groups()
850 if 'O_DIRECTORY' in args[1]: 932 if 'O_DIRECTORY' in args[1]:
851 return 933 return
852 self._handle_file(args[0], result) 934 self._handle_file(args[0])
853 935
854 def handle_rename(self, _function, args, result): 936 def handle_rename(self, args, result):
937 if result.startswith('-1'):
938 return
855 args = self.RE_RENAME.match(args).groups() 939 args = self.RE_RENAME.match(args).groups()
856 self._handle_file(args[0], result) 940 self._handle_file(args[0])
857 self._handle_file(args[1], result) 941 self._handle_file(args[1])
858 942
859 @staticmethod 943 @staticmethod
860 def handle_stat64(_function, args, result): 944 def handle_stat64(_args, _result):
861 assert False, (args, result) 945 raise NotImplementedError('Unexpected/unimplemented trace stat64()')
862 946
863 @staticmethod 947 @staticmethod
864 def handle_vfork(_function, args, result): 948 def handle_vfork(_args, _result):
865 assert False, (args, result) 949 raise NotImplementedError('Unexpected/unimplemented trace vfork()')
866 950
867 def _handle_file(self, filepath, result): 951 def _handle_file(self, filepath):
868 if result.startswith('-1'):
869 return
870 filepath = self.RelativePath(self.get_cwd(), filepath) 952 filepath = self.RelativePath(self.get_cwd(), filepath)
871 self.add_file(filepath) 953 self.add_file(filepath)
872 954
873 def __init__(self, blacklist, initial_cwd): 955 def __init__(self, blacklist, initial_cwd):
874 super(Strace.Context, self).__init__(blacklist) 956 super(Strace.Context, self).__init__(blacklist)
875 self.initial_cwd = initial_cwd 957 self.initial_cwd = initial_cwd
876 958
877 def render(self): 959 def render(self):
878 """Returns the string value of the initial cwd of the root process. 960 """Returns the string value of the initial cwd of the root process.
879 961
880 Used by RelativePath. 962 Used by RelativePath.
881 """ 963 """
882 return self.initial_cwd 964 return self.initial_cwd
883 965
884 def on_line(self, pid, line): 966 def on_line(self, pid, line):
967 """Transfers control into the Process.on_line() function."""
885 self.get_or_set_proc(pid).on_line(line.strip()) 968 self.get_or_set_proc(pid).on_line(line.strip())
886 969
887 def to_results(self): 970 def to_results(self):
888 """Finds back the root process and verify consistency.""" 971 """Finds back the root process and verify consistency."""
889 # TODO(maruel): Absolutely unnecessary, fix me. 972 # TODO(maruel): Absolutely unnecessary, fix me.
890 root = [p for p in self.processes.itervalues() if not p.parentid] 973 root = [p for p in self.processes.itervalues() if not p.parentid]
891 assert len(root) == 1 974 if len(root) != 1:
975 raise TracingFailure(
976 'Found internal inconsistency in process lifetime detection',
977 None,
978 None,
979 None,
980 root)
892 process = root[0].to_results_process() 981 process = root[0].to_results_process()
893 assert sorted(self.processes) == sorted(p.pid for p in process.all) 982 if sorted(self.processes) != sorted(p.pid for p in process.all):
983 raise TracingFailure(
984 'Found internal inconsistency in process lifetime detection',
985 None,
986 None,
987 None,
988 sorted(self.processes),
989 sorted(p.pid for p in process.all))
894 return Results(process) 990 return Results(process)
895 991
896 def get_or_set_proc(self, pid): 992 def get_or_set_proc(self, pid):
897 """Returns the Context.Process instance for this pid or creates a new one. 993 """Returns the Context.Process instance for this pid or creates a new one.
898 """ 994 """
899 assert isinstance(pid, int) and pid 995 if not pid or not isinstance(pid, int):
996 raise TracingFailure(
997 'Unexpected value for pid: %r' % pid,
998 pid,
999 None,
1000 None,
1001 pid)
900 return self.processes.setdefault(pid, self.Process(self, pid)) 1002 return self.processes.setdefault(pid, self.Process(self, pid))
901 1003
902 @classmethod 1004 @classmethod
903 def traces(cls): 1005 def traces(cls):
1006 """Returns the list of all handled traces to pass this as an argument to
1007 strace.
1008 """
904 prefix = 'handle_' 1009 prefix = 'handle_'
905 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] 1010 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
906 1011
907 @staticmethod 1012 @staticmethod
908 def clean_trace(logname): 1013 def clean_trace(logname):
909 if os.path.isfile(logname): 1014 if os.path.isfile(logname):
910 os.remove(logname) 1015 os.remove(logname)
911 # Also delete any pid specific file from previous traces. 1016 # Also delete any pid specific file from previous traces.
912 for i in glob.iglob(logname + '.*'): 1017 for i in glob.iglob(logname + '.*'):
913 if i.rsplit('.', 1)[1].isdigit(): 1018 if i.rsplit('.', 1)[1].isdigit():
(...skipping 20 matching lines...)
934 ] 1039 ]
935 child = subprocess.Popen( 1040 child = subprocess.Popen(
936 trace_cmd + cmd, 1041 trace_cmd + cmd,
937 cwd=cwd, 1042 cwd=cwd,
938 stdin=subprocess.PIPE, 1043 stdin=subprocess.PIPE,
939 stdout=stdout, 1044 stdout=stdout,
940 stderr=stderr) 1045 stderr=stderr)
941 out = child.communicate()[0] 1046 out = child.communicate()[0]
942 # Once it's done, write metadata into the log file to be able to follow the 1047 # Once it's done, write metadata into the log file to be able to follow the
943 # pid files. 1048 # pid files.
944 assert not os.path.isfile(logname)
945 with open(logname, 'wb') as f: 1049 with open(logname, 'wb') as f:
946 json.dump( 1050 json.dump(
947 { 1051 {
948 'cwd': cwd, 1052 'cwd': cwd,
949 # The pid of strace process, not very useful. 1053 # The pid of strace process, not very useful.
950 'pid': child.pid, 1054 'pid': child.pid,
951 }, 1055 },
952 f) 1056 f)
953 return child.returncode, out 1057 return child.returncode, out
954 1058
(...skipping 360 matching lines...)
1315 # 1 is for tracked processes. 1419 # 1 is for tracked processes.
1316 # 2 is for trace_child_process.py only. It is not tracked itself but 1420 # 2 is for trace_child_process.py only. It is not tracked itself but
1317 # all its descendants are. 1421 # all its descendants are.
1318 ' trackedpid[' + pid + '] = 2;\n' 1422 ' trackedpid[' + pid + '] = 2;\n'
1319 ' printf("%d %d %s_%s(\\"' + cwd + '\\")\\n",\n' 1423 ' printf("%d %d %s_%s(\\"' + cwd + '\\")\\n",\n'
1320 ' logindex, ' + pid + ', probeprov, probename);\n' 1424 ' logindex, ' + pid + ', probeprov, probename);\n'
1321 ' logindex++;\n' 1425 ' logindex++;\n'
1322 '}\n') + cls.D_CODE 1426 '}\n') + cls.D_CODE
1323 1427
1324 class Context(ApiBase.Context): 1428 class Context(ApiBase.Context):
1325 # This is the most common format. index pid function(args) 1429 # Format: index pid function(args)
1326 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$') 1430 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1327 1431
1328 # Arguments parsing. 1432 # Arguments parsing.
1329 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') 1433 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1330 RE_CHDIR = re.compile(r'^\"(.+?)\"$') 1434 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1331 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') 1435 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1332 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$') 1436 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1333 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$') 1437 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1334 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') 1438 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1335 1439
1336 O_DIRECTORY = 0x100000 1440 O_DIRECTORY = 0x100000
1337 1441
1338 class Process(ApiBase.Context.Process): 1442 class Process(ApiBase.Context.Process):
1339 pass 1443 pass
1340 1444
1341 def __init__(self, blacklist): 1445 def __init__(self, blacklist):
1342 super(Dtrace.Context, self).__init__(blacklist) 1446 super(Dtrace.Context, self).__init__(blacklist)
1343 # Process ID of the trace_child_process.py wrapper script instance. 1447 # Process ID of the trace_child_process.py wrapper script instance.
1344 self._tracer_pid = None 1448 self._tracer_pid = None
1345 # First process to be started by self._tracer_pid. 1449 # First process to be started by self._tracer_pid.
1346 self._initial_pid = None 1450 self._initial_pid = None
1347 self._initial_cwd = None 1451 self._initial_cwd = None
1452 self._line_number = 0
1348 1453
1349 def on_line(self, line): 1454 def on_line(self, line):
1455 self._line_number += 1
1350 match = self.RE_HEADER.match(line) 1456 match = self.RE_HEADER.match(line)
1351 assert match, line 1457 if not match:
1458 raise TracingFailure(
1459 'Found malformed line: %s' % line,
1460 None,
1461 self._line_number,
1462 line)
1352 fn = getattr( 1463 fn = getattr(
1353 self, 1464 self,
1354 'handle_%s' % match.group(2).replace('-', '_'), 1465 'handle_%s' % match.group(2).replace('-', '_'),
1355 self._handle_ignored) 1466 self._handle_ignored)
1356 return fn( 1467 # It is guaranteed to succeed because of the regexp. Or at least I thought
1357 int(match.group(1)), 1468 # it would.
1358 match.group(3)) 1469 pid = int(match.group(1))
1470 try:
1471 return fn(pid, match.group(3))
1472 except TracingFailure, e:
1473 # Hack in the values since the handler could be a static function.
1474 e.pid = pid
1475 e.line = line
1476 e.line_number = self._line_number
1477 # Re-raise the modified exception.
1478 raise
1479 except (KeyError, NotImplementedError, ValueError), e:
1480 raise TracingFailure(
1481 'Trace generated a %s exception: %s' % (
1482 e.__class__.__name__, str(e)),
1483 pid,
1484 self._line_number,
1485 line,
1486 e)
1359 1487
1360 def to_results(self): 1488 def to_results(self):
1361 """Uses self._initial_pid to determine the initial process.""" 1489 """Uses self._initial_pid to determine the initial process."""
1362 process = self.processes[self._initial_pid].to_results_process() 1490 process = self.processes[self._initial_pid].to_results_process()
1363 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( 1491 # Internal consistency check.
1364 sorted(self.processes), sorted(p.pid for p in process.all)) 1492 if sorted(self.processes) != sorted(p.pid for p in process.all):
1493 raise TracingFailure(
1494 'Found internal inconsistency in process lifetime detection',
1495 None,
1496 None,
1497 None,
1498 sorted(self.processes),
1499 sorted(p.pid for p in process.all))
1365 return Results(process) 1500 return Results(process)
1366 1501
1367 def handle_dtrace_BEGIN(self, pid, args): 1502 def handle_dtrace_BEGIN(self, pid, args):
1368 assert not self._tracer_pid and not self._initial_pid 1503 if self._tracer_pid or self._initial_pid:
1504 raise TracingFailure(
1505 'Found internal inconsitency in dtrace_BEGIN log line',
1506 None, None, None)
1369 self._tracer_pid = pid 1507 self._tracer_pid = pid
1370 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) 1508 self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1)
1371 1509
1372 def handle_proc_start(self, pid, args): 1510 def handle_proc_start(self, pid, args):
1373 """Transfers cwd. 1511 """Transfers cwd.
1374 1512
1375 The dtrace script already takes care of only tracing the processes that 1513 The dtrace script already takes care of only tracing the processes that
1376 are children of the traced processes so there is no need to verify the 1514 are children of the traced processes so there is no need to verify the
1377 process hierarchy. 1515 process hierarchy.
1378 """ 1516 """
1379 assert pid not in self.processes 1517 if pid in self.processes:
1518 raise TracingFailure(
1519 'Found internal inconsistency in proc_start: %d started two times' %
1520 pid,
1521 None, None, None)
1380 match = self.RE_PROC_START.match(args) 1522 match = self.RE_PROC_START.match(args)
1523 if not match:
1524 raise TracingFailure(
1525 'Failed to parse arguments: %s' % args,
1526 None, None, None)
1381 ppid = int(match.group(1)) 1527 ppid = int(match.group(1))
1382 assert (ppid == self._tracer_pid) != bool(self._initial_pid) 1528 if (ppid == self._tracer_pid) == bool(self._initial_pid):
1529 raise TracingFailure(
1530 ( 'Parent process is _tracer_pid(%d) but _initial_pid(%d) is '
1531 'already set') % (self._tracer_pid, self._initial_pid),
1532 None, None, None)
1383 if not self._initial_pid: 1533 if not self._initial_pid:
1384 self._initial_pid = pid 1534 self._initial_pid = pid
1385 ppid = None 1535 ppid = None
1386 cwd = self._initial_cwd 1536 cwd = self._initial_cwd
1387 else: 1537 else:
1388 parent = self.processes[ppid] 1538 parent = self.processes[ppid]
1389 cwd = parent.cwd 1539 cwd = parent.cwd
1390 proc = self.processes[pid] = self.Process(self, pid, cwd, ppid) 1540 proc = self.processes[pid] = self.Process(self, pid, cwd, ppid)
1391 proc.cwd = cwd 1541 proc.cwd = cwd
1392 logging.debug('New child: %s -> %d' % (ppid, pid)) 1542 logging.debug('New child: %s -> %d' % (ppid, pid))
1393 1543
1394 def handle_proc_exit(self, pid, _args): 1544 def handle_proc_exit(self, pid, _args):
1395 """Removes cwd.""" 1545 """Removes cwd."""
1396 if pid != self._tracer_pid: 1546 if pid != self._tracer_pid:
1397 # self._tracer_pid is not traced itself. 1547 # self._tracer_pid is not traced itself.
1398 self.processes[pid].cwd = None 1548 self.processes[pid].cwd = None
1399 1549
1400 def handle_execve(self, pid, args): 1550 def handle_execve(self, pid, args):
1401 """Sets the process' executable. 1551 """Sets the process' executable.
1402 1552
1403 TODO(maruel): Read command line arguments. See 1553 TODO(maruel): Read command line arguments. See
1404 https://discussions.apple.com/thread/1980539 for an example. 1554 https://discussions.apple.com/thread/1980539 for an example.
1405 https://gist.github.com/1242279 1555 https://gist.github.com/1242279
1406 1556
1407 Will have to put the answer at http://stackoverflow.com/questions/7556249. 1557 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1408 :) 1558 :)
1409 """ 1559 """
1410 match = self.RE_EXECVE.match(args) 1560 match = self.RE_EXECVE.match(args)
1411 assert match, args 1561 if not match:
1562 raise TracingFailure(
1563 'Failed to parse arguments: %s' % args,
1564 None, None, None)
1412 proc = self.processes[pid] 1565 proc = self.processes[pid]
1413 proc.executable = match.group(1) 1566 proc.executable = match.group(1)
1414 proc.command = process_quoted_arguments(match.group(3)) 1567 proc.command = process_quoted_arguments(match.group(3))
1415 assert int(match.group(2)) == len(proc.command), args 1568 if int(match.group(2)) != len(proc.command):
1569 raise TracingFailure(
1570 'Failed to parse execve() arguments: %s' % args,
1571 None, None, None)
1416 1572
1417 def handle_chdir(self, pid, args): 1573 def handle_chdir(self, pid, args):
1418 """Updates cwd.""" 1574 """Updates cwd."""
1419 assert self._tracer_pid 1575 if not self._tracer_pid or pid not in self.processes:
1420 assert pid in self.processes 1576 raise TracingFailure(
1577 'Found inconsistency in the log, is it an old log?',
1578 None, None, None)
1421 cwd = self.RE_CHDIR.match(args).group(1) 1579 cwd = self.RE_CHDIR.match(args).group(1)
1422 if not cwd.startswith('/'): 1580 if not cwd.startswith('/'):
1423 cwd2 = os.path.join(self.processes[pid].cwd, cwd) 1581 cwd2 = os.path.join(self.processes[pid].cwd, cwd)
1424 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) 1582 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1425 else: 1583 else:
1426 logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) 1584 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1427 cwd2 = cwd 1585 cwd2 = cwd
1428 self.processes[pid].cwd = cwd2 1586 self.processes[pid].cwd = cwd2
1429 1587
1430 def handle_open_nocancel(self, pid, args): 1588 def handle_open_nocancel(self, pid, args):
1431 return self.handle_open(pid, args) 1589 return self.handle_open(pid, args)
1432 1590
1433 def handle_open(self, pid, args): 1591 def handle_open(self, pid, args):
1434 match = self.RE_OPEN.match(args) 1592 match = self.RE_OPEN.match(args)
1435 assert match, (pid, args) 1593 if not match:
1594 raise TracingFailure(
1595 'Failed to parse arguments: %s' % args,
1596 None, None, None)
1436 flag = int(match.group(2), 16) 1597 flag = int(match.group(2), 16)
1437 if self.O_DIRECTORY & flag == self.O_DIRECTORY: 1598 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1438 # Ignore directories. 1599 # Ignore directories.
1439 return 1600 return
1440 self._handle_file(pid, match.group(1)) 1601 self._handle_file(pid, match.group(1))
1441 1602
1442 def handle_rename(self, pid, args): 1603 def handle_rename(self, pid, args):
1443 match = self.RE_RENAME.match(args) 1604 match = self.RE_RENAME.match(args)
1444 assert match, (pid, args) 1605 if not match:
1606 raise TracingFailure(
1607 'Failed to parse arguments: %s' % args,
1608 None, None, None)
1445 self._handle_file(pid, match.group(1)) 1609 self._handle_file(pid, match.group(1))
1446 self._handle_file(pid, match.group(2)) 1610 self._handle_file(pid, match.group(2))
1447 1611
1448 def _handle_file(self, pid, filepath): 1612 def _handle_file(self, pid, filepath):
1449 if not filepath.startswith('/'): 1613 if not filepath.startswith('/'):
1450 filepath = os.path.join(self.processes[pid].cwd, filepath) 1614 filepath = os.path.join(self.processes[pid].cwd, filepath)
1451 # We can get '..' in the path. 1615 # We can get '..' in the path.
1452 filepath = os.path.normpath(filepath) 1616 filepath = os.path.normpath(filepath)
1453 # Sadly, still need to filter out directories here; 1617 # Sadly, still need to filter out directories here;
1454 # saw open_nocancel(".", 0, 0) = 0 lines. 1618 # saw open_nocancel(".", 0, 0) = 0 lines.
1455 if os.path.isdir(filepath): 1619 if os.path.isdir(filepath):
1456 return 1620 return
1457 self.processes[pid].add_file(filepath) 1621 self.processes[pid].add_file(filepath)
1458 1622
1459 @staticmethod 1623 @staticmethod
1460 def _handle_ignored(pid, args): 1624 def _handle_ignored(pid, args):
1461 """Is called for all the event traces that are not handled.""" 1625 """Is called for all the event traces that are not handled."""
1462 assert False, '%d ??(%s)' % (pid, args) 1626 raise NotImplementedError('Please implement me')
1463 1627
1464 @staticmethod 1628 @staticmethod
1465 def clean_trace(logname): 1629 def clean_trace(logname):
1466 if os.path.isfile(logname): 1630 if os.path.isfile(logname):
1467 os.remove(logname) 1631 os.remove(logname)
1468 1632
1469 @classmethod 1633 @classmethod
1470 def gen_trace(cls, cmd, cwd, logname, output): 1634 def gen_trace(cls, cmd, cwd, logname, output):
1471 """Runs dtrace on an executable. 1635 """Runs dtrace on an executable.
1472 1636
(...skipping 86 matching lines...)
1559 def _sort_log(logname): 1723 def _sort_log(logname):
1560 """Sorts the log back in order when each call occured. 1724 """Sorts the log back in order when each call occured.
1561 1725
1562 dtrace doesn't save the buffer in strict order since it keeps one buffer per 1726 dtrace doesn't save the buffer in strict order since it keeps one buffer per
1563 CPU. 1727 CPU.
1564 """ 1728 """
1565 with open(logname, 'rb') as logfile: 1729 with open(logname, 'rb') as logfile:
1566 lines = [l for l in logfile if l.strip()] 1730 lines = [l for l in logfile if l.strip()]
1567 errors = [l for l in lines if l.startswith('dtrace:')] 1731 errors = [l for l in lines if l.startswith('dtrace:')]
1568 if errors: 1732 if errors:
1569 print >> sys.stderr, 'Failed to load: %s' % logname 1733 raise TracingFailure(
1570 print >> sys.stderr, '\n'.join(errors) 1734 'Found errors in the trace: %s' % '\n'.join(errors),
1571 assert not errors, errors 1735 None, None, None, logname)
1572 try: 1736 try:
1573 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) 1737 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
1574 except ValueError: 1738 except ValueError:
1575 print >> sys.stderr, 'Failed to load: %s' % logname 1739 raise TracingFailure(
1576 print >> sys.stderr, '\n'.join(lines) 1740 'Found errors in the trace: %s' % '\n'.join(
1577 raise 1741 l for l in lines if not l.split(' ', 1)[0].isdigit()),
1742 None, None, None, logname)
1578 with open(logname, 'wb') as logfile: 1743 with open(logname, 'wb') as logfile:
1579 logfile.write(''.join(lines)) 1744 logfile.write(''.join(lines))
1580 1745
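To illustrate what _sort_log() does (the log content below is made up), lines are reordered by the leading logindex printed by the D script, since each CPU flushes its own dtrace buffer independently:

lines = ['2 34 proc_start(33, "ls", 0)\n', '1 33 dtrace_BEGIN("/cwd")\n']
lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
# -> the dtrace_BEGIN line now comes first, matching the actual call order.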
1581 1746
1582 class LogmanTrace(ApiBase): 1747 class LogmanTrace(ApiBase):
1583 """Uses the native Windows ETW based tracing functionality to trace a child 1748 """Uses the native Windows ETW based tracing functionality to trace a child
1584 process. 1749 process.
1585 1750
1586 Caveat: this implementation doesn't track cwd or initial_cwd. It is because 1751 Caveat: this implementation doesn't track cwd or initial_cwd. It is because
1587 the Windows Kernel doesn't have a concept of current working directory. 1752 the Windows Kernel doesn't have a concept of current working directory.
1588 Windows has a map of current directories, one per drive letter and it is 1753 Windows has a map of current directories, one per drive letter and it is
1589 managed by the user mode kernel32.dll. In kernel, a file is always opened 1754 managed by the user mode kernel32.dll. In kernel, a file is always opened
1590 relative to another file_object or as an absolute path. 1755 relative to another file_object or as an absolute path.
1591 """ 1756 """
1592 class Context(ApiBase.Context): 1757 class Context(ApiBase.Context):
1593 """Processes a ETW log line and keeps the list of existent and non 1758 """Processes a ETW log line and keeps the list of existent and non
1594 existent files accessed. 1759 existent files accessed.
1595 1760
1596 Ignores directories. 1761 Ignores directories.
1597 """ 1762 """
1763 # The basic headers.
1764 EXPECTED_HEADER = [
1765 u'Event Name',
1766 u'Type',
1767 u'Event ID',
1768 u'Version',
1769 u'Channel',
1770 u'Level', # 5
1771 u'Opcode',
1772 u'Task',
1773 u'Keyword',
1774 u'PID',
1775 u'TID', # 10
1776 u'Processor Number',
1777 u'Instance ID',
1778 u'Parent Instance ID',
1779 u'Activity ID',
1780 u'Related Activity ID', # 15
1781 u'Clock-Time',
1782 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
1783 u'User(ms)', # pretty much useless.
1784 u'User Data', # Extra arguments that are event-specific.
1785 ]
1598 # Only the useful headers common to all entries are listed there. Any column 1786 # Only the useful headers common to all entries are listed there. Any column
1599 # at 19 or higher is dependent on the specific event. 1787 # at 19 or higher is dependent on the specific event.
1600 EVENT_NAME = 0 1788 EVENT_NAME = 0
1601 TYPE = 1 1789 TYPE = 1
1602 PID = 9 1790 PID = 9
1603 TID = 10 1791 TID = 10
1604 PROCESSOR_ID = 11 1792 PROCESSOR_ID = 11
1605 TIMESTAMP = 16 1793 TIMESTAMP = 16
1794 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
1606 1795
1607 class Process(ApiBase.Context.Process): 1796 class Process(ApiBase.Context.Process):
1608 def __init__(self, *args): 1797 def __init__(self, *args):
1609 super(LogmanTrace.Context.Process, self).__init__(*args) 1798 super(LogmanTrace.Context.Process, self).__init__(*args)
1610 # Handle file objects that succeeded. 1799 # Handle file objects that succeeded.
1611 self.file_objects = {} 1800 self.file_objects = {}
1612 1801
1613 def __init__(self, blacklist, tracer_pid): 1802 def __init__(self, blacklist, tracer_pid):
1614 super(LogmanTrace.Context, self).__init__(blacklist) 1803 super(LogmanTrace.Context, self).__init__(blacklist)
1615 self._drive_map = DosDriveMap() 1804 self._drive_map = DosDriveMap()
1616 # Threads mapping to the corresponding process id. 1805 # Threads mapping to the corresponding process id.
1617 self._threads_active = {} 1806 self._threads_active = {}
1618 # Process ID of the tracer, e.g. tracer_inputs.py 1807 # Process ID of the tracer, e.g. tracer_inputs.py
1619 self._tracer_pid = tracer_pid 1808 self._tracer_pid = tracer_pid
1620 # First process to be started by self._tracer_pid is the executable 1809 # First process to be started by self._tracer_pid is the executable
1621 # traced. 1810 # traced.
1622 self._initial_pid = None 1811 self._initial_pid = None
1623 self._line_number = 0 1812 self._line_number = 0
1624 1813
1625 def on_csv_line(self, line): 1814 def on_csv_line(self, raw_line):
1626 """Processes a CSV Event line.""" 1815 """Processes a CSV Event line."""
1627 # So much white space! 1816 # So much white space!
1628 line = [i.strip() for i in line] 1817 line = [i.strip() for i in raw_line]
1629 self._line_number += 1 1818 self._line_number += 1
1630 if self._line_number == 1: 1819 try:
1631 assert line == [ 1820 if self._line_number == 1:
1632 u'Event Name', 1821 if line != self.EXPECTED_HEADER:
1633 u'Type', 1822 raise TracingFailure(
1634 u'Event ID', 1823 'Found malformed header: %s' % ' '.join(line),
1635 u'Version', 1824 None, None, None)
1636 u'Channel', 1825 return
1637 u'Level', # 5
1638 u'Opcode',
1639 u'Task',
1640 u'Keyword',
1641 u'PID',
1642 u'TID', # 10
1643 u'Processor Number',
1644 u'Instance ID',
1645 u'Parent Instance ID',
1646 u'Activity ID',
1647 u'Related Activity ID', # 15
1648 u'Clock-Time',
1649 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
1650 u'User(ms)', # pretty much useless.
1651 u'User Data',
1652 ]
1653 return
1654 1826
1655 # As you can see, the CSV is full of useful non-redundant information: 1827 # As you can see, the CSV is full of useful non-redundant information:
1656 # Event ID 1828 if (line[2] != '0' or # Event ID
1657 assert line[2] == '0' 1829 line[3] not in ('2', '3') or # Version
1658 # Version 1830 line[4] != '0' or # Channel
1659 assert line[3] in ('2', '3'), line[3] 1831 line[5] != '0' or # Level
1660 # Channel 1832 line[7] != '0' or # Task
1661 assert line[4] == '0' 1833 line[8] != '0x0000000000000000' or # Keyword
1662 # Level 1834 line[12] != '' or # Instance ID
1663 assert line[5] == '0' 1835 line[13] != '' or # Parent Instance ID
1664 # Task 1836 line[14] != self.NULL_GUID or # Activity ID
1665 assert line[7] == '0' 1837 line[15] != ''): # Related Activity ID
1666 # Keyword 1838 raise TracingFailure(
1667 assert line[8] == '0x0000000000000000' 1839 'Found unexpected values in line: %s' % ' '.join(line),
1668 # Instance ID 1840 None, None, None)
1669 assert line[12] == '' 1841 if line[0].startswith('{'):
1670 # Parent Instance ID 1842 # Skip GUIDs.
1671 assert line[13] == '' 1843 return
1672 # Activity ID
1673 assert line[14] == '{00000000-0000-0000-0000-000000000000}'
1674 # Related Activity ID
1675 assert line[15] == ''
1676 1844
1677 if line[0].startswith('{'): 1845 # Convert the PID in-place from hex.
1678 # Skip GUIDs. 1846 line[self.PID] = int(line[self.PID], 16)
1679 return
1680 1847
1681 # Convert the PID in-place from hex. 1848 # By Opcode
1682 line[self.PID] = int(line[self.PID], 16) 1849 handler = getattr(
1683 1850 self,
1684 # By Opcode 1851 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
1685 handler = getattr( 1852 None)
1686 self, 1853 if not handler:
1687 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), 1854 # Try to get an universal fallback for the event class.
1688 None) 1855 handler = getattr(self, 'handle_%s_Any' % line[self.EVENT_NAME], None)
1689 if not handler: 1856 if not handler:
1690 # Try to get an universal fallback 1857 raise TracingFailure(
1691 handler = getattr(self, 'handle_%s_Any' % line[self.EVENT_NAME], None) 1858 'Unexpected event %s_%s' % (
1692 if handler: 1859 line[self.EVENT_NAME], line[self.TYPE]),
1860 None, None, None)
1693 handler(line) 1861 handler(line)
1694 else: 1862 except TracingFailure, e:
1695 assert False, '%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]) 1863 # Hack in the values since the handler could be a static function.
1864 e.pid = line[self.PID]
1865 e.line = line
1866 e.line_number = self._line_number
1867 # Re-raise the modified exception.
1868 raise
1869 except (KeyError, NotImplementedError, ValueError), e:
1870 raise TracingFailure(
1871 'Trace generated a %s exception: %s' % (
1872 e.__class__.__name__, str(e)),
1873 line[self.PID],
1874 self._line_number,
1875 line,
1876 e)
1696 1877
1697 def to_results(self): 1878 def to_results(self):
1698 """Uses self._initial_pid to determine the initial process.""" 1879 """Uses self._initial_pid to determine the initial process."""
1699 process = self.processes[self._initial_pid].to_results_process() 1880 process = self.processes[self._initial_pid].to_results_process()
1700 assert sorted(self.processes) == sorted(p.pid for p in process.all), ( 1881 # Internal consistency check.
1701 sorted(self.processes), sorted(p.pid for p in process.all)) 1882 if sorted(self.processes) != sorted(p.pid for p in process.all):
1883 raise TracingFailure(
1884 'Found internal inconsistency in process lifetime detection',
1885 None,
1886 None,
1887 None,
1888 sorted(self.processes),
1889 sorted(p.pid for p in process.all))
1702 return Results(process) 1890 return Results(process)
1703 1891
1704 def _thread_to_process(self, tid): 1892 def _thread_to_process(self, tid):
1705 """Finds the process from the thread id.""" 1893 """Finds the process from the thread id."""
1706 tid = int(tid, 16) 1894 tid = int(tid, 16)
1707 return self.processes.get(self._threads_active.get(tid)) 1895 return self.processes.get(self._threads_active.get(tid))
1708 1896
1709 @staticmethod 1897 @staticmethod
1710 def handle_EventTrace_Header(line): 1898 def handle_EventTrace_Header(line):
1711 """Verifies no event was dropped, e.g. no buffer overrun occured.""" 1899 """Verifies no event was dropped, e.g. no buffer overrun occured."""
1712 #BUFFER_SIZE = 19 1900 BUFFER_SIZE = 19
1713 #VERSION = 20 1901 #VERSION = 20
1714 #PROVIDER_VERSION = 21 1902 #PROVIDER_VERSION = 21
1715 #NUMBER_OF_PROCESSORS = 22 1903 #NUMBER_OF_PROCESSORS = 22
1716 #END_TIME = 23 1904 #END_TIME = 23
1717 #TIMER_RESOLUTION = 24 1905 #TIMER_RESOLUTION = 24
1718 #MAX_FILE_SIZE = 25 1906 #MAX_FILE_SIZE = 25
1719 #LOG_FILE_MODE = 26 1907 #LOG_FILE_MODE = 26
1720 #BUFFERS_WRITTEN = 27 1908 #BUFFERS_WRITTEN = 27
1721 #START_BUFFERS = 28 1909 #START_BUFFERS = 28
1722 #POINTER_SIZE = 29 1910 #POINTER_SIZE = 29
1723 EVENTS_LOST = 30 1911 EVENTS_LOST = 30
1724 #CPU_SPEED = 31 1912 #CPU_SPEED = 31
1725 #LOGGER_NAME = 32 1913 #LOGGER_NAME = 32
1726 #LOG_FILE_NAME = 33 1914 #LOG_FILE_NAME = 33
1727 #BOOT_TIME = 34 1915 #BOOT_TIME = 34
1728 #PERF_FREQ = 35 1916 #PERF_FREQ = 35
1729 #START_TIME = 36 1917 #START_TIME = 36
1730 #RESERVED_FLAGS = 37 1918 #RESERVED_FLAGS = 37
1731 #BUFFERS_LOST = 38 1919 #BUFFERS_LOST = 38
1732 #SESSION_NAME_STRING = 39 1920 #SESSION_NAME_STRING = 39
1733 #LOG_FILE_NAME_STRING = 40 1921 #LOG_FILE_NAME_STRING = 40
1734 assert line[EVENTS_LOST] == '0' 1922 if line[EVENTS_LOST] != '0':
1923 raise TracingFailure(
1924 ( 'Events were lost during trace, please increase the buffer size '
1925 'from %s') % line[BUFFER_SIZE],
1926 None, None, None)
1735 1927
1736 def handle_EventTrace_Any(self, line): 1928 def handle_EventTrace_Any(self, line):
1737 pass 1929 pass
1738 1930
1739 def handle_FileIo_Cleanup(self, line): 1931 def handle_FileIo_Cleanup(self, line):
1740 """General wisdom: if a file is closed, it's because it was opened. 1932 """General wisdom: if a file is closed, it's because it was opened.
1741 1933
1742 Note that FileIo_Close is not used since if a file was opened properly but 1934 Note that FileIo_Close is not used since if a file was opened properly but
1743 not closed before the process exits, only Cleanup will be logged. 1935 not closed before the process exits, only Cleanup will be logged.
1744 """ 1936 """
(...skipping 34 matching lines...)
1779 proc = self._thread_to_process(line[TTID]) 1971 proc = self._thread_to_process(line[TTID])
1780 if not proc: 1972 if not proc:
1781 # Not a process we care about. 1973 # Not a process we care about.
1782 return 1974 return
1783 1975
1784 match = re.match(r'^\"(.+)\"$', line[OPEN_PATH]) 1976 match = re.match(r'^\"(.+)\"$', line[OPEN_PATH])
1785 raw_path = match.group(1) 1977 raw_path = match.group(1)
1786 # Ignore directories and bare drive right away. 1978 # Ignore directories and bare drive right away.
1787 if raw_path.endswith(os.path.sep): 1979 if raw_path.endswith(os.path.sep):
1788 return 1980 return
1789 filename = self._drive_map.to_dos(raw_path) 1981 filename = self._drive_map.to_win32(raw_path)
1790 # Ignore bare drive right away. Some may still fall through with format 1982 # Ignore bare drive right away. Some may still fall through with format
1791 # like '\\?\X:' 1983 # like '\\?\X:'
1792 if len(filename) == 2: 1984 if len(filename) == 2:
1793 return 1985 return
1794 file_object = line[FILE_OBJECT] 1986 file_object = line[FILE_OBJECT]
1795 # Override any stale file object 1987 # Override any stale file object
1796 proc.file_objects[file_object] = filename 1988 proc.file_objects[file_object] = filename
1797 1989
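The Create handler above records the FILE_OBJECT-to-path mapping so that the Cleanup handler, whose docstring appears before the skipped chunk, can attribute the access when the handle is released. A simplified sketch of that bookkeeping, with hypothetical handler signatures instead of the real CSV column indices:

class _FileObjectSketch(object):
  """Illustrative only: tracks kernel FILE_OBJECT handles for one process."""

  def __init__(self):
    self.file_objects = {}
    self.files = set()

  def on_create(self, file_object, filename):
    # Override any stale file object; the kernel reuses handle values.
    self.file_objects[file_object] = filename

  def on_cleanup(self, file_object):
    # A Cleanup implies the file was opened, even if Close is never logged.
    filename = self.file_objects.pop(file_object, None)
    if filename:
      self.files.add(filename)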
1798 def handle_FileIo_Rename(self, line): 1990 def handle_FileIo_Rename(self, line):
1799 # TODO(maruel): Handle? 1991 # TODO(maruel): Handle?
(...skipping 21 matching lines...)
1821 #DIRECTORY_TABLE_BASE = 24 2013 #DIRECTORY_TABLE_BASE = 24
1822 #USER_SID = 25 2014 #USER_SID = 25
1823 IMAGE_FILE_NAME = 26 2015 IMAGE_FILE_NAME = 26
1824 COMMAND_LINE = 27 2016 COMMAND_LINE = 27
1825 2017
1826 ppid = line[self.PID] 2018 ppid = line[self.PID]
1827 pid = int(line[PROCESS_ID], 16) 2019 pid = int(line[PROCESS_ID], 16)
1828 if ppid == self._tracer_pid: 2020 if ppid == self._tracer_pid:
1829 # Need to ignore processes we don't know about because the log is 2021 # Need to ignore processes we don't know about because the log is
1830 # system-wide. self._tracer_pid shall start only one process. 2022 # system-wide. self._tracer_pid shall start only one process.
1831 assert not self._initial_pid 2023 if self._initial_pid:
2024 raise TracingFailure(
2025 ( 'Parent process is _tracer_pid(%d) but _initial_pid(%d) is '
2026 'already set') % (self._tracer_pid, self._initial_pid),
2027 None, None, None)
1832 self._initial_pid = pid 2028 self._initial_pid = pid
1833 ppid = None 2029 ppid = None
1834 elif ppid not in self.processes: 2030 elif ppid not in self.processes:
1835 # Ignore 2031 # Ignore
1836 return 2032 return
1837 assert pid not in self.processes 2033 if pid in self.processes:
2034 raise TracingFailure(
2035 'Process %d started two times' % pid,
2036 None, None, None)
1838 proc = self.processes[pid] = self.Process(self, pid, None, ppid) 2037 proc = self.processes[pid] = self.Process(self, pid, None, ppid)
2038
2039 if (not line[IMAGE_FILE_NAME].startswith('"') or
2040 not line[IMAGE_FILE_NAME].endswith('"')):
2041 raise TracingFailure(
2042 'Image file name is not properly quoted: %s' % line[IMAGE_FILE_NAME],
2043 None, None, None)
2044
1839 # TODO(maruel): Process escapes. 2045 # TODO(maruel): Process escapes.
1840 assert ( 2046 if (not line[COMMAND_LINE].startswith('"') or
1841 line[COMMAND_LINE].startswith('"') and 2047 not line[COMMAND_LINE].endswith('"')):
1842 line[COMMAND_LINE].endswith('"')) 2048 raise TracingFailure(
2049 'Command line is not properly quoted: %s' % line[COMMAND_LINE],
2050 None, None, None)
1843 proc.command = CommandLineToArgvW(line[COMMAND_LINE][1:-1]) 2051 proc.command = CommandLineToArgvW(line[COMMAND_LINE][1:-1])
1844 assert (
1845 line[IMAGE_FILE_NAME].startswith('"') and
1846 line[IMAGE_FILE_NAME].endswith('"'))
1847 proc.executable = line[IMAGE_FILE_NAME][1:-1] 2052 proc.executable = line[IMAGE_FILE_NAME][1:-1]
1848 # proc.command[0] may be the absolute path of 'executable' but it may be 2053 # proc.command[0] may be the absolute path of 'executable' but it may be
1849 # anything else too. If it happens that command[0] ends with executable, 2054 # anything else too. If it happens that command[0] ends with executable,
1850 # use it, otherwise defaults to the base name. 2055 # use it, otherwise defaults to the base name.
1851 cmd0 = proc.command[0].lower() 2056 cmd0 = proc.command[0].lower()
1852 if not cmd0.endswith('.exe'): 2057 if not cmd0.endswith('.exe'):
1853 # TODO(maruel): That's not strictly true either. 2058 # TODO(maruel): That's not strictly true either.
1854 cmd0 += '.exe' 2059 cmd0 += '.exe'
1855 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0): 2060 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
1856 proc.executable = get_native_path_case(cmd0) 2061 proc.executable = get_native_path_case(cmd0)
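Both IMAGE_FILE_NAME and COMMAND_LINE above go through the same double-quote check before being unquoted, each raising its own TracingFailure. A possible way to factor the repetition, sketched with a hypothetical helper name that is not part of the patch:

def _unquote_or_fail(value, what):
  # Hypothetical helper: fail with context when the field is not quoted.
  if not (value.startswith('"') and value.endswith('"')):
    raise TracingFailure(
        '%s is not properly quoted: %s' % (what, value), None, None, None)
  return value[1:-1]

# Sketch of its use in handle_Process_Start:
#   proc.command = CommandLineToArgvW(
#       _unquote_or_fail(line[COMMAND_LINE], 'Command line'))
#   proc.executable = _unquote_or_fail(line[IMAGE_FILE_NAME], 'Image file name')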
(...skipping 156 matching lines...)
2013 # tracerpt localizes the 'Type' column, for major brainfuck 2218 # tracerpt localizes the 'Type' column, for major brainfuck
2014 # entertainment. I can't imagine any sane reason to do that. 2219 # entertainment. I can't imagine any sane reason to do that.
2015 cmd_convert.extend(['-of', 'CSV']) 2220 cmd_convert.extend(['-of', 'CSV'])
2016 elif logformat == 'csv_utf16': 2221 elif logformat == 'csv_utf16':
2017 # This causes it to use UTF-16, which doubles the log size but ensures the 2222 # This causes it to use UTF-16, which doubles the log size but ensures the
2018 # log is readable for non-ASCII characters. 2223 # log is readable for non-ASCII characters.
2019 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode']) 2224 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
2020 elif logformat == 'xml': 2225 elif logformat == 'xml':
2021 cmd_convert.extend(['-of', 'XML']) 2226 cmd_convert.extend(['-of', 'XML'])
2022 else: 2227 else:
2023 assert False, logformat 2228 raise ValueError('Unexpected log format \'%s\'' % logformat)
2024 logging.debug('Running: %s' % cmd_convert) 2229 logging.debug('Running: %s' % cmd_convert)
2025 # This can take tens of minutes for large logs. 2230 # This can take tens of minutes for large logs.
2026 subprocess.check_call( 2231 subprocess.check_call(
2027 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) 2232 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr)
2028 2233
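For reference, the assembled conversion command for the csv_utf16 branch would look roughly like the sketch below. The beginning of cmd_convert is in the skipped lines, so the tool path, the input log name, and the '-o' output flag here are assumptions rather than what the patch actually builds; only '-of CSV -en Unicode' comes from the code above.

# Illustrative only; real values come from the hidden part of the method.
cmd_convert = [
    'tracerpt.exe',
    'logname.etl',        # hypothetical input log
    '-o', 'logname.csv',  # '-o' assumed to be tracerpt's output flag
    '-of', 'CSV',
    '-en', 'Unicode',     # flags taken from the csv_utf16 branch above
]
subprocess.check_call(cmd_convert, stdin=subprocess.PIPE)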
2029 @classmethod 2234 @classmethod
2030 def gen_trace(cls, cmd, cwd, logname, output): 2235 def gen_trace(cls, cmd, cwd, logname, output):
2031 """Uses logman.exe to start and stop the NT Kernel Logger while the 2236 """Uses logman.exe to start and stop the NT Kernel Logger while the
2032 executable to be traced is run. 2237 executable to be traced is run.
2033 """ 2238 """
(...skipping 372 matching lines...)
2406 os.path.abspath(options.log), 2611 os.path.abspath(options.log),
2407 args, 2612 args,
2408 options.root_dir, 2613 options.root_dir,
2409 options.cwd, 2614 options.cwd,
2410 options.product_dir, 2615 options.product_dir,
2411 options.force) 2616 options.force)
2412 2617
2413 2618
2414 if __name__ == '__main__': 2619 if __name__ == '__main__':
2415 sys.exit(main()) 2620 sys.exit(main())