# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.bucket import BUCKET_ID
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import proc_maps


LOGGER = logging.getLogger('dmprof')


# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
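
# For reference, a DUMP_DEEP_6 dump is expected to look roughly like the
# following (an illustrative sketch; the values and the exact section order
# are hypothetical, but each header is matched by the parsers below):
#
#   heap profile: DUMP_DEEP_6
#   META:
#   Time: 1363273766
#   PageSize: 4096
#   MMAP_LIST:
#   ...
#   GLOBAL_STATS:
#   ...
#   STACKTRACES:
#   ...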


class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
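  # For example, a file named 'chrome.12345.0002.heap' (a hypothetical name)
  # yields pid 12345 and dump count 2 in __init__ below.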

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+)')

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
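
  # The patterns above match MMAP_LIST region lines. A parenthesized address
  # means the boundary is truncated to the enclosing VMA. Hypothetical
  # examples of lines that _HOOK_PATTERN accepts:
  #    (7f0000000000)-(7f0000008000) hooked 4096 / 8192 @ 123
  #     7f0000008000 - 7f000000a000  unhooked 8192 / 8192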

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
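  # Both forms are handled in _parse_meta_information, e.g. (hypothetical
  # timestamps): 'Time: 2013/03/14 15:09:26.987' or 'Time: 1363273766'.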

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
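    # The ExclusiveRangeDict below keeps /proc/maps-derived attributes per
    # address range; _parse_mmap_list() fills it in for DUMP_DEEP_6 dumps.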
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    # Note: iterates self._procmaps (the ExclusiveRangeDict); self._map is a
    # plain dict and has no iter_range().
    for begin, end, attr in self._procmaps.iter_range():
      yield begin, end, attr

  @property
  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded Dump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and an integer indicating a line number next to the version string).

    Raises:
        ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']
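
    # Each stat line is expected to end with two integers: virtual and
    # committed sizes. E.g. a (hypothetical) line 'total 102400 51200'
    # yields self._global_stats['total_virtual'] == 102400 and
    # self._global_stats['total_committed'] == 51200.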

    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # Meta information ends at 'MMAP_LIST:' or 'GLOBAL_STATS:'.
        break
      else:
        pass
      ln += 1

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return {}

    ln += 1
    self._map = {}
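    # self._map maps (start, end) address pairs to a pair of
    # ('hooked' or 'unhooked', region_info); see the bottom of the loop below.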
    current_vma = {}
    pageframe_list = []
    while True:
      entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith(' PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: end address
      # 7: hooked or unhooked
      # 8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = {'vma': current_vma}
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True


class DumpList(object):
  """Represents a sequence of heap profile dumps."""

  def __init__(self, dump_list):
    self._dump_list = dump_list

  @staticmethod
  def load(path_list):
    LOGGER.info('Loading heap dump profiles.')
    dump_list = []
    for path in path_list:
      dump_list.append(Dump.load(path, ' '))
    return DumpList(dump_list)

  def __len__(self):
    return len(self._dump_list)

  def __iter__(self):
    for dump in self._dump_list:
      yield dump

  def __getitem__(self, index):
    return self._dump_list[index]


class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
  )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
| 476 """Increments |index| until |skipping_condition|(|index|) is False. |
| 477 |
| 478 Returns: |
| 479 A pair of an integer indicating a line number after skipped, and a |
| 480 boolean value which is True if found a line which skipping_condition |
| 481 is False for. |
| 482 """ |
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True