Index: tools/deep_memory_profiler/lib/dump.py
diff --git a/tools/deep_memory_profiler/lib/dump.py b/tools/deep_memory_profiler/lib/dump.py
new file mode 100644
index 0000000000000000000000000000000000000000..115979e0f6a2486935fe0c5f9beb880b53466da8
--- /dev/null
+++ b/tools/deep_memory_profiler/lib/dump.py
@@ -0,0 +1,531 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import copy
+import datetime
+import logging
+import os
+import re
+import time
+
+from lib.bucket import BUCKET_ID
+from lib.exceptions import EmptyDumpException, InvalidDumpException
+from lib.exceptions import ObsoleteDumpVersionException, ParsingException
+from lib.pageframe import PageFrame
+from lib.range_dict import ExclusiveRangeDict
+from lib.symbol import proc_maps
+
+
+LOGGER = logging.getLogger('dmprof')
+
+
+# Heap Profile Dump versions
+
+# DUMP_DEEP_[1-4] are obsolete.
+# DUMP_DEEP_2+ distinguish mmap regions and malloc chunks.
+# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
+# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
+# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
+# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
+DUMP_DEEP_1 = 'DUMP_DEEP_1'
+DUMP_DEEP_2 = 'DUMP_DEEP_2'
+DUMP_DEEP_3 = 'DUMP_DEEP_3'
+DUMP_DEEP_4 = 'DUMP_DEEP_4'
+
+DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
+
+# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
+# malloc and mmap are identified in bucket files.
+# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'
+
+# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
+DUMP_DEEP_6 = 'DUMP_DEEP_6'
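+
+# A dump file therefore starts with a header line naming its version,
+# e.g. (illustrative): "heap profile: DUMP_DEEP_6".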
+
+
+class Dump(object):
+  """Represents a heap profile dump."""
+
+  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
+
+  _HOOK_PATTERN = re.compile(
+      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
+      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
+
+  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
+                               r'(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
+  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
+                                 r'(?P<RESERVED>[0-9]+)')
+
+  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
+  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
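+
+  # _HOOK_PATTERN matches region lines such as the following (illustrative
+  # values); the trailing field is then parsed by _HOOKED_PATTERN,
+  # _UNHOOKED_PATTERN, or their _OLD_ variants:
+  #    7f85a1000000 - 7f85a2000000   hooked mmap 8192 / 8192 @ 123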
+
+  _TIME_PATTERN_FORMAT = re.compile(
+      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
+  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
+
+  def __init__(self, path, modified_time):
+    self._path = path
+    matched = self._PATH_PATTERN.match(path)
+    self._pid = int(matched.group(2))
+    self._count = int(matched.group(3))
+    self._time = modified_time
+    self._map = {}
+    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
+    self._stacktrace_lines = []
+    self._global_stats = {}  # used only in apply_policy
+
+    self._run_id = ''
+    self._pagesize = 4096
+    self._pageframe_length = 0
+    self._pageframe_encoding = ''
+    self._has_pagecount = False
+
+    self._version = ''
+    self._lines = []
+
+  @property
+  def path(self):
+    return self._path
+
+  @property
+  def count(self):
+    return self._count
+
+  @property
+  def time(self):
+    return self._time
+
+  @property
+  def iter_map(self):
+    for region in sorted(self._map.iteritems()):
+      yield region[0], region[1]
+
+  def iter_procmaps(self):
+    for begin, end, attr in self._procmaps.iter_range():
+      yield begin, end, attr
+
+  @property
+  def iter_stacktrace(self):
+    for line in self._stacktrace_lines:
+      yield line
+
+  def global_stat(self, name):
+    return self._global_stats[name]
+
+  @property
+  def run_id(self):
+    return self._run_id
+
+  @property
+  def pagesize(self):
+    return self._pagesize
+
+  @property
+  def pageframe_length(self):
+    return self._pageframe_length
+
+  @property
+  def pageframe_encoding(self):
+    return self._pageframe_encoding
+
+  @property
+  def has_pagecount(self):
+    return self._has_pagecount
+
+  @staticmethod
+  def load(path, log_header='Loading a heap profile dump: '):
+    """Loads a heap profile dump.
+
+    Args:
+      path: A file path string to load.
+      log_header: A preceding string for log messages.
+
+    Returns:
+      A loaded Dump object.
+
+    Raises:
+      ParsingException for invalid heap profile dumps.
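+
+    Example (with an illustrative file name; the path must match
+    _PATH_PATTERN, i.e. end in '.<pid>.<count>.heap'):
+      dump = Dump.load('chrome.12345.0002.heap')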
+ """ |
+ dump = Dump(path, os.stat(path).st_mtime) |
+ with open(path, 'r') as f: |
+ dump.load_file(f, log_header) |
+ return dump |
+ |
+ def load_file(self, f, log_header): |
+ self._lines = [line for line in f |
+ if line and not line.startswith('#')] |
+ |
+ try: |
+ self._version, ln = self._parse_version() |
+ self._parse_meta_information() |
+ if self._version == DUMP_DEEP_6: |
+ self._parse_mmap_list() |
+ self._parse_global_stats() |
+ self._extract_stacktrace_lines(ln) |
+ except EmptyDumpException: |
+ LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) |
+ except ParsingException, e: |
+ LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) |
+ raise |
+ else: |
+ LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) |
+ |
+  def _parse_version(self):
+    """Parses a version string in self._lines.
+
+    Returns:
+      A pair of (a string representing a version of the stacktrace dump,
+      and an integer indicating a line number next to the version string).
+
+    Raises:
+      ParsingException for invalid dump versions.
+    """
+    version = ''
+
+    # Skip until an identifiable line.
+    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
+    if not self._lines:
+      raise EmptyDumpException('Empty heap dump file.')
+    (ln, found) = skip_while(
+        0, len(self._lines),
+        lambda n: not self._lines[n].startswith(headers))
+    if not found:
+      raise InvalidDumpException('No version header.')
+
+    # Identify a version.
+    if self._lines[ln].startswith('heap profile: '):
+      version = self._lines[ln][13:].strip()
+      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
+        (ln, _) = skip_while(
+            ln, len(self._lines),
+            lambda n: self._lines[n] != 'STACKTRACES:\n')
+      elif version in DUMP_DEEP_OBSOLETE:
+        raise ObsoleteDumpVersionException(version)
+      else:
+        raise InvalidDumpException('Invalid version: %s' % version)
+    elif self._lines[ln] == 'STACKTRACES:\n':
+      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
+    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
+      raise ObsoleteDumpVersionException(DUMP_DEEP_2)
+
+    return (version, ln)
+
+  def _parse_global_stats(self):
+    """Parses lines in self._lines as global stats."""
+    (ln, _) = skip_while(
+        0, len(self._lines),
+        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
+
+    global_stat_names = [
+        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
+        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
+        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
+        'nonprofiled-stack', 'nonprofiled-other',
+        'profiled-mmap', 'profiled-malloc']
+
+    for prefix in global_stat_names:
+      (ln, _) = skip_while(
+          ln, len(self._lines),
+          lambda n: self._lines[n].split()[0] != prefix)
+      words = self._lines[ln].split()
+      self._global_stats[prefix + '_virtual'] = int(words[-2])
+      self._global_stats[prefix + '_committed'] = int(words[-1])
+
+  def _parse_meta_information(self):
+    """Parses lines in self._lines for meta information."""
+    (ln, found) = skip_while(
+        0, len(self._lines),
+        lambda n: self._lines[n] != 'META:\n')
+    if not found:
+      return
+    ln += 1
+
+    while True:
+      if self._lines[ln].startswith('Time:'):
+        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
+        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
+        if matched_format:
+          self._time = time.mktime(datetime.datetime.strptime(
+              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
+          if matched_format.group(2):
+            # group(2) is a fraction like '.123'; float() parses it directly.
+            self._time += float(matched_format.group(2))
+        elif matched_seconds:
+          self._time = float(matched_seconds.group(1))
+      elif self._lines[ln].startswith('Reason:'):
+        pass  # Nothing to do for 'Reason:'
+      elif self._lines[ln].startswith('PageSize: '):
+        self._pagesize = int(self._lines[ln][10:])
+      elif self._lines[ln].startswith('CommandLine:'):
+        pass
+      elif (self._lines[ln].startswith('PageFrame: ') or
+            self._lines[ln].startswith('PFN: ')):
+        if self._lines[ln].startswith('PageFrame: '):
+          words = self._lines[ln][11:].split(',')
+        else:
+          words = self._lines[ln][5:].split(',')
+        for word in words:
+          if word == '24':
+            self._pageframe_length = 24
+          elif word == 'Base64':
+            self._pageframe_encoding = 'base64'
+          elif word == 'PageCount':
+            self._has_pagecount = True
+      elif self._lines[ln].startswith('RunID: '):
+        self._run_id = self._lines[ln][7:].strip()
+      elif (self._lines[ln].startswith('MMAP_LIST:') or
+            self._lines[ln].startswith('GLOBAL_STATS:')):
+        # Stop parsing when "MMAP_LIST:" or "GLOBAL_STATS:" is found.
+        break
+      else:
+        pass
+      ln += 1
+
+  def _parse_mmap_list(self):
+    """Parses lines in self._lines as a mmap list."""
+    (ln, found) = skip_while(
+        0, len(self._lines),
+        lambda n: self._lines[n] != 'MMAP_LIST:\n')
+    if not found:
+      return {}
+
+    ln += 1
+    self._map = {}
+    current_vma = {}
+    pageframe_list = []
+    while True:
+      entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
+      if entry:
+        current_vma = {}
+        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
+          for key, value in entry.as_dict().iteritems():
+            attr[key] = value
+            current_vma[key] = value
+        ln += 1
+        continue
+
+      if self._lines[ln].startswith(' PF: '):
+        for pageframe in self._lines[ln][5:].split():
+          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
+        ln += 1
+        continue
+
+      matched = self._HOOK_PATTERN.match(self._lines[ln])
+      if not matched:
+        break
+      # 2: starting address
+      # 5: end address
+      # 7: hooked or unhooked
+      # 8: additional information
+      if matched.group(7) == 'hooked':
+        submatched = self._HOOKED_PATTERN.match(matched.group(8))
+        if not submatched:
+          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
+      elif matched.group(7) == 'unhooked':
+        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
+        if not submatched:
+          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
+      else:
+        assert matched.group(7) in ['hooked', 'unhooked']
+
+      submatched_dict = submatched.groupdict()
+      region_info = {'vma': current_vma}
+      if submatched_dict.get('TYPE'):
+        region_info['type'] = submatched_dict['TYPE'].strip()
+      if submatched_dict.get('COMMITTED'):
+        region_info['committed'] = int(submatched_dict['COMMITTED'])
+      if submatched_dict.get('RESERVED'):
+        region_info['reserved'] = int(submatched_dict['RESERVED'])
+      if submatched_dict.get('BUCKETID'):
+        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
+
+      if matched.group(1) == '(':
+        start = current_vma['begin']
+      else:
+        start = int(matched.group(2), 16)
+      if matched.group(4) == '(':
+        end = current_vma['end']
+      else:
+        end = int(matched.group(5), 16)
+
+      if pageframe_list and pageframe_list[0].start_truncated:
+        pageframe_list[0].set_size(
+            pageframe_list[0].size - start % self._pagesize)
+      if pageframe_list and pageframe_list[-1].end_truncated:
+        pageframe_list[-1].set_size(
+            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
+      region_info['pageframe'] = pageframe_list
+      pageframe_list = []
+
+      self._map[(start, end)] = (matched.group(7), region_info)
+      ln += 1
+
+  def _extract_stacktrace_lines(self, line_number):
+    """Extracts the position of stacktrace lines.
+
+    Valid stacktrace lines are stored into self._stacktrace_lines.
+
+    Args:
+      line_number: A line number to start parsing in lines.
+
+    Raises:
+      ParsingException for invalid dump versions.
+    """
+    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
+      (line_number, _) = skip_while(
+          line_number, len(self._lines),
+          lambda n: not self._lines[n].split()[0].isdigit())
+      stacktrace_start = line_number
+      (line_number, _) = skip_while(
+          line_number, len(self._lines),
+          lambda n: self._check_stacktrace_line(self._lines[n]))
+      self._stacktrace_lines = self._lines[stacktrace_start:line_number]
+
+    elif self._version in DUMP_DEEP_OBSOLETE:
+      raise ObsoleteDumpVersionException(self._version)
+
+    else:
+      raise InvalidDumpException('Invalid version: %s' % self._version)
+
+  @staticmethod
+  def _check_stacktrace_line(stacktrace_line):
+    """Checks if a given stacktrace_line is valid as stacktrace.
+
+    Args:
+      stacktrace_line: A string to be checked.
+
+    Returns:
+      True if the given stacktrace_line is valid.
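+
+      A valid line has '@' in the column just before the bucket ID (the
+      column index is BUCKET_ID, imported from lib.bucket).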
+ """ |
+ words = stacktrace_line.split() |
+ if len(words) < BUCKET_ID + 1: |
+ return False |
+ if words[BUCKET_ID - 1] != '@': |
+ return False |
+ return True |
+ |
+ |
+class DumpList(object): |
+ """Represents a sequence of heap profile dumps.""" |
+
+  def __init__(self, dump_list):
+    self._dump_list = dump_list
+
+  @staticmethod
+  def load(path_list):
+    LOGGER.info('Loading heap dump profiles.')
+    dump_list = []
+    for path in path_list:
+      dump_list.append(Dump.load(path, ' '))
+    return DumpList(dump_list)
+
+  def __len__(self):
+    return len(self._dump_list)
+
+  def __iter__(self):
+    for dump in self._dump_list:
+      yield dump
+
+  def __getitem__(self, index):
+    return self._dump_list[index]
+
+
+class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
+  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
+  _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
+      0,     # begin
+      0,     # end
+      '-',   # readable
+      '-',   # writable
+      '-',   # executable
+      '-',   # private
+      0,     # offset
+      '00',  # major
+      '00',  # minor
+      0,     # inode
+      ''     # name
+      )
+
+  def __init__(self):
+    super(ProcMapsEntryAttribute, self).__init__()
+    self._entry = self._DUMMY_ENTRY.as_dict()
+
+  def __str__(self):
+    return str(self._entry)
+
+  def __repr__(self):
+    return 'ProcMapsEntryAttribute' + str(self._entry)
+
+  def __getitem__(self, key):
+    return self._entry[key]
+
+  def __setitem__(self, key, value):
+    if key not in self._entry:
+      raise KeyError(key)
+    self._entry[key] = value
+
+  def copy(self):
+    new_entry = ProcMapsEntryAttribute()
+    for key, value in self._entry.iteritems():
+      new_entry[key] = copy.deepcopy(value)
+    return new_entry
+
+
+def skip_while(index, max_index, skipping_condition):
+ """Increments |index| until |skipping_condition|(|index|) is False. |
+ |
+ Returns: |
+ A pair of an integer indicating a line number after skipped, and a |
+ boolean value which is True if found a line which skipping_condition |
+ is False for. |
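+
+  Example (where |lines| is a list of strings, as used throughout this
+  module):
+    (ln, found) = skip_while(
+        0, len(lines), lambda n: lines[n] != 'GLOBAL_STATS:\n')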
+ """ |
+ while skipping_condition(index): |
+ index += 1 |
+ if index >= max_index: |
+ return index, False |
+ return index, True |