Index: tools/deep_memory_profiler/dmprof.py |
diff --git a/tools/deep_memory_profiler/dmprof.py b/tools/deep_memory_profiler/dmprof.py |
index 533cbb9ee9a0b8e7219266be160f92ca93ab412d..0cb030a32fe7f3adaa401897413f5e93401efdff 100644 |
--- a/tools/deep_memory_profiler/dmprof.py |
+++ b/tools/deep_memory_profiler/dmprof.py |
@@ -2,2988 +2,33 @@ |
# Use of this source code is governed by a BSD-style license that can be |
# found in the LICENSE file. |
-"""The deep heap profiler script for Chrome.""" |
+"""The Deep Memory Profiler analyzer script. |
+ |
+See http://dev.chromium.org/developers/deep-memory-profiler for details. |
+""" |
-import copy |
-import cStringIO |
-import datetime |
-import json |
import logging |
-import optparse |
-import os |
-import re |
-import struct |
-import subprocess |
import sys |
-import tempfile |
-import time |
-import zipfile |
- |
-try: |
- from collections import OrderedDict # pylint: disable=E0611 |
-except ImportError: |
- # TODO(dmikurube): Remove this once Python 2.7 is required. |
- BASE_PATH = os.path.dirname(os.path.abspath(__file__)) |
- SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party') |
- sys.path.insert(0, SIMPLEJSON_PATH) |
- from simplejson import OrderedDict |
- |
-from range_dict import ExclusiveRangeDict |
- |
-BASE_PATH = os.path.dirname(os.path.abspath(__file__)) |
-FIND_RUNTIME_SYMBOLS_PATH = os.path.join( |
- BASE_PATH, os.pardir, 'find_runtime_symbols') |
-sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) |
-import find_runtime_symbols |
-import prepare_symbol_info |
-import proc_maps |
+from lib.exceptions import ParsingException |
+import subcommands |
-from find_runtime_symbols import FUNCTION_SYMBOLS |
-from find_runtime_symbols import SOURCEFILE_SYMBOLS |
-from find_runtime_symbols import TYPEINFO_SYMBOLS |
- |
-BUCKET_ID = 5 |
-VIRTUAL = 0 |
-COMMITTED = 1 |
-ALLOC_COUNT = 2 |
-FREE_COUNT = 3 |
-NULL_REGEX = re.compile('') |
LOGGER = logging.getLogger('dmprof') |
-POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') |
-CHROME_SRC_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir) |
- |
-DEFAULT_SORTERS = [ |
- os.path.join(BASE_PATH, 'sorter.malloc-component.json'), |
- os.path.join(BASE_PATH, 'sorter.malloc-type.json'), |
- os.path.join(BASE_PATH, 'sorter.vm-map.json'), |
- os.path.join(BASE_PATH, 'sorter.vm-sharing.json'), |
- ] |
- |
- |
-# Heap Profile Dump versions |
- |
-# DUMP_DEEP_[1-4] are obsolete. |
-# DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. |
-# DUMP_DEEP_3+ don't include allocation functions in their stack dumps. |
-# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". |
-# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. |
-# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. |
-DUMP_DEEP_1 = 'DUMP_DEEP_1' |
-DUMP_DEEP_2 = 'DUMP_DEEP_2' |
-DUMP_DEEP_3 = 'DUMP_DEEP_3' |
-DUMP_DEEP_4 = 'DUMP_DEEP_4' |
- |
-DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) |
- |
-# DUMP_DEEP_5 doesn't separate sections for malloc and mmap. |
-# malloc and mmap are identified in bucket files. |
-# DUMP_DEEP_5 should be processed by POLICY_DEEP_4. |
-DUMP_DEEP_5 = 'DUMP_DEEP_5' |
- |
-# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5. |
-DUMP_DEEP_6 = 'DUMP_DEEP_6' |
- |
-# Heap Profile Policy versions |
- |
-# POLICY_DEEP_1 DOES NOT include allocation_type columns. |
-# mmap regions are distinguished by mmap frames in the pattern column. |
-POLICY_DEEP_1 = 'POLICY_DEEP_1' |
- |
-# POLICY_DEEP_2 DOES include allocation_type columns. |
-# mmap regions are distinguished by the allocation_type column. |
-POLICY_DEEP_2 = 'POLICY_DEEP_2' |
- |
-# POLICY_DEEP_3 is in JSON format. |
-POLICY_DEEP_3 = 'POLICY_DEEP_3' |
- |
-# POLICY_DEEP_4 contains typeinfo. |
-POLICY_DEEP_4 = 'POLICY_DEEP_4' |
- |
- |
-class EmptyDumpException(Exception): |
- def __init__(self, value=''): |
- super(EmptyDumpException, self).__init__() |
- self.value = value |
- def __str__(self): |
- return repr(self.value) |
- |
- |
-class ParsingException(Exception): |
- def __init__(self, value=''): |
- super(ParsingException, self).__init__() |
- self.value = value |
- def __str__(self): |
- return repr(self.value) |
- |
- |
-class InvalidDumpException(ParsingException): |
- def __init__(self, value): |
- super(InvalidDumpException, self).__init__() |
- self.value = value |
- def __str__(self): |
- return "invalid heap profile dump: %s" % repr(self.value) |
- |
- |
-class ObsoleteDumpVersionException(ParsingException): |
- def __init__(self, value): |
- super(ObsoleteDumpVersionException, self).__init__() |
- self.value = value |
- def __str__(self): |
- return "obsolete heap profile dump version: %s" % repr(self.value) |
- |
- |
-class ListAttribute(ExclusiveRangeDict.RangeAttribute): |
- """Represents a list for an attribute in range_dict.ExclusiveRangeDict.""" |
- def __init__(self): |
- super(ListAttribute, self).__init__() |
- self._list = [] |
- |
- def __str__(self): |
- return str(self._list) |
- |
- def __repr__(self): |
- return 'ListAttribute' + str(self._list) |
- |
- def __len__(self): |
- return len(self._list) |
- |
- def __iter__(self): |
- for x in self._list: |
- yield x |
- |
- def __getitem__(self, index): |
- return self._list[index] |
- |
- def __setitem__(self, index, value): |
- if index >= len(self._list): |
- self._list.extend([None] * (index + 1 - len(self._list))) |
- self._list[index] = value |
- |
- def copy(self): |
- new_list = ListAttribute() |
- for index, item in enumerate(self._list): |
- new_list[index] = copy.deepcopy(item) |
- return new_list |
- |
- |
-class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): |
- """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" |
- _DUMMY_ENTRY = proc_maps.ProcMapsEntry( |
- 0, # begin |
- 0, # end |
- '-', # readable |
- '-', # writable |
- '-', # executable |
- '-', # private |
- 0, # offset |
- '00', # major |
- '00', # minor |
- 0, # inode |
- '' # name |
- ) |
- |
- def __init__(self): |
- super(ProcMapsEntryAttribute, self).__init__() |
- self._entry = self._DUMMY_ENTRY.as_dict() |
- |
- def __str__(self): |
- return str(self._entry) |
- |
- def __repr__(self): |
- return 'ProcMapsEntryAttribute' + str(self._entry) |
- |
- def __getitem__(self, key): |
- return self._entry[key] |
- |
- def __setitem__(self, key, value): |
- if key not in self._entry: |
- raise KeyError(key) |
- self._entry[key] = value |
- |
- def copy(self): |
- new_entry = ProcMapsEntryAttribute() |
- for key, value in self._entry.iteritems(): |
- new_entry[key] = copy.deepcopy(value) |
- return new_entry |
- |
- |
-def skip_while(index, max_index, skipping_condition): |
- """Increments |index| until |skipping_condition|(|index|) is False. |
- |
- Returns: |
-    A pair of an integer indicating the line number after skipping, and a |
-    boolean value which is True if a line was found for which |
-    skipping_condition is False. |
- """ |
- while skipping_condition(index): |
- index += 1 |
- if index >= max_index: |
- return index, False |
- return index, True |
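A minimal usage sketch of skip_while, with made-up input lines:

  lines = ['# comment', '# another comment', 'payload']
  index, found = skip_while(0, len(lines),
                            lambda n: lines[n].startswith('#'))
  # index == 2 and found == True; if every line matched, found would be False.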
- |
- |
-class SymbolDataSources(object): |
- """Manages symbol data sources in a process. |
- |
- The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and |
- so on. They are collected into a directory '|prefix|.symmap' from the binary |
- files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. |
- |
-  Binaries themselves are not required for profiling. The prepared data |
-  sources work in place of a binary even if the binary has been overwritten |
-  with another binary. |
- |
- Note that loading the symbol data sources takes a long time. They are often |
- very big. So, the 'dmprof' profiler is designed to use 'SymbolMappingCache' |
- which caches actually used symbols. |
- """ |
- def __init__(self, prefix, alternative_dirs=None): |
- self._prefix = prefix |
- self._prepared_symbol_data_sources_path = None |
- self._loaded_symbol_data_sources = None |
- self._alternative_dirs = alternative_dirs or {} |
- |
- def prepare(self): |
- """Prepares symbol data sources by extracting mapping from a binary. |
- |
- The prepared symbol data sources are stored in a directory. The directory |
- name is stored in |self._prepared_symbol_data_sources_path|. |
- |
- Returns: |
- True if succeeded. |
- """ |
- LOGGER.info('Preparing symbol mapping...') |
- self._prepared_symbol_data_sources_path, used_tempdir = ( |
- prepare_symbol_info.prepare_symbol_info( |
- self._prefix + '.maps', |
- output_dir_path=self._prefix + '.symmap', |
- alternative_dirs=self._alternative_dirs, |
- use_tempdir=True, |
- use_source_file_name=True)) |
- if self._prepared_symbol_data_sources_path: |
- LOGGER.info(' Prepared symbol mapping.') |
- if used_tempdir: |
- LOGGER.warn(' Using a temporary directory for symbol mapping.') |
- LOGGER.warn(' Delete it by yourself.') |
- LOGGER.warn(' Or, move the directory by yourself to use it later.') |
- return True |
- else: |
- LOGGER.warn(' Failed to prepare symbol mapping.') |
- return False |
- |
- def get(self): |
- """Returns the prepared symbol data sources. |
- |
- Returns: |
- The prepared symbol data sources. None if failed. |
- """ |
- if not self._prepared_symbol_data_sources_path and not self.prepare(): |
- return None |
- if not self._loaded_symbol_data_sources: |
- LOGGER.info('Loading symbol mapping...') |
- self._loaded_symbol_data_sources = ( |
- find_runtime_symbols.RuntimeSymbolsInProcess.load( |
- self._prepared_symbol_data_sources_path)) |
- return self._loaded_symbol_data_sources |
- |
- def path(self): |
- """Returns the path of the prepared symbol data sources if possible.""" |
- if not self._prepared_symbol_data_sources_path and not self.prepare(): |
- return None |
- return self._prepared_symbol_data_sources_path |
- |
- |
-class SymbolFinder(object): |
- """Finds corresponding symbols from addresses. |
- |
-  This class only 'find()'s symbols from a specified |address_list|. |
-  It is introduced to make the finder mockable. |
- """ |
- def __init__(self, symbol_type, symbol_data_sources): |
- self._symbol_type = symbol_type |
- self._symbol_data_sources = symbol_data_sources |
- |
- def find(self, address_list): |
- return find_runtime_symbols.find_runtime_symbols( |
- self._symbol_type, self._symbol_data_sources.get(), address_list) |
- |
- |
-class SymbolMappingCache(object): |
- """Caches mapping from actually used addresses to symbols. |
- |
- 'update()' updates the cache from the original symbol data sources via |
- 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. |
- """ |
- def __init__(self): |
- self._symbol_mapping_caches = { |
- FUNCTION_SYMBOLS: {}, |
- SOURCEFILE_SYMBOLS: {}, |
- TYPEINFO_SYMBOLS: {}, |
- } |
- |
- def update(self, symbol_type, bucket_set, symbol_finder, cache_f): |
- """Updates symbol mapping cache on memory and in a symbol cache file. |
- |
- It reads cached symbol mapping from a symbol cache file |cache_f| if it |
-    exists. Unresolved addresses are then resolved and added to the cache |
-    both in memory and in the symbol cache file using 'SymbolFinder'. |
- |
- A cache file is formatted as follows: |
- <Address> <Symbol> |
- <Address> <Symbol> |
- <Address> <Symbol> |
- ... |
- |
- Args: |
- symbol_type: A type of symbols to update. It should be one of |
- FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. |
- bucket_set: A BucketSet object. |
- symbol_finder: A SymbolFinder object to find symbols. |
- cache_f: A readable and writable IO object of the symbol cache file. |
- """ |
- cache_f.seek(0, os.SEEK_SET) |
- self._load(cache_f, symbol_type) |
- |
- unresolved_addresses = sorted( |
- address for address in bucket_set.iter_addresses(symbol_type) |
- if address not in self._symbol_mapping_caches[symbol_type]) |
- |
- if not unresolved_addresses: |
- LOGGER.info('No need to resolve any more addresses.') |
- return |
- |
- cache_f.seek(0, os.SEEK_END) |
- LOGGER.info('Loading %d unresolved addresses.' % |
- len(unresolved_addresses)) |
- symbol_dict = symbol_finder.find(unresolved_addresses) |
- |
- for address, symbol in symbol_dict.iteritems(): |
- stripped_symbol = symbol.strip() or '?' |
- self._symbol_mapping_caches[symbol_type][address] = stripped_symbol |
- cache_f.write('%x %s\n' % (address, stripped_symbol)) |
- |
- def lookup(self, symbol_type, address): |
- """Looks up a symbol for a given |address|. |
- |
- Args: |
-      symbol_type: A type of symbols to look up. It should be one of |
- FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. |
- address: An integer that represents an address. |
- |
- Returns: |
- A string that represents a symbol. |
- """ |
- return self._symbol_mapping_caches[symbol_type].get(address) |
- |
- def _load(self, cache_f, symbol_type): |
- try: |
- for line in cache_f: |
- items = line.rstrip().split(None, 1) |
- if len(items) == 1: |
- items.append('??') |
- self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1] |
- LOGGER.info('Loaded %d entries from symbol cache.' % |
- len(self._symbol_mapping_caches[symbol_type])) |
- except IOError as e: |
- LOGGER.info('The symbol cache file is invalid: %s' % e) |
- |
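A sketch of the cache file format handled by _load() and lookup() above; the address and symbol are invented, and FUNCTION_SYMBOLS comes from the module's find_runtime_symbols import:

  import cStringIO
  cache = SymbolMappingCache()
  cache_f = cStringIO.StringIO('7f0000001000 MyClass::DoSomething\n'
                               '7f0000002000 ??\n')
  cache._load(cache_f, FUNCTION_SYMBOLS)
  assert cache.lookup(FUNCTION_SYMBOLS, 0x7f0000001000) == 'MyClass::DoSomething'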
- |
-class Rule(object): |
- """Represents one matching rule in a policy file.""" |
- |
- def __init__(self, |
- name, |
- allocator_type, |
- stackfunction_pattern=None, |
- stacksourcefile_pattern=None, |
- typeinfo_pattern=None, |
- mappedpathname_pattern=None, |
- mappedpermission_pattern=None, |
- sharedwith=None): |
- self._name = name |
- self._allocator_type = allocator_type |
- |
- self._stackfunction_pattern = None |
- if stackfunction_pattern: |
- self._stackfunction_pattern = re.compile( |
- stackfunction_pattern + r'\Z') |
- |
- self._stacksourcefile_pattern = None |
- if stacksourcefile_pattern: |
- self._stacksourcefile_pattern = re.compile( |
- stacksourcefile_pattern + r'\Z') |
- |
- self._typeinfo_pattern = None |
- if typeinfo_pattern: |
- self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') |
- |
- self._mappedpathname_pattern = None |
- if mappedpathname_pattern: |
- self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z') |
- |
- self._mappedpermission_pattern = None |
- if mappedpermission_pattern: |
- self._mappedpermission_pattern = re.compile( |
- mappedpermission_pattern + r'\Z') |
- |
- self._sharedwith = [] |
- if sharedwith: |
- self._sharedwith = sharedwith |
- |
- @property |
- def name(self): |
- return self._name |
- |
- @property |
- def allocator_type(self): |
- return self._allocator_type |
- |
- @property |
- def stackfunction_pattern(self): |
- return self._stackfunction_pattern |
- |
- @property |
- def stacksourcefile_pattern(self): |
- return self._stacksourcefile_pattern |
- |
- @property |
- def typeinfo_pattern(self): |
- return self._typeinfo_pattern |
- |
- @property |
- def mappedpathname_pattern(self): |
- return self._mappedpathname_pattern |
- |
- @property |
- def mappedpermission_pattern(self): |
- return self._mappedpermission_pattern |
- |
- @property |
- def sharedwith(self): |
- return self._sharedwith |
- |
- |
-class Policy(object): |
- """Represents a policy, a content of a policy file.""" |
- |
- def __init__(self, rules, version, components): |
- self._rules = rules |
- self._version = version |
- self._components = components |
- |
- @property |
- def rules(self): |
- return self._rules |
- |
- @property |
- def version(self): |
- return self._version |
- |
- @property |
- def components(self): |
- return self._components |
- |
- def find_rule(self, component_name): |
- """Finds a rule whose name is |component_name|. """ |
- for rule in self._rules: |
- if rule.name == component_name: |
- return rule |
- return None |
- |
- def find_malloc(self, bucket): |
- """Finds a matching component name which a given |bucket| belongs to. |
- |
- Args: |
- bucket: A Bucket object to be searched for. |
- |
- Returns: |
- A string representing a component name. |
- """ |
- assert not bucket or bucket.allocator_type == 'malloc' |
- |
- if not bucket: |
- return 'no-bucket' |
- if bucket.component_cache: |
- return bucket.component_cache |
- |
- stackfunction = bucket.symbolized_joined_stackfunction |
- stacksourcefile = bucket.symbolized_joined_stacksourcefile |
- typeinfo = bucket.symbolized_typeinfo |
- if typeinfo.startswith('0x'): |
- typeinfo = bucket.typeinfo_name |
- |
- for rule in self._rules: |
- if (rule.allocator_type == 'malloc' and |
- (not rule.stackfunction_pattern or |
- rule.stackfunction_pattern.match(stackfunction)) and |
- (not rule.stacksourcefile_pattern or |
- rule.stacksourcefile_pattern.match(stacksourcefile)) and |
- (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): |
- bucket.component_cache = rule.name |
- return rule.name |
- |
- assert False |
- |
- def find_mmap(self, region, bucket_set, |
- pageframe=None, group_pfn_counts=None): |
- """Finds a matching component which a given mmap |region| belongs to. |
- |
- It uses |bucket_set| to match with backtraces. If |pageframe| is given, |
- it considers memory sharing among processes. |
- |
-    NOTE: Don't use Bucket's |component_cache| for mmap regions because they |
-    are classified by more than bucket information (e.g. mappedpathname). |
- |
- Args: |
- region: A tuple representing a memory region. |
- bucket_set: A BucketSet object to look up backtraces. |
-      pageframe: A PageFrame object representing a pageframe, possibly |
-        including a pagecount. |
-      group_pfn_counts: A dict mapping a PFN to the number of times the |
-        pageframe is mapped by the known "group (Chrome)" processes. |
- |
- Returns: |
- A string representing a component name. |
- """ |
- assert region[0] == 'hooked' |
- bucket = bucket_set.get(region[1]['bucket_id']) |
- assert not bucket or bucket.allocator_type == 'mmap' |
- |
- if not bucket: |
- return 'no-bucket', None |
- |
- stackfunction = bucket.symbolized_joined_stackfunction |
- stacksourcefile = bucket.symbolized_joined_stacksourcefile |
- sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) |
- |
- for rule in self._rules: |
- if (rule.allocator_type == 'mmap' and |
- (not rule.stackfunction_pattern or |
- rule.stackfunction_pattern.match(stackfunction)) and |
- (not rule.stacksourcefile_pattern or |
- rule.stacksourcefile_pattern.match(stacksourcefile)) and |
- (not rule.mappedpathname_pattern or |
- rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and |
- (not rule.mappedpermission_pattern or |
- rule.mappedpermission_pattern.match( |
- region[1]['vma']['readable'] + |
- region[1]['vma']['writable'] + |
- region[1]['vma']['executable'] + |
- region[1]['vma']['private'])) and |
- (not rule.sharedwith or |
- not pageframe or sharedwith in rule.sharedwith)): |
- return rule.name, bucket |
- |
- assert False |
- |
- def find_unhooked(self, region, pageframe=None, group_pfn_counts=None): |
- """Finds a matching component which a given unhooked |region| belongs to. |
- |
- If |pageframe| is given, it considers memory sharing among processes. |
- |
- Args: |
- region: A tuple representing a memory region. |
-      pageframe: A PageFrame object representing a pageframe, possibly |
-        including a pagecount. |
-      group_pfn_counts: A dict mapping a PFN to the number of times the |
-        pageframe is mapped by the known "group (Chrome)" processes. |
- |
- Returns: |
- A string representing a component name. |
- """ |
- assert region[0] == 'unhooked' |
- sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) |
- |
- for rule in self._rules: |
- if (rule.allocator_type == 'unhooked' and |
- (not rule.mappedpathname_pattern or |
- rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and |
- (not rule.mappedpermission_pattern or |
- rule.mappedpermission_pattern.match( |
- region[1]['vma']['readable'] + |
- region[1]['vma']['writable'] + |
- region[1]['vma']['executable'] + |
- region[1]['vma']['private'])) and |
- (not rule.sharedwith or |
- not pageframe or sharedwith in rule.sharedwith)): |
- return rule.name |
- |
- assert False |
- |
- @staticmethod |
- def load(filename, filetype): |
- """Loads a policy file of |filename| in a |format|. |
- |
- Args: |
- filename: A filename to be loaded. |
- filetype: A string to specify a type of the file. Only 'json' is |
- supported for now. |
- |
- Returns: |
- A loaded Policy object. |
- """ |
- with open(os.path.join(BASE_PATH, filename)) as policy_f: |
- return Policy.parse(policy_f, filetype) |
- |
- @staticmethod |
- def parse(policy_f, filetype): |
- """Parses a policy file content in a |format|. |
- |
- Args: |
- policy_f: An IO object to be loaded. |
- filetype: A string to specify a type of the file. Only 'json' is |
- supported for now. |
- |
- Returns: |
- A loaded Policy object. |
- """ |
- if filetype == 'json': |
- return Policy._parse_json(policy_f) |
- else: |
- return None |
- |
- @staticmethod |
- def _parse_json(policy_f): |
- """Parses policy file in json format. |
- |
-    A policy file contains component names and their stacktrace patterns |
-    written as regular expressions. Those patterns are matched against the |
-    symbols of each stacktrace in the order written in the policy file. |
- |
- Args: |
- policy_f: A File/IO object to read. |
- |
- Returns: |
- A loaded policy object. |
- """ |
- policy = json.load(policy_f) |
- |
- rules = [] |
- for rule in policy['rules']: |
- stackfunction = rule.get('stackfunction') or rule.get('stacktrace') |
- stacksourcefile = rule.get('stacksourcefile') |
- rules.append(Rule( |
- rule['name'], |
- rule['allocator'], # allocator_type |
- stackfunction, |
- stacksourcefile, |
- rule['typeinfo'] if 'typeinfo' in rule else None, |
- rule.get('mappedpathname'), |
- rule.get('mappedpermission'), |
- rule.get('sharedwith'))) |
- |
- return Policy(rules, policy['version'], policy['components']) |
- |
- @staticmethod |
- def _categorize_pageframe(pageframe, group_pfn_counts): |
- """Categorizes a pageframe based on its sharing status. |
- |
- Returns: |
- 'private' if |pageframe| is not shared with other processes. 'group' |
- if |pageframe| is shared only with group (Chrome-related) processes. |
- 'others' if |pageframe| is shared with non-group processes. |
- """ |
- if not pageframe: |
- return 'private' |
- |
- if pageframe.pagecount: |
- if pageframe.pagecount == 1: |
- return 'private' |
- elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1: |
- return 'group' |
- else: |
- return 'others' |
- else: |
- if pageframe.pfn in group_pfn_counts: |
- return 'group' |
- else: |
- return 'private' |
- |
- |
-class PolicySet(object): |
- """Represents a set of policies.""" |
- |
- def __init__(self, policy_directory): |
- self._policy_directory = policy_directory |
- |
- @staticmethod |
- def load(labels=None): |
- """Loads a set of policies via the "default policy directory". |
- |
- The "default policy directory" contains pairs of policies and their labels. |
- For example, a policy "policy.l0.json" is labeled "l0" in the default |
- policy directory "policies.json". |
- |
- All policies in the directory are loaded by default. Policies can be |
- limited by |labels|. |
- |
- Args: |
- labels: An array that contains policy labels to be loaded. |
- |
- Returns: |
- A PolicySet object. |
- """ |
- default_policy_directory = PolicySet._load_default_policy_directory() |
- if labels: |
- specified_policy_directory = {} |
- for label in labels: |
- if label in default_policy_directory: |
- specified_policy_directory[label] = default_policy_directory[label] |
- # TODO(dmikurube): Load an un-labeled policy file. |
- return PolicySet._load_policies(specified_policy_directory) |
- else: |
- return PolicySet._load_policies(default_policy_directory) |
- |
- def __len__(self): |
- return len(self._policy_directory) |
- |
- def __iter__(self): |
- for label in self._policy_directory: |
- yield label |
- |
- def __getitem__(self, label): |
- return self._policy_directory[label] |
- |
- @staticmethod |
- def _load_default_policy_directory(): |
- with open(POLICIES_JSON_PATH, mode='r') as policies_f: |
- default_policy_directory = json.load(policies_f) |
- return default_policy_directory |
- |
- @staticmethod |
- def _load_policies(directory): |
- LOGGER.info('Loading policy files.') |
- policies = {} |
- for label in directory: |
- LOGGER.info(' %s: %s' % (label, directory[label]['file'])) |
- loaded = Policy.load(directory[label]['file'], directory[label]['format']) |
- if loaded: |
- policies[label] = loaded |
- return PolicySet(policies) |
- |
- |
-class Bucket(object): |
- """Represents a bucket, which is a unit of memory block classification.""" |
- |
- def __init__(self, stacktrace, allocator_type, typeinfo, typeinfo_name): |
- self._stacktrace = stacktrace |
- self._allocator_type = allocator_type |
- self._typeinfo = typeinfo |
- self._typeinfo_name = typeinfo_name |
- |
- self._symbolized_stackfunction = stacktrace |
- self._symbolized_joined_stackfunction = '' |
- self._symbolized_stacksourcefile = stacktrace |
- self._symbolized_joined_stacksourcefile = '' |
- self._symbolized_typeinfo = typeinfo_name |
- |
- self.component_cache = '' |
- |
- def __str__(self): |
- result = [] |
- result.append(self._allocator_type) |
- if self._symbolized_typeinfo == 'no typeinfo': |
- result.append('tno_typeinfo') |
- else: |
- result.append('t' + self._symbolized_typeinfo) |
- result.append('n' + self._typeinfo_name) |
- result.extend(['%s(@%s)' % (function, sourcefile) |
- for function, sourcefile |
- in zip(self._symbolized_stackfunction, |
- self._symbolized_stacksourcefile)]) |
- return ' '.join(result) |
- |
- def symbolize(self, symbol_mapping_cache): |
- """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. |
- |
- Args: |
- symbol_mapping_cache: A SymbolMappingCache object. |
- """ |
- # TODO(dmikurube): Fill explicitly with numbers if symbol not found. |
- self._symbolized_stackfunction = [ |
- symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address) |
- for address in self._stacktrace] |
- self._symbolized_joined_stackfunction = ' '.join( |
- self._symbolized_stackfunction) |
- self._symbolized_stacksourcefile = [ |
- symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address) |
- for address in self._stacktrace] |
- self._symbolized_joined_stacksourcefile = ' '.join( |
- self._symbolized_stacksourcefile) |
- if not self._typeinfo: |
- self._symbolized_typeinfo = 'no typeinfo' |
- else: |
- self._symbolized_typeinfo = symbol_mapping_cache.lookup( |
- TYPEINFO_SYMBOLS, self._typeinfo) |
- if not self._symbolized_typeinfo: |
- self._symbolized_typeinfo = 'no typeinfo' |
- |
- def clear_component_cache(self): |
- self.component_cache = '' |
- |
- @property |
- def stacktrace(self): |
- return self._stacktrace |
- |
- @property |
- def allocator_type(self): |
- return self._allocator_type |
- |
- @property |
- def typeinfo(self): |
- return self._typeinfo |
- |
- @property |
- def typeinfo_name(self): |
- return self._typeinfo_name |
- |
- @property |
- def symbolized_stackfunction(self): |
- return self._symbolized_stackfunction |
- |
- @property |
- def symbolized_joined_stackfunction(self): |
- return self._symbolized_joined_stackfunction |
- |
- @property |
- def symbolized_stacksourcefile(self): |
- return self._symbolized_stacksourcefile |
- |
- @property |
- def symbolized_joined_stacksourcefile(self): |
- return self._symbolized_joined_stacksourcefile |
- |
- @property |
- def symbolized_typeinfo(self): |
- return self._symbolized_typeinfo |
- |
- |
-class BucketSet(object): |
- """Represents a set of bucket.""" |
- def __init__(self): |
- self._buckets = {} |
- self._code_addresses = set() |
- self._typeinfo_addresses = set() |
- |
- def load(self, prefix): |
- """Loads all related bucket files. |
- |
- Args: |
- prefix: A prefix string for bucket file names. |
- """ |
- LOGGER.info('Loading bucket files.') |
- |
- n = 0 |
- skipped = 0 |
- while True: |
- path = '%s.%04d.buckets' % (prefix, n) |
- if not os.path.exists(path) or not os.stat(path).st_size: |
- if skipped > 10: |
- break |
- n += 1 |
- skipped += 1 |
- continue |
- LOGGER.info(' %s' % path) |
- with open(path, 'r') as f: |
- self._load_file(f) |
- n += 1 |
- skipped = 0 |
- |
- def _load_file(self, bucket_f): |
- for line in bucket_f: |
- words = line.split() |
- typeinfo = None |
- typeinfo_name = '' |
- stacktrace_begin = 2 |
- for index, word in enumerate(words): |
- if index < 2: |
- continue |
- if word[0] == 't': |
- typeinfo = int(word[1:], 16) |
- self._typeinfo_addresses.add(typeinfo) |
- elif word[0] == 'n': |
- typeinfo_name = word[1:] |
- else: |
- stacktrace_begin = index |
- break |
- stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] |
- for frame in stacktrace: |
- self._code_addresses.add(frame) |
- self._buckets[int(words[0])] = Bucket( |
- stacktrace, words[1], typeinfo, typeinfo_name) |
- |
- def __iter__(self): |
- for bucket_id, bucket_content in self._buckets.iteritems(): |
- yield bucket_id, bucket_content |
- |
- def __getitem__(self, bucket_id): |
- return self._buckets[bucket_id] |
- |
- def get(self, bucket_id): |
- return self._buckets.get(bucket_id) |
- |
- def symbolize(self, symbol_mapping_cache): |
- for bucket_content in self._buckets.itervalues(): |
- bucket_content.symbolize(symbol_mapping_cache) |
- |
- def clear_component_cache(self): |
- for bucket_content in self._buckets.itervalues(): |
- bucket_content.clear_component_cache() |
- |
- def iter_addresses(self, symbol_type): |
- if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]: |
- for function in self._code_addresses: |
- yield function |
- else: |
- for function in self._typeinfo_addresses: |
- yield function |
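As _load_file() above suggests, a bucket file line reads "<id> <allocator> [t<typeinfo>] [n<name>] <stack addresses...>"; a sketch with invented values:

  bucket_set = BucketSet()
  bucket_set._load_file(
      ['123 malloc t7f00deadbeef nstd::string 7f0000001000 7f0000002000\n'])
  bucket = bucket_set.get(123)
  assert bucket.allocator_type == 'malloc'
  assert bucket.stacktrace == [0x7f0000001000, 0x7f0000002000]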
- |
- |
-class PageFrame(object): |
- """Represents a pageframe and maybe its shared count.""" |
- def __init__(self, pfn, size, pagecount, start_truncated, end_truncated): |
- self._pfn = pfn |
- self._size = size |
- self._pagecount = pagecount |
- self._start_truncated = start_truncated |
- self._end_truncated = end_truncated |
- |
- def __str__(self): |
- result = str() |
- if self._start_truncated: |
- result += '<' |
- result += '%06x#%d' % (self._pfn, self._pagecount) |
- if self._end_truncated: |
- result += '>' |
- return result |
- |
- def __repr__(self): |
- return str(self) |
- |
- @staticmethod |
- def parse(encoded_pfn, size): |
- start = 0 |
- end = len(encoded_pfn) |
- end_truncated = False |
- if encoded_pfn.endswith('>'): |
- end = len(encoded_pfn) - 1 |
- end_truncated = True |
- pagecount_found = encoded_pfn.find('#') |
- pagecount = None |
- if pagecount_found >= 0: |
- encoded_pagecount = 'AAA' + encoded_pfn[pagecount_found+1 : end] |
- pagecount = struct.unpack( |
- '>I', '\x00' + encoded_pagecount.decode('base64'))[0] |
- end = pagecount_found |
- start_truncated = False |
- if encoded_pfn.startswith('<'): |
- start = 1 |
- start_truncated = True |
- |
- pfn = struct.unpack( |
- '>I', '\x00' + (encoded_pfn[start:end]).decode('base64'))[0] |
- |
- return PageFrame(pfn, size, pagecount, start_truncated, end_truncated) |
- |
- @property |
- def pfn(self): |
- return self._pfn |
- |
- @property |
- def size(self): |
- return self._size |
- |
- def set_size(self, size): |
- self._size = size |
- |
- @property |
- def pagecount(self): |
- return self._pagecount |
- |
- @property |
- def start_truncated(self): |
- return self._start_truncated |
- |
- @property |
- def end_truncated(self): |
- return self._end_truncated |
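A decoding sketch for PageFrame.parse(): 'AAAB' is base64 for the three bytes 00 00 01, so the PFN decodes to 1, and '#B' carries a base64-coded pagecount of 1 (invented values):

  pf = PageFrame.parse('AAAB#B', 4096)
  assert pf.pfn == 1 and pf.pagecount == 1 and pf.size == 4096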
- |
- |
-class PFNCounts(object): |
- """Represents counts of PFNs in a process.""" |
- |
- _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') |
- |
- def __init__(self, path, modified_time): |
- matched = self._PATH_PATTERN.match(path) |
- if matched: |
- self._pid = int(matched.group(2)) |
- else: |
- self._pid = 0 |
- self._command_line = '' |
- self._pagesize = 4096 |
- self._path = path |
- self._pfn_meta = '' |
- self._pfnset = {} |
- self._reason = '' |
- self._time = modified_time |
- |
- @staticmethod |
- def load(path, log_header='Loading PFNs from a heap profile dump: '): |
- pfnset = PFNCounts(path, float(os.stat(path).st_mtime)) |
- LOGGER.info('%s%s' % (log_header, path)) |
- |
- with open(path, 'r') as pfnset_f: |
- pfnset.load_file(pfnset_f) |
- |
- return pfnset |
- |
- @property |
- def path(self): |
- return self._path |
- |
- @property |
- def pid(self): |
- return self._pid |
- |
- @property |
- def time(self): |
- return self._time |
- |
- @property |
- def reason(self): |
- return self._reason |
- |
- @property |
- def iter_pfn(self): |
- for pfn, count in self._pfnset.iteritems(): |
- yield pfn, count |
- |
- def load_file(self, pfnset_f): |
- prev_pfn_end_truncated = None |
- for line in pfnset_f: |
- line = line.strip() |
- if line.startswith('GLOBAL_STATS:') or line.startswith('STACKTRACES:'): |
- break |
- elif line.startswith('PF: '): |
- for encoded_pfn in line[3:].split(): |
- page_frame = PageFrame.parse(encoded_pfn, self._pagesize) |
- if page_frame.start_truncated and ( |
- not prev_pfn_end_truncated or |
- prev_pfn_end_truncated != page_frame.pfn): |
- LOGGER.error('Broken page frame number: %s.' % encoded_pfn) |
- self._pfnset[page_frame.pfn] = self._pfnset.get(page_frame.pfn, 0) + 1 |
- if page_frame.end_truncated: |
- prev_pfn_end_truncated = page_frame.pfn |
- else: |
- prev_pfn_end_truncated = None |
- elif line.startswith('PageSize: '): |
- self._pagesize = int(line[10:]) |
- elif line.startswith('PFN: '): |
- self._pfn_meta = line[5:] |
- elif line.startswith('PageFrame: '): |
- self._pfn_meta = line[11:] |
- elif line.startswith('Time: '): |
- self._time = float(line[6:]) |
- elif line.startswith('CommandLine: '): |
- self._command_line = line[13:] |
- elif line.startswith('Reason: '): |
- self._reason = line[8:] |
- |
- |
-class Dump(object): |
- """Represents a heap profile dump.""" |
- |
- _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') |
- |
- _HOOK_PATTERN = re.compile( |
- r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' |
- r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) |
- |
- _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' |
- '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') |
- _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' |
- '(?P<RESERVED>[0-9]+)') |
- |
- _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') |
- _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') |
- |
- _TIME_PATTERN_FORMAT = re.compile( |
- r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') |
- _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') |
- |
- def __init__(self, path, modified_time): |
- self._path = path |
- matched = self._PATH_PATTERN.match(path) |
- self._pid = int(matched.group(2)) |
- self._count = int(matched.group(3)) |
- self._time = modified_time |
- self._map = {} |
- self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) |
- self._stacktrace_lines = [] |
- self._global_stats = {} # used only in apply_policy |
- |
- self._run_id = '' |
- self._pagesize = 4096 |
- self._pageframe_length = 0 |
- self._pageframe_encoding = '' |
- self._has_pagecount = False |
- |
- self._version = '' |
- self._lines = [] |
- |
- @property |
- def path(self): |
- return self._path |
- |
- @property |
- def count(self): |
- return self._count |
- |
- @property |
- def time(self): |
- return self._time |
- |
- @property |
- def iter_map(self): |
- for region in sorted(self._map.iteritems()): |
- yield region[0], region[1] |
- |
- def iter_procmaps(self): |
-    for begin, end, attr in self._procmaps.iter_range(): |
- yield begin, end, attr |
- |
- @property |
- def iter_stacktrace(self): |
- for line in self._stacktrace_lines: |
- yield line |
- |
- def global_stat(self, name): |
- return self._global_stats[name] |
- |
- @property |
- def run_id(self): |
- return self._run_id |
- |
- @property |
- def pagesize(self): |
- return self._pagesize |
- |
- @property |
- def pageframe_length(self): |
- return self._pageframe_length |
- |
- @property |
- def pageframe_encoding(self): |
- return self._pageframe_encoding |
- |
- @property |
- def has_pagecount(self): |
- return self._has_pagecount |
- |
- @staticmethod |
- def load(path, log_header='Loading a heap profile dump: '): |
- """Loads a heap profile dump. |
- |
- Args: |
- path: A file path string to load. |
- log_header: A preceding string for log messages. |
- |
- Returns: |
- A loaded Dump object. |
- |
- Raises: |
- ParsingException for invalid heap profile dumps. |
- """ |
- dump = Dump(path, os.stat(path).st_mtime) |
- with open(path, 'r') as f: |
- dump.load_file(f, log_header) |
- return dump |
- |
- def load_file(self, f, log_header): |
- self._lines = [line for line in f |
- if line and not line.startswith('#')] |
- |
- try: |
- self._version, ln = self._parse_version() |
- self._parse_meta_information() |
- if self._version == DUMP_DEEP_6: |
- self._parse_mmap_list() |
- self._parse_global_stats() |
- self._extract_stacktrace_lines(ln) |
- except EmptyDumpException: |
- LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) |
- except ParsingException, e: |
- LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) |
- raise |
- else: |
- LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) |
- |
- def _parse_version(self): |
- """Parses a version string in self._lines. |
- |
- Returns: |
- A pair of (a string representing a version of the stacktrace dump, |
- and an integer indicating a line number next to the version string). |
- |
- Raises: |
- ParsingException for invalid dump versions. |
- """ |
- version = '' |
- |
- # Skip until an identifiable line. |
- headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') |
- if not self._lines: |
- raise EmptyDumpException('Empty heap dump file.') |
- (ln, found) = skip_while( |
- 0, len(self._lines), |
- lambda n: not self._lines[n].startswith(headers)) |
- if not found: |
- raise InvalidDumpException('No version header.') |
- |
- # Identify a version. |
- if self._lines[ln].startswith('heap profile: '): |
- version = self._lines[ln][13:].strip() |
- if version in (DUMP_DEEP_5, DUMP_DEEP_6): |
- (ln, _) = skip_while( |
- ln, len(self._lines), |
- lambda n: self._lines[n] != 'STACKTRACES:\n') |
- elif version in DUMP_DEEP_OBSOLETE: |
- raise ObsoleteDumpVersionException(version) |
- else: |
- raise InvalidDumpException('Invalid version: %s' % version) |
- elif self._lines[ln] == 'STACKTRACES:\n': |
- raise ObsoleteDumpVersionException(DUMP_DEEP_1) |
- elif self._lines[ln] == 'MMAP_STACKTRACES:\n': |
- raise ObsoleteDumpVersionException(DUMP_DEEP_2) |
- |
- return (version, ln) |
- |
- def _parse_global_stats(self): |
- """Parses lines in self._lines as global stats.""" |
- (ln, _) = skip_while( |
- 0, len(self._lines), |
- lambda n: self._lines[n] != 'GLOBAL_STATS:\n') |
- |
- global_stat_names = [ |
- 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', |
- 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', |
- 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', |
- 'nonprofiled-stack', 'nonprofiled-other', |
- 'profiled-mmap', 'profiled-malloc'] |
- |
- for prefix in global_stat_names: |
- (ln, _) = skip_while( |
- ln, len(self._lines), |
- lambda n: self._lines[n].split()[0] != prefix) |
- words = self._lines[ln].split() |
- self._global_stats[prefix + '_virtual'] = int(words[-2]) |
- self._global_stats[prefix + '_committed'] = int(words[-1]) |
- |
- def _parse_meta_information(self): |
- """Parses lines in self._lines for meta information.""" |
- (ln, found) = skip_while( |
- 0, len(self._lines), |
- lambda n: self._lines[n] != 'META:\n') |
- if not found: |
- return |
- ln += 1 |
- |
- while True: |
- if self._lines[ln].startswith('Time:'): |
- matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) |
- matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) |
- if matched_format: |
- self._time = time.mktime(datetime.datetime.strptime( |
- matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) |
- if matched_format.group(2): |
- self._time += float(matched_format.group(2)[1:]) / 1000.0 |
- elif matched_seconds: |
- self._time = float(matched_seconds.group(1)) |
- elif self._lines[ln].startswith('Reason:'): |
- pass # Nothing to do for 'Reason:' |
- elif self._lines[ln].startswith('PageSize: '): |
- self._pagesize = int(self._lines[ln][10:]) |
- elif self._lines[ln].startswith('CommandLine:'): |
- pass |
- elif (self._lines[ln].startswith('PageFrame: ') or |
- self._lines[ln].startswith('PFN: ')): |
- if self._lines[ln].startswith('PageFrame: '): |
- words = self._lines[ln][11:].split(',') |
- else: |
- words = self._lines[ln][5:].split(',') |
- for word in words: |
- if word == '24': |
- self._pageframe_length = 24 |
- elif word == 'Base64': |
- self._pageframe_encoding = 'base64' |
- elif word == 'PageCount': |
- self._has_pagecount = True |
- elif self._lines[ln].startswith('RunID: '): |
- self._run_id = self._lines[ln][7:].strip() |
- elif (self._lines[ln].startswith('MMAP_LIST:') or |
- self._lines[ln].startswith('GLOBAL_STATS:')): |
-        # The META section ends at "MMAP_LIST:" or "GLOBAL_STATS:". |
- break |
- else: |
- pass |
- ln += 1 |
- |
- def _parse_mmap_list(self): |
- """Parses lines in self._lines as a mmap list.""" |
- (ln, found) = skip_while( |
- 0, len(self._lines), |
- lambda n: self._lines[n] != 'MMAP_LIST:\n') |
- if not found: |
- return {} |
- |
- ln += 1 |
- self._map = {} |
- current_vma = {} |
- pageframe_list = [] |
- while True: |
- entry = proc_maps.ProcMaps.parse_line(self._lines[ln]) |
- if entry: |
- current_vma = {} |
- for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): |
- for key, value in entry.as_dict().iteritems(): |
- attr[key] = value |
- current_vma[key] = value |
- ln += 1 |
- continue |
- |
- if self._lines[ln].startswith(' PF: '): |
- for pageframe in self._lines[ln][5:].split(): |
- pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) |
- ln += 1 |
- continue |
- |
- matched = self._HOOK_PATTERN.match(self._lines[ln]) |
- if not matched: |
- break |
- # 2: starting address |
- # 5: end address |
- # 7: hooked or unhooked |
- # 8: additional information |
- if matched.group(7) == 'hooked': |
- submatched = self._HOOKED_PATTERN.match(matched.group(8)) |
- if not submatched: |
- submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) |
- elif matched.group(7) == 'unhooked': |
- submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) |
- if not submatched: |
- submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) |
- else: |
- assert matched.group(7) in ['hooked', 'unhooked'] |
- |
- submatched_dict = submatched.groupdict() |
- region_info = { 'vma': current_vma } |
- if submatched_dict.get('TYPE'): |
- region_info['type'] = submatched_dict['TYPE'].strip() |
- if submatched_dict.get('COMMITTED'): |
- region_info['committed'] = int(submatched_dict['COMMITTED']) |
- if submatched_dict.get('RESERVED'): |
- region_info['reserved'] = int(submatched_dict['RESERVED']) |
- if submatched_dict.get('BUCKETID'): |
- region_info['bucket_id'] = int(submatched_dict['BUCKETID']) |
- |
- if matched.group(1) == '(': |
- start = current_vma['begin'] |
- else: |
- start = int(matched.group(2), 16) |
- if matched.group(4) == '(': |
- end = current_vma['end'] |
- else: |
- end = int(matched.group(5), 16) |
- |
- if pageframe_list and pageframe_list[0].start_truncated: |
- pageframe_list[0].set_size( |
- pageframe_list[0].size - start % self._pagesize) |
- if pageframe_list and pageframe_list[-1].end_truncated: |
- pageframe_list[-1].set_size( |
- pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) |
- region_info['pageframe'] = pageframe_list |
- pageframe_list = [] |
- |
- self._map[(start, end)] = (matched.group(7), region_info) |
- ln += 1 |
- |
- def _extract_stacktrace_lines(self, line_number): |
- """Extracts the position of stacktrace lines. |
- |
-    Valid stacktrace lines are stored in self._stacktrace_lines. |
- |
- Args: |
- line_number: A line number to start parsing in lines. |
- |
- Raises: |
- ParsingException for invalid dump versions. |
- """ |
- if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): |
- (line_number, _) = skip_while( |
- line_number, len(self._lines), |
- lambda n: not self._lines[n].split()[0].isdigit()) |
- stacktrace_start = line_number |
- (line_number, _) = skip_while( |
- line_number, len(self._lines), |
- lambda n: self._check_stacktrace_line(self._lines[n])) |
- self._stacktrace_lines = self._lines[stacktrace_start:line_number] |
- |
- elif self._version in DUMP_DEEP_OBSOLETE: |
- raise ObsoleteDumpVersionException(self._version) |
- |
- else: |
- raise InvalidDumpException('Invalid version: %s' % self._version) |
- |
- @staticmethod |
- def _check_stacktrace_line(stacktrace_line): |
- """Checks if a given stacktrace_line is valid as stacktrace. |
- |
- Args: |
- stacktrace_line: A string to be checked. |
- |
- Returns: |
- True if the given stacktrace_line is valid. |
- """ |
- words = stacktrace_line.split() |
- if len(words) < BUCKET_ID + 1: |
- return False |
- if words[BUCKET_ID - 1] != '@': |
- return False |
- return True |
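Judging from the index constants near the top of this file (VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, BUCKET_ID) and the check above, a DUMP_DEEP_5/6 stacktrace line reads "<virtual> <committed> <allocs> <frees> @ <bucket id>"; a made-up example:

  line = ' 1048576   524288    10     2 @ 123'
  assert Dump._check_stacktrace_line(line)
  words = line.split()
  assert int(words[BUCKET_ID]) == 123 and int(words[COMMITTED]) == 524288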
- |
- |
-class DumpList(object): |
- """Represents a sequence of heap profile dumps.""" |
- |
- def __init__(self, dump_list): |
- self._dump_list = dump_list |
- |
- @staticmethod |
- def load(path_list): |
- LOGGER.info('Loading heap dump profiles.') |
- dump_list = [] |
- for path in path_list: |
- dump_list.append(Dump.load(path, ' ')) |
- return DumpList(dump_list) |
- |
- def __len__(self): |
- return len(self._dump_list) |
- |
- def __iter__(self): |
- for dump in self._dump_list: |
- yield dump |
- |
- def __getitem__(self, index): |
- return self._dump_list[index] |
- |
- |
-class Unit(object): |
- """Represents a minimum unit of memory usage categorization. |
- |
-  It is meant to be subclassed for different spaces such as the entire |
-  virtual memory and the malloc arena. Such spaces are called "worlds" in |
-  dmprof. (For example, the "vm" world and the "malloc" world.) |
- """ |
- def __init__(self, unit_id, size): |
- self._unit_id = unit_id |
- self._size = size |
- |
- @property |
- def unit_id(self): |
- return self._unit_id |
- |
- @property |
- def size(self): |
- return self._size |
- |
- |
-class VMUnit(Unit): |
- """Represents a Unit for a memory region on virtual memory.""" |
- def __init__(self, unit_id, committed, reserved, mmap, region, |
- pageframe=None, group_pfn_counts=None): |
- super(VMUnit, self).__init__(unit_id, committed) |
- self._reserved = reserved |
- self._mmap = mmap |
- self._region = region |
- self._pageframe = pageframe |
- self._group_pfn_counts = group_pfn_counts |
- |
- @property |
- def committed(self): |
- return self._size |
- |
- @property |
- def reserved(self): |
- return self._reserved |
- |
- @property |
- def mmap(self): |
- return self._mmap |
- |
- @property |
- def region(self): |
- return self._region |
- |
- @property |
- def pageframe(self): |
- return self._pageframe |
- |
- @property |
- def group_pfn_counts(self): |
- return self._group_pfn_counts |
- |
- |
-class MMapUnit(VMUnit): |
- """Represents a Unit for a mmap'ed region.""" |
- def __init__(self, unit_id, committed, reserved, region, bucket_set, |
- pageframe=None, group_pfn_counts=None): |
- super(MMapUnit, self).__init__(unit_id, committed, reserved, True, |
- region, pageframe, group_pfn_counts) |
- self._bucket_set = bucket_set |
- |
- def __repr__(self): |
- return str(self.region) |
- |
- @property |
- def bucket_set(self): |
- return self._bucket_set |
- |
- |
-class UnhookedUnit(VMUnit): |
- """Represents a Unit for a non-mmap'ed memory region on virtual memory.""" |
- def __init__(self, unit_id, committed, reserved, region, |
- pageframe=None, group_pfn_counts=None): |
- super(UnhookedUnit, self).__init__(unit_id, committed, reserved, False, |
- region, pageframe, group_pfn_counts) |
- |
- def __repr__(self): |
- return str(self.region) |
- |
- |
-class MallocUnit(Unit): |
- """Represents a Unit for a malloc'ed memory block.""" |
- def __init__(self, unit_id, size, alloc_count, free_count, bucket): |
- super(MallocUnit, self).__init__(unit_id, size) |
- self._bucket = bucket |
- self._alloc_count = alloc_count |
- self._free_count = free_count |
- |
- def __repr__(self): |
- return str(self.bucket) |
- |
- @property |
- def bucket(self): |
- return self._bucket |
- |
- @property |
- def alloc_count(self): |
- return self._alloc_count |
- |
- @property |
- def free_count(self): |
- return self._free_count |
- |
- |
-class UnitSet(object): |
- """Represents an iterable set of Units.""" |
- def __init__(self, world): |
- self._units = {} |
- self._world = world |
- |
- def __repr__(self): |
- return str(self._units) |
- |
- def __iter__(self): |
- for unit_id in sorted(self._units): |
- yield self._units[unit_id] |
- |
- def append(self, unit, overwrite=False): |
- if not overwrite and unit.unit_id in self._units: |
- LOGGER.error('The unit id=%s already exists.' % str(unit.unit_id)) |
- self._units[unit.unit_id] = unit |
- |
- |
-class AbstractRule(object): |
- """An abstract class for rules to be matched with units.""" |
- def __init__(self, dct): |
- self._name = dct['name'] |
- self._hidden = dct.get('hidden', False) |
- self._subworlds = dct.get('subworlds', []) |
- |
- def match(self, unit): |
- raise NotImplementedError() |
- |
- @property |
- def name(self): |
- return self._name |
- |
- @property |
- def hidden(self): |
- return self._hidden |
- |
- def iter_subworld(self): |
- for subworld in self._subworlds: |
- yield subworld |
- |
- |
-class VMRule(AbstractRule): |
- """Represents a Rule to match with virtual memory regions.""" |
- def __init__(self, dct): |
- super(VMRule, self).__init__(dct) |
- self._backtrace_function = dct.get('backtrace_function', None) |
- if self._backtrace_function: |
- self._backtrace_function = re.compile(self._backtrace_function) |
- self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) |
- if self._backtrace_sourcefile: |
- self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) |
- self._mmap = dct.get('mmap', None) |
- self._sharedwith = dct.get('sharedwith', []) |
- self._mapped_pathname = dct.get('mapped_pathname', None) |
- if self._mapped_pathname: |
- self._mapped_pathname = re.compile(self._mapped_pathname) |
- self._mapped_permission = dct.get('mapped_permission', None) |
- if self._mapped_permission: |
- self._mapped_permission = re.compile(self._mapped_permission) |
- |
- def __repr__(self): |
- result = cStringIO.StringIO() |
- result.write('{"%s"=>' % self._name) |
- attributes = [] |
- attributes.append('mmap: %s' % self._mmap) |
- if self._backtrace_function: |
- attributes.append('backtrace_function: "%s"' % |
- self._backtrace_function.pattern) |
- if self._sharedwith: |
- attributes.append('sharedwith: "%s"' % self._sharedwith) |
- if self._mapped_pathname: |
- attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern) |
- if self._mapped_permission: |
- attributes.append('mapped_permission: "%s"' % |
- self._mapped_permission.pattern) |
- result.write('%s}' % ', '.join(attributes)) |
- return result.getvalue() |
- |
- def match(self, unit): |
- if unit.mmap: |
- assert unit.region[0] == 'hooked' |
- bucket = unit.bucket_set.get(unit.region[1]['bucket_id']) |
- assert bucket |
- assert bucket.allocator_type == 'mmap' |
- |
- stackfunction = bucket.symbolized_joined_stackfunction |
- stacksourcefile = bucket.symbolized_joined_stacksourcefile |
- |
- # TODO(dmikurube): Support shared memory. |
- sharedwith = None |
- |
- if self._mmap == False: # (self._mmap == None) should go through. |
- return False |
- if (self._backtrace_function and |
- not self._backtrace_function.match(stackfunction)): |
- return False |
- if (self._backtrace_sourcefile and |
- not self._backtrace_sourcefile.match(stacksourcefile)): |
- return False |
- if (self._mapped_pathname and |
- not self._mapped_pathname.match(unit.region[1]['vma']['name'])): |
- return False |
- if (self._mapped_permission and |
- not self._mapped_permission.match( |
- unit.region[1]['vma']['readable'] + |
- unit.region[1]['vma']['writable'] + |
- unit.region[1]['vma']['executable'] + |
- unit.region[1]['vma']['private'])): |
- return False |
- if (self._sharedwith and |
- unit.pageframe and sharedwith not in self._sharedwith): |
- return False |
- |
- return True |
- |
- else: |
- assert unit.region[0] == 'unhooked' |
- |
- # TODO(dmikurube): Support shared memory. |
- sharedwith = None |
- |
- if self._mmap == True: # (self._mmap == None) should go through. |
- return False |
- if (self._mapped_pathname and |
- not self._mapped_pathname.match(unit.region[1]['vma']['name'])): |
- return False |
- if (self._mapped_permission and |
- not self._mapped_permission.match( |
- unit.region[1]['vma']['readable'] + |
- unit.region[1]['vma']['writable'] + |
- unit.region[1]['vma']['executable'] + |
- unit.region[1]['vma']['private'])): |
- return False |
- if (self._sharedwith and |
- unit.pageframe and sharedwith not in self._sharedwith): |
- return False |
- |
- return True |
- |
- |
-class MallocRule(AbstractRule): |
- """Represents a Rule to match with malloc'ed blocks.""" |
- def __init__(self, dct): |
- super(MallocRule, self).__init__(dct) |
- self._backtrace_function = dct.get('backtrace_function', None) |
- if self._backtrace_function: |
- self._backtrace_function = re.compile(self._backtrace_function) |
- self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) |
- if self._backtrace_sourcefile: |
- self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) |
- self._typeinfo = dct.get('typeinfo', None) |
- if self._typeinfo: |
- self._typeinfo = re.compile(self._typeinfo) |
- |
- def __repr__(self): |
- result = cStringIO.StringIO() |
- result.write('{"%s"=>' % self._name) |
- attributes = [] |
- if self._backtrace_function: |
- attributes.append('backtrace_function: "%s"' % self._backtrace_function) |
- if self._typeinfo: |
- attributes.append('typeinfo: "%s"' % self._typeinfo) |
- result.write('%s}' % ', '.join(attributes)) |
- return result.getvalue() |
- |
- def match(self, unit): |
- assert unit.bucket.allocator_type == 'malloc' |
- |
- stackfunction = unit.bucket.symbolized_joined_stackfunction |
- stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile |
- typeinfo = unit.bucket.symbolized_typeinfo |
- if typeinfo.startswith('0x'): |
- typeinfo = unit.bucket.typeinfo_name |
- |
- return ((not self._backtrace_function or |
- self._backtrace_function.match(stackfunction)) and |
- (not self._backtrace_sourcefile or |
- self._backtrace_sourcefile.match(stacksourcefile)) and |
- (not self._typeinfo or self._typeinfo.match(typeinfo))) |
- |
- |
-class NoBucketMallocRule(MallocRule): |
- """Represents a Rule that small ignorable units match with.""" |
- def __init__(self): |
- super(NoBucketMallocRule, self).__init__({'name': 'tc-no-bucket'}) |
- self._no_bucket = True |
- |
- @property |
- def no_bucket(self): |
- return self._no_bucket |
- |
- |
-class AbstractSorter(object): |
- """An abstract class for classifying Units with a set of Rules.""" |
- def __init__(self, dct): |
- self._type = 'sorter' |
- self._version = dct['version'] |
- self._world = dct['world'] |
- self._name = dct['name'] |
- self._order = dct['order'] |
- |
- self._rules = [] |
- for rule in dct['rules']: |
- if dct['world'] == 'vm': |
- self._rules.append(VMRule(rule)) |
- elif dct['world'] == 'malloc': |
- self._rules.append(MallocRule(rule)) |
- else: |
- LOGGER.error('Unknown sorter world type') |
- |
- def __repr__(self): |
- result = cStringIO.StringIO() |
- result.write('world=%s' % self._world) |
- result.write('order=%s' % self._order) |
- result.write('rules:') |
- for rule in self._rules: |
- result.write(' %s' % rule) |
- return result.getvalue() |
- |
- @staticmethod |
- def load(filename): |
- with open(filename) as sorter_f: |
- sorter_dict = json.load(sorter_f) |
- if sorter_dict['world'] == 'vm': |
- return VMSorter(sorter_dict) |
- elif sorter_dict['world'] == 'malloc': |
- return MallocSorter(sorter_dict) |
- else: |
- LOGGER.error('Unknown sorter world type') |
- return None |
- |
- @property |
- def world(self): |
- return self._world |
- |
- @property |
- def name(self): |
- return self._name |
- |
- def find(self, unit): |
- raise NotImplementedError() |
- |
- def find_rule(self, name): |
- """Finds a rule whose name is |name|. """ |
- for rule in self._rules: |
- if rule.name == name: |
- return rule |
- return None |
- |
- |
-class VMSorter(AbstractSorter): |
- """Represents a Sorter for memory regions on virtual memory.""" |
- def __init__(self, dct): |
- assert dct['world'] == 'vm' |
- super(VMSorter, self).__init__(dct) |
- |
- def find(self, unit): |
- for rule in self._rules: |
- if rule.match(unit): |
- return rule |
- assert False |
- |
- |
-class MallocSorter(AbstractSorter): |
- """Represents a Sorter for malloc'ed blocks.""" |
- def __init__(self, dct): |
- assert dct['world'] == 'malloc' |
- super(MallocSorter, self).__init__(dct) |
- self._no_bucket_rule = NoBucketMallocRule() |
- |
- def find(self, unit): |
- if not unit.bucket: |
- return self._no_bucket_rule |
- assert unit.bucket.allocator_type == 'malloc' |
- |
- if unit.bucket.component_cache: |
- return unit.bucket.component_cache |
- |
- for rule in self._rules: |
- if rule.match(unit): |
- unit.bucket.component_cache = rule |
- return rule |
- assert False |
- |
- |
-class SorterSet(object): |
- """Represents an iterable set of Sorters.""" |
- def __init__(self, additional=None, default=None): |
- if not additional: |
- additional = [] |
- if not default: |
- default = DEFAULT_SORTERS |
- self._sorters = {} |
- for filename in default + additional: |
- sorter = AbstractSorter.load(filename) |
- if sorter.world not in self._sorters: |
- self._sorters[sorter.world] = [] |
- self._sorters[sorter.world].append(sorter) |
- |
- def __repr__(self): |
- result = cStringIO.StringIO() |
-    result.write(str(self._sorters)) |
- return result.getvalue() |
- |
- def __iter__(self): |
- for sorters in self._sorters.itervalues(): |
- for sorter in sorters: |
- yield sorter |
- |
- def iter_world(self, world): |
- for sorter in self._sorters.get(world, []): |
- yield sorter |
- |
- |
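# An illustrative sketch of the JSON shape the sorter classes above expect, |
# based only on the keys read in AbstractSorter.__init__ and |
# MallocRule.__init__.  The version string, rule names and patterns are |
# hypothetical, not taken from the real sorter.*.json files. |
import json |
 |
EXAMPLE_MALLOC_SORTER = json.loads(""" |
{ |
  "version": "SORTER_EXAMPLE", |
  "world": "malloc", |
  "name": "malloc-example", |
  "order": {}, |
  "rules": [ |
    {"name": "tc-std-string", "backtrace_function": ".*std::basic_string.*"}, |
    {"name": "tc-other", "backtrace_function": ".*"} |
  ] |
} |
""") |
 |
# AbstractSorter.load() dispatches on the "world" field (here, to a |
# MallocSorter); each rule's regular expressions are optional. |
assert EXAMPLE_MALLOC_SORTER['world'] == 'malloc' |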
-class Command(object): |
- """Subclasses are a subcommand for this executable. |
- |
- See COMMANDS in main(). |
- """ |
- _DEVICE_LIB_BASEDIRS = ['/data/data/', '/data/app-lib/', '/data/local/tmp'] |
- |
- def __init__(self, usage): |
- self._parser = optparse.OptionParser(usage) |
- |
- @staticmethod |
- def load_basic_files( |
- dump_path, multiple, no_dump=False, alternative_dirs=None): |
- prefix = Command._find_prefix(dump_path) |
-    # If the target process appears to have been running on Android, convert |
-    # paths on the Android device to their estimated counterparts on the host. |
-    # Use --alternative-dirs to specify the mapping manually. |
- if not alternative_dirs: |
- alternative_dirs = Command._estimate_alternative_dirs(prefix) |
- if alternative_dirs: |
- for device, host in alternative_dirs.iteritems(): |
- LOGGER.info('Assuming %s on device as %s on host' % (device, host)) |
- symbol_data_sources = SymbolDataSources(prefix, alternative_dirs) |
- symbol_data_sources.prepare() |
- bucket_set = BucketSet() |
- bucket_set.load(prefix) |
- if not no_dump: |
- if multiple: |
- dump_list = DumpList.load(Command._find_all_dumps(dump_path)) |
- else: |
- dump = Dump.load(dump_path) |
- symbol_mapping_cache = SymbolMappingCache() |
- with open(prefix + '.cache.function', 'a+') as cache_f: |
- symbol_mapping_cache.update( |
- FUNCTION_SYMBOLS, bucket_set, |
- SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f) |
- with open(prefix + '.cache.typeinfo', 'a+') as cache_f: |
- symbol_mapping_cache.update( |
- TYPEINFO_SYMBOLS, bucket_set, |
- SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f) |
- with open(prefix + '.cache.sourcefile', 'a+') as cache_f: |
- symbol_mapping_cache.update( |
- SOURCEFILE_SYMBOLS, bucket_set, |
- SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f) |
- bucket_set.symbolize(symbol_mapping_cache) |
- if no_dump: |
- return bucket_set |
- elif multiple: |
- return (bucket_set, dump_list) |
- else: |
- return (bucket_set, dump) |
- |
- @staticmethod |
- def _find_prefix(path): |
-    return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
- |
- @staticmethod |
- def _estimate_alternative_dirs(prefix): |
- """Estimates a path in host from a corresponding path in target device. |
- |
- For Android, dmprof.py should find symbol information from binaries in |
- the host instead of the Android device because dmprof.py doesn't run on |
- the Android device. This method estimates a path in the host |
- corresponding to a path in the Android device. |
- |
- Returns: |
- A dict that maps a path in the Android device to a path in the host. |
- If a file in Command._DEVICE_LIB_BASEDIRS is found in /proc/maps, it |
- assumes the process was running on Android and maps the path to |
- "out/Debug/lib" in the Chromium directory. An empty dict is returned |
- unless Android. |
- """ |
- device_lib_path_candidates = set() |
- |
- with open(prefix + '.maps') as maps_f: |
- maps = proc_maps.ProcMaps.load(maps_f) |
- for entry in maps: |
- name = entry.as_dict()['name'] |
- if any([base_dir in name for base_dir in Command._DEVICE_LIB_BASEDIRS]): |
- device_lib_path_candidates.add(os.path.dirname(name)) |
- |
- if len(device_lib_path_candidates) == 1: |
- return {device_lib_path_candidates.pop(): os.path.join( |
- CHROME_SRC_PATH, 'out', 'Debug', 'lib')} |
- else: |
- return {} |
- |
- @staticmethod |
- def _find_all_dumps(dump_path): |
- prefix = Command._find_prefix(dump_path) |
- dump_path_list = [dump_path] |
- |
-    n = int(dump_path[-9:-5]) |
- n += 1 |
- skipped = 0 |
- while True: |
- p = '%s.%04d.heap' % (prefix, n) |
- if os.path.exists(p) and os.stat(p).st_size: |
- dump_path_list.append(p) |
- else: |
- if skipped > 10: |
- break |
- skipped += 1 |
- n += 1 |
- |
- return dump_path_list |
- |
- @staticmethod |
- def _find_all_buckets(dump_path): |
- prefix = Command._find_prefix(dump_path) |
- bucket_path_list = [] |
- |
- n = 0 |
- while True: |
- path = '%s.%04d.buckets' % (prefix, n) |
- if not os.path.exists(path): |
- if n > 10: |
- break |
- n += 1 |
- continue |
- bucket_path_list.append(path) |
- n += 1 |
- |
- return bucket_path_list |
- |
- def _parse_args(self, sys_argv, required): |
- options, args = self._parser.parse_args(sys_argv) |
- if len(args) < required + 1: |
- self._parser.error('needs %d argument(s).\n' % required) |
- return None |
- return (options, args) |
- |
- @staticmethod |
- def _parse_policy_list(options_policy): |
- if options_policy: |
- return options_policy.split(',') |
- else: |
- return None |
- |
- |
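# A standalone sketch of the dump-file naming convention that |
# Command._find_prefix and Command._find_all_dumps rely on: dumps are named |
# <prefix>.NNNN.heap with a zero-padded, four-digit sequence number.  The |
# file name below is hypothetical. |
import re |
 |
dump_path = 'dmprof.12345.0002.heap' |
prefix = re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', dump_path) |
assert prefix == 'dmprof.12345' |
 |
# The sequence number is the four digits just before '.heap'; later dumps |
# are probed as <prefix>.0003.heap, <prefix>.0004.heap, ..., giving up |
# after about ten missing files. |
sequence = int(dump_path[-9:-5]) |
assert sequence == 2 |
assert '%s.%04d.heap' % (prefix, sequence + 1) == 'dmprof.12345.0003.heap' |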
-class BucketsCommand(Command): |
- def __init__(self): |
- super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>') |
- |
- def do(self, sys_argv, out=sys.stdout): |
- _, args = self._parse_args(sys_argv, 1) |
- dump_path = args[1] |
- bucket_set = Command.load_basic_files(dump_path, True, True) |
- |
- BucketsCommand._output(bucket_set, out) |
- return 0 |
- |
- @staticmethod |
- def _output(bucket_set, out): |
- """Prints all buckets with resolving symbols. |
- |
- Args: |
- bucket_set: A BucketSet object. |
- out: An IO object to output. |
- """ |
- for bucket_id, bucket in sorted(bucket_set): |
- out.write('%d: %s\n' % (bucket_id, bucket)) |
- |
- |
-class StacktraceCommand(Command): |
- def __init__(self): |
- super(StacktraceCommand, self).__init__( |
- 'Usage: %prog stacktrace <dump>') |
- |
- def do(self, sys_argv): |
- _, args = self._parse_args(sys_argv, 1) |
- dump_path = args[1] |
- (bucket_set, dump) = Command.load_basic_files(dump_path, False) |
- |
- StacktraceCommand._output(dump, bucket_set, sys.stdout) |
- return 0 |
- |
- @staticmethod |
- def _output(dump, bucket_set, out): |
- """Outputs a given stacktrace. |
- |
- Args: |
- bucket_set: A BucketSet object. |
- out: A file object to output. |
- """ |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket: |
- continue |
- for i in range(0, BUCKET_ID - 1): |
- out.write(words[i] + ' ') |
- for frame in bucket.symbolized_stackfunction: |
- out.write(frame + ' ') |
- out.write('\n') |
- |
- |
-class PolicyCommands(Command): |
- def __init__(self, command): |
- super(PolicyCommands, self).__init__( |
- 'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' % |
- command) |
- self._parser.add_option('-p', '--policy', type='string', dest='policy', |
- help='profile with POLICY', metavar='POLICY') |
- self._parser.add_option('--alternative-dirs', dest='alternative_dirs', |
- metavar='/path/on/target@/path/on/host[:...]', |
- help='Read files in /path/on/host/ instead of ' |
- 'files in /path/on/target/.') |
- |
- def _set_up(self, sys_argv): |
- options, args = self._parse_args(sys_argv, 1) |
- dump_path = args[1] |
- shared_first_dump_paths = args[2:] |
- alternative_dirs_dict = {} |
- if options.alternative_dirs: |
- for alternative_dir_pair in options.alternative_dirs.split(':'): |
- target_path, host_path = alternative_dir_pair.split('@', 1) |
- alternative_dirs_dict[target_path] = host_path |
- (bucket_set, dumps) = Command.load_basic_files( |
- dump_path, True, alternative_dirs=alternative_dirs_dict) |
- |
- pfn_counts_dict = {} |
- for shared_first_dump_path in shared_first_dump_paths: |
- shared_dumps = Command._find_all_dumps(shared_first_dump_path) |
- for shared_dump in shared_dumps: |
- pfn_counts = PFNCounts.load(shared_dump) |
- if pfn_counts.pid not in pfn_counts_dict: |
- pfn_counts_dict[pfn_counts.pid] = [] |
- pfn_counts_dict[pfn_counts.pid].append(pfn_counts) |
- |
- policy_set = PolicySet.load(Command._parse_policy_list(options.policy)) |
- return policy_set, dumps, pfn_counts_dict, bucket_set |
- |
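# A minimal sketch of how the --alternative-dirs value is parsed in |
# PolicyCommands._set_up above (CatCommand.do parses it the same way): |
# pairs are separated by ':' and each pair is '<path on target>@<path on |
# host>'.  The paths below are hypothetical. |
flag = '/data/app-lib/com.example-1@/home/user/chrome/src/out/Debug/lib' |
 |
alternative_dirs = {} |
for pair in flag.split(':'): |
  target_path, host_path = pair.split('@', 1) |
  alternative_dirs[target_path] = host_path |
 |
assert alternative_dirs == { |
    '/data/app-lib/com.example-1': '/home/user/chrome/src/out/Debug/lib'} |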
- @staticmethod |
- def _apply_policy(dump, pfn_counts_dict, policy, bucket_set, first_dump_time): |
- """Aggregates the total memory size of each component. |
- |
-    Iterates through all stacktraces and attributes each of them to one of the |
-    components based on the policy. It is important to apply the policy rules |
-    in the right order. |
- |
- Args: |
- dump: A Dump object. |
- pfn_counts_dict: A dict mapping a pid to a list of PFNCounts. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- first_dump_time: An integer representing time when the first dump is |
- dumped. |
- |
- Returns: |
- A dict mapping components and their corresponding sizes. |
- """ |
- LOGGER.info(' %s' % dump.path) |
- all_pfn_dict = {} |
- if pfn_counts_dict: |
- LOGGER.info(' shared with...') |
- for pid, pfnset_list in pfn_counts_dict.iteritems(): |
- closest_pfnset_index = None |
- closest_pfnset_difference = 1024.0 |
- for index, pfnset in enumerate(pfnset_list): |
- time_difference = pfnset.time - dump.time |
- if time_difference >= 3.0: |
- break |
- elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or |
- (0.0 <= time_difference and time_difference < 3.0)): |
- closest_pfnset_index = index |
- closest_pfnset_difference = time_difference |
- elif time_difference < 0.0 and pfnset.reason == 'Exiting': |
- closest_pfnset_index = None |
- break |
-        if closest_pfnset_index is not None: |
- for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn: |
- all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count |
- LOGGER.info(' %s (time difference = %f)' % |
- (pfnset_list[closest_pfnset_index].path, |
- closest_pfnset_difference)) |
- else: |
- LOGGER.info(' (no match with pid:%d)' % pid) |
- |
- sizes = dict((c, 0) for c in policy.components) |
- |
- PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes) |
- verify_global_stats = PolicyCommands._accumulate_maps( |
- dump, all_pfn_dict, policy, bucket_set, sizes) |
- |
- # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed. |
- # http://crbug.com/245603. |
- for verify_key, verify_value in verify_global_stats.iteritems(): |
- dump_value = dump.global_stat('%s_committed' % verify_key) |
- if dump_value != verify_value: |
- LOGGER.warn('%25s: %12d != %d (%d)' % ( |
- verify_key, dump_value, verify_value, dump_value - verify_value)) |
- |
- sizes['mmap-no-log'] = ( |
- dump.global_stat('profiled-mmap_committed') - |
- sizes['mmap-total-log']) |
- sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') |
- sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') |
- |
- sizes['tc-no-log'] = ( |
- dump.global_stat('profiled-malloc_committed') - |
- sizes['tc-total-log']) |
- sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') |
- sizes['tc-unused'] = ( |
- sizes['mmap-tcmalloc'] - |
- dump.global_stat('profiled-malloc_committed')) |
- if sizes['tc-unused'] < 0: |
- LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' % |
- sizes['tc-unused']) |
- sizes['tc-unused'] = 0 |
- sizes['tc-total'] = sizes['mmap-tcmalloc'] |
- |
- # TODO(dmikurube): global_stat will be deprecated. |
- # See http://crbug.com/245603. |
- for key, value in { |
- 'total': 'total_committed', |
- 'filemapped': 'file_committed', |
- 'absent': 'absent_committed', |
- 'file-exec': 'file-exec_committed', |
- 'file-nonexec': 'file-nonexec_committed', |
- 'anonymous': 'anonymous_committed', |
- 'stack': 'stack_committed', |
- 'other': 'other_committed', |
- 'unhooked-absent': 'nonprofiled-absent_committed', |
- 'total-vm': 'total_virtual', |
- 'filemapped-vm': 'file_virtual', |
- 'anonymous-vm': 'anonymous_virtual', |
- 'other-vm': 'other_virtual' }.iteritems(): |
- if key in sizes: |
- sizes[key] = dump.global_stat(value) |
- |
- if 'mustbezero' in sizes: |
- removed_list = ( |
- 'profiled-mmap_committed', |
- 'nonprofiled-absent_committed', |
- 'nonprofiled-anonymous_committed', |
- 'nonprofiled-file-exec_committed', |
- 'nonprofiled-file-nonexec_committed', |
- 'nonprofiled-stack_committed', |
- 'nonprofiled-other_committed') |
- sizes['mustbezero'] = ( |
- dump.global_stat('total_committed') - |
- sum(dump.global_stat(removed) for removed in removed_list)) |
- if 'total-exclude-profiler' in sizes: |
- sizes['total-exclude-profiler'] = ( |
- dump.global_stat('total_committed') - |
- (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) |
- if 'hour' in sizes: |
- sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 |
- if 'minute' in sizes: |
- sizes['minute'] = (dump.time - first_dump_time) / 60.0 |
- if 'second' in sizes: |
- sizes['second'] = dump.time - first_dump_time |
- |
- return sizes |
- |
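# A simplified, standalone restatement of the shared-snapshot matching rule |
# in _apply_policy above, using plain (time, reason) tuples instead of |
# PFNCounts objects; the function name is mine.  It picks the last snapshot |
# that is either earlier than the dump (unless it was taken while 'Exiting') |
# or within 3 seconds after it. |
def _match_shared_snapshot(snapshots, dump_time): |
  """Returns the index of the matching snapshot, or None.""" |
  closest_index = None |
  for index, (snapshot_time, reason) in enumerate(snapshots): |
    difference = snapshot_time - dump_time |
    if difference >= 3.0: |
      break |
    elif (difference < 0.0 and reason != 'Exiting') or difference < 3.0: |
      closest_index = index |
    elif difference < 0.0 and reason == 'Exiting': |
      return None |
  return closest_index |
 |
assert _match_shared_snapshot([(8.0, ''), (11.0, '')], 10.0) == 1 |
assert _match_shared_snapshot([(14.0, '')], 10.0) is None |
assert _match_shared_snapshot([(8.0, 'Exiting')], 10.0) is None |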
- @staticmethod |
- def _accumulate_malloc(dump, policy, bucket_set, sizes): |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket or bucket.allocator_type == 'malloc': |
- component_match = policy.find_malloc(bucket) |
- elif bucket.allocator_type == 'mmap': |
- continue |
- else: |
- assert False |
- sizes[component_match] += int(words[COMMITTED]) |
- |
- assert not component_match.startswith('mmap-') |
- if component_match.startswith('tc-'): |
- sizes['tc-total-log'] += int(words[COMMITTED]) |
- else: |
- sizes['other-total-log'] += int(words[COMMITTED]) |
- |
- @staticmethod |
- def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes): |
- # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed. |
- # http://crbug.com/245603. |
- global_stats = { |
- 'total': 0, |
- 'file-exec': 0, |
- 'file-nonexec': 0, |
- 'anonymous': 0, |
- 'stack': 0, |
- 'other': 0, |
- 'nonprofiled-file-exec': 0, |
- 'nonprofiled-file-nonexec': 0, |
- 'nonprofiled-anonymous': 0, |
- 'nonprofiled-stack': 0, |
- 'nonprofiled-other': 0, |
- 'profiled-mmap': 0, |
- } |
- |
- for key, value in dump.iter_map: |
- # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed. |
- # It's temporary verification code for transition described in |
- # http://crbug.com/245603. |
- committed = 0 |
- if 'committed' in value[1]: |
- committed = value[1]['committed'] |
- global_stats['total'] += committed |
- key = 'other' |
- name = value[1]['vma']['name'] |
- if name.startswith('/'): |
- if value[1]['vma']['executable'] == 'x': |
- key = 'file-exec' |
- else: |
- key = 'file-nonexec' |
- elif name == '[stack]': |
- key = 'stack' |
- elif name == '': |
- key = 'anonymous' |
- global_stats[key] += committed |
- if value[0] == 'unhooked': |
- global_stats['nonprofiled-' + key] += committed |
- if value[0] == 'hooked': |
- global_stats['profiled-mmap'] += committed |
- |
- if value[0] == 'unhooked': |
- if pfn_dict and dump.pageframe_length: |
- for pageframe in value[1]['pageframe']: |
- component_match = policy.find_unhooked(value, pageframe, pfn_dict) |
- sizes[component_match] += pageframe.size |
- else: |
- component_match = policy.find_unhooked(value) |
- sizes[component_match] += int(value[1]['committed']) |
- elif value[0] == 'hooked': |
- if pfn_dict and dump.pageframe_length: |
- for pageframe in value[1]['pageframe']: |
- component_match, _ = policy.find_mmap( |
- value, bucket_set, pageframe, pfn_dict) |
- sizes[component_match] += pageframe.size |
- assert not component_match.startswith('tc-') |
- if component_match.startswith('mmap-'): |
- sizes['mmap-total-log'] += pageframe.size |
- else: |
- sizes['other-total-log'] += pageframe.size |
- else: |
- component_match, _ = policy.find_mmap(value, bucket_set) |
- sizes[component_match] += int(value[1]['committed']) |
- if component_match.startswith('mmap-'): |
- sizes['mmap-total-log'] += int(value[1]['committed']) |
- else: |
- sizes['other-total-log'] += int(value[1]['committed']) |
- else: |
- LOGGER.error('Unrecognized mapping status: %s' % value[0]) |
- |
- return global_stats |
- |
- |
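# A small sketch of the region classification used in _accumulate_maps |
# above: the vma name decides between file-exec, file-nonexec, stack, |
# anonymous and other.  The 'vma' dicts below are abbreviated, hypothetical |
# examples of what dump.iter_map yields. |
def _classify_vma(vma): |
  if vma['name'].startswith('/'): |
    return 'file-exec' if vma['executable'] == 'x' else 'file-nonexec' |
  elif vma['name'] == '[stack]': |
    return 'stack' |
  elif vma['name'] == '': |
    return 'anonymous' |
  return 'other' |
 |
assert _classify_vma({'name': '/usr/lib/libexample.so', |
                      'executable': 'x'}) == 'file-exec' |
assert _classify_vma({'name': '[stack]', 'executable': '-'}) == 'stack' |
assert _classify_vma({'name': '', 'executable': '-'}) == 'anonymous' |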
-class CSVCommand(PolicyCommands): |
- def __init__(self): |
- super(CSVCommand, self).__init__('csv') |
- |
- def do(self, sys_argv): |
- policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) |
- return CSVCommand._output( |
- policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) |
- |
- @staticmethod |
- def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): |
- max_components = 0 |
- for label in policy_set: |
- max_components = max(max_components, len(policy_set[label].components)) |
- |
- for label in sorted(policy_set): |
- components = policy_set[label].components |
- if len(policy_set) > 1: |
- out.write('%s%s\n' % (label, ',' * (max_components - 1))) |
- out.write('%s%s\n' % ( |
- ','.join(components), ',' * (max_components - len(components)))) |
- |
- LOGGER.info('Applying a policy %s to...' % label) |
- for dump in dumps: |
- component_sizes = PolicyCommands._apply_policy( |
- dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) |
- s = [] |
- for c in components: |
- if c in ('hour', 'minute', 'second'): |
- s.append('%05.5f' % (component_sizes[c])) |
- else: |
- s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) |
- out.write('%s%s\n' % ( |
- ','.join(s), ',' * (max_components - len(components)))) |
- |
- bucket_set.clear_component_cache() |
- |
- return 0 |
- |
- |
-class JSONCommand(PolicyCommands): |
- def __init__(self): |
- super(JSONCommand, self).__init__('json') |
- |
- def do(self, sys_argv): |
- policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) |
- return JSONCommand._output( |
- policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) |
- |
- @staticmethod |
- def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): |
- json_base = { |
- 'version': 'JSON_DEEP_2', |
- 'policies': {}, |
- } |
- |
- for label in sorted(policy_set): |
- json_base['policies'][label] = { |
- 'legends': policy_set[label].components, |
- 'snapshots': [], |
- } |
- |
- LOGGER.info('Applying a policy %s to...' % label) |
- for dump in dumps: |
- component_sizes = PolicyCommands._apply_policy( |
- dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) |
- component_sizes['dump_path'] = dump.path |
- component_sizes['dump_time'] = datetime.datetime.fromtimestamp( |
- dump.time).strftime('%Y-%m-%d %H:%M:%S') |
- json_base['policies'][label]['snapshots'].append(component_sizes) |
- |
- bucket_set.clear_component_cache() |
- |
- json.dump(json_base, out, indent=2, sort_keys=True) |
- |
- return 0 |
- |
- |
-class ListCommand(PolicyCommands): |
- def __init__(self): |
- super(ListCommand, self).__init__('list') |
- |
- def do(self, sys_argv): |
- policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) |
- return ListCommand._output( |
- policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) |
- |
- @staticmethod |
- def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): |
- for label in sorted(policy_set): |
- LOGGER.info('Applying a policy %s to...' % label) |
- for dump in dumps: |
- component_sizes = PolicyCommands._apply_policy( |
- dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time) |
- out.write('%s for %s:\n' % (label, dump.path)) |
- for c in policy_set[label].components: |
- if c in ['hour', 'minute', 'second']: |
- out.write('%40s %12.3f\n' % (c, component_sizes[c])) |
- else: |
- out.write('%40s %12d\n' % (c, component_sizes[c])) |
- |
- bucket_set.clear_component_cache() |
- |
- return 0 |
- |
- |
-class MapCommand(Command): |
- def __init__(self): |
- super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>') |
- |
- def do(self, sys_argv, out=sys.stdout): |
- _, args = self._parse_args(sys_argv, 2) |
- dump_path = args[1] |
- target_policy = args[2] |
- (bucket_set, dumps) = Command.load_basic_files(dump_path, True) |
- policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) |
- |
- MapCommand._output(dumps, bucket_set, policy_set[target_policy], out) |
- return 0 |
- |
- @staticmethod |
- def _output(dumps, bucket_set, policy, out): |
- """Prints all stacktraces in a given component of given depth. |
- |
- Args: |
- dumps: A list of Dump objects. |
- bucket_set: A BucketSet object. |
- policy: A Policy object. |
- out: An IO object to output. |
- """ |
- max_dump_count = 0 |
- range_dict = ExclusiveRangeDict(ListAttribute) |
- for dump in dumps: |
- max_dump_count = max(max_dump_count, dump.count) |
- for key, value in dump.iter_map: |
- for begin, end, attr in range_dict.iter_range(key[0], key[1]): |
- attr[dump.count] = value |
- |
- max_dump_count_digit = len(str(max_dump_count)) |
- for begin, end, attr in range_dict.iter_range(): |
- out.write('%x-%x\n' % (begin, end)) |
- if len(attr) < max_dump_count: |
- attr[max_dump_count] = None |
- for index, value in enumerate(attr[1:]): |
- out.write(' #%0*d: ' % (max_dump_count_digit, index + 1)) |
- if not value: |
- out.write('None\n') |
- elif value[0] == 'hooked': |
- component_match, _ = policy.find_mmap(value, bucket_set) |
- out.write('%s @ %d\n' % (component_match, value[1]['bucket_id'])) |
- else: |
- component_match = policy.find_unhooked(value) |
- region_info = value[1] |
- size = region_info['committed'] |
- out.write('%s [%d bytes] %s%s%s%s %s\n' % ( |
- component_match, size, value[1]['vma']['readable'], |
- value[1]['vma']['writable'], value[1]['vma']['executable'], |
- value[1]['vma']['private'], value[1]['vma']['name'])) |
- |
- |
-class ExpandCommand(Command): |
- def __init__(self): |
- super(ExpandCommand, self).__init__( |
- 'Usage: %prog expand <dump> <policy> <component> <depth>') |
- |
- def do(self, sys_argv): |
- _, args = self._parse_args(sys_argv, 4) |
- dump_path = args[1] |
- target_policy = args[2] |
- component_name = args[3] |
- depth = args[4] |
- (bucket_set, dump) = Command.load_basic_files(dump_path, False) |
- policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) |
- |
- ExpandCommand._output(dump, policy_set[target_policy], bucket_set, |
- component_name, int(depth), sys.stdout) |
- return 0 |
- |
- @staticmethod |
- def _output(dump, policy, bucket_set, component_name, depth, out): |
- """Prints all stacktraces in a given component of given depth. |
- |
- Args: |
- dump: A Dump object. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- component_name: A name of component for filtering. |
- depth: An integer representing depth to be printed. |
- out: An IO object to output. |
- """ |
- sizes = {} |
- |
- ExpandCommand._accumulate( |
- dump, policy, bucket_set, component_name, depth, sizes) |
- |
- sorted_sizes_list = sorted( |
- sizes.iteritems(), key=(lambda x: x[1]), reverse=True) |
- total = 0 |
- # TODO(dmikurube): Better formatting. |
- for size_pair in sorted_sizes_list: |
- out.write('%10d %s\n' % (size_pair[1], size_pair[0])) |
- total += size_pair[1] |
- LOGGER.info('total: %d\n' % total) |
- |
- @staticmethod |
- def _add_size(precedence, bucket, depth, committed, sizes): |
- stacktrace_sequence = precedence |
- for function, sourcefile in zip( |
- bucket.symbolized_stackfunction[ |
- 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)], |
- bucket.symbolized_stacksourcefile[ |
- 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]): |
- stacktrace_sequence += '%s(@%s) ' % (function, sourcefile) |
-    if stacktrace_sequence not in sizes: |
- sizes[stacktrace_sequence] = 0 |
- sizes[stacktrace_sequence] += committed |
- |
- @staticmethod |
- def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): |
- rule = policy.find_rule(component_name) |
- if not rule: |
- pass |
- elif rule.allocator_type == 'malloc': |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket or bucket.allocator_type == 'malloc': |
- component_match = policy.find_malloc(bucket) |
- elif bucket.allocator_type == 'mmap': |
- continue |
- else: |
- assert False |
- if component_match == component_name: |
- precedence = '' |
- precedence += '(alloc=%d) ' % int(words[ALLOC_COUNT]) |
- precedence += '(free=%d) ' % int(words[FREE_COUNT]) |
- if bucket.typeinfo: |
- precedence += '(type=%s) ' % bucket.symbolized_typeinfo |
- precedence += '(type.name=%s) ' % bucket.typeinfo_name |
- ExpandCommand._add_size(precedence, bucket, depth, |
- int(words[COMMITTED]), sizes) |
- elif rule.allocator_type == 'mmap': |
- for _, region in dump.iter_map: |
- if region[0] != 'hooked': |
- continue |
- component_match, bucket = policy.find_mmap(region, bucket_set) |
- if component_match == component_name: |
- ExpandCommand._add_size('', bucket, depth, |
- region[1]['committed'], sizes) |
- |
- |
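# A sketch of the aggregation key ExpandCommand._add_size builds: the first |
# depth+1 symbolized frames are joined as 'function(@sourcefile) ' and the |
# committed sizes are accumulated per key.  The helper name and the frame |
# lists are hypothetical. |
def _stack_key(functions, sourcefiles, depth, precedence=''): |
  key = precedence |
  for function, sourcefile in zip(functions[:1 + depth], |
                                  sourcefiles[:1 + depth]): |
    key += '%s(@%s) ' % (function, sourcefile) |
  return key |
 |
key = _stack_key(['malloc', 'Foo::Bar', 'main'], |
                 ['alloc.cc', 'foo.cc', 'main.cc'], |
                 depth=1) |
assert key == 'malloc(@alloc.cc) Foo::Bar(@foo.cc) ' |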
-class PProfCommand(Command): |
- def __init__(self): |
- super(PProfCommand, self).__init__( |
- 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') |
- self._parser.add_option('-c', '--component', type='string', |
- dest='component', |
- help='restrict to COMPONENT', metavar='COMPONENT') |
- |
- def do(self, sys_argv): |
- options, args = self._parse_args(sys_argv, 2) |
- |
- dump_path = args[1] |
- target_policy = args[2] |
- component = options.component |
- |
- (bucket_set, dump) = Command.load_basic_files(dump_path, False) |
- policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) |
- |
- with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f: |
- maps_lines = maps_f.readlines() |
- PProfCommand._output( |
- dump, policy_set[target_policy], bucket_set, maps_lines, component, |
- sys.stdout) |
- |
- return 0 |
- |
- @staticmethod |
- def _output(dump, policy, bucket_set, maps_lines, component_name, out): |
- """Converts the heap profile dump so it can be processed by pprof. |
- |
- Args: |
- dump: A Dump object. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- maps_lines: A list of strings containing /proc/.../maps. |
- component_name: A name of component for filtering. |
- out: An IO object to output. |
- """ |
- out.write('heap profile: ') |
- com_committed, com_allocs = PProfCommand._accumulate( |
- dump, policy, bucket_set, component_name) |
- |
- out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
- com_allocs, com_committed, com_allocs, com_committed)) |
- |
- PProfCommand._output_stacktrace_lines( |
- dump, policy, bucket_set, component_name, out) |
- |
- out.write('MAPPED_LIBRARIES:\n') |
- for line in maps_lines: |
- out.write(line) |
- |
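# A sketch of the pprof text format PProfCommand._output writes, using the |
# same format strings as above with hypothetical numbers: a header line with |
# the component totals, one line per region or stack with its counts and raw |
# stack addresses, and finally the verbatim /proc/.../maps lines. |
import sys |
 |
out = sys.stdout |
com_allocs, com_committed = 42, 123456 |
out.write('heap profile: ') |
out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
    com_allocs, com_committed, com_allocs, com_committed)) |
out.write('%6d: %8s [%6d: %8s] @' % (1, 4096, 1, 4096)) |
for address in (0x7f0000001000, 0x7f0000002000): |
  out.write(' 0x%016x' % address) |
out.write('\n') |
out.write('MAPPED_LIBRARIES:\n') |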
- @staticmethod |
- def _accumulate(dump, policy, bucket_set, component_name): |
- """Accumulates size of committed chunks and the number of allocated chunks. |
- |
- Args: |
- dump: A Dump object. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- component_name: A name of component for filtering. |
- |
- Returns: |
- Two integers which are the accumulated size of committed regions and the |
- number of allocated chunks, respectively. |
- """ |
- com_committed = 0 |
- com_allocs = 0 |
- |
- for _, region in dump.iter_map: |
- if region[0] != 'hooked': |
- continue |
- component_match, bucket = policy.find_mmap(region, bucket_set) |
- |
- if (component_name and component_name != component_match) or ( |
- region[1]['committed'] == 0): |
- continue |
- |
- com_committed += region[1]['committed'] |
- com_allocs += 1 |
- |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket or bucket.allocator_type == 'malloc': |
- component_match = policy.find_malloc(bucket) |
- elif bucket.allocator_type == 'mmap': |
- continue |
- else: |
- assert False |
- if (not bucket or |
- (component_name and component_name != component_match)): |
- continue |
- |
- com_committed += int(words[COMMITTED]) |
- com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) |
- |
- return com_committed, com_allocs |
- |
- @staticmethod |
- def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): |
- """Prints information of stacktrace lines for pprof. |
- |
- Args: |
- dump: A Dump object. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- component_name: A name of component for filtering. |
- out: An IO object to output. |
- """ |
- for _, region in dump.iter_map: |
- if region[0] != 'hooked': |
- continue |
- component_match, bucket = policy.find_mmap(region, bucket_set) |
- |
- if (component_name and component_name != component_match) or ( |
- region[1]['committed'] == 0): |
- continue |
- |
- out.write(' 1: %8s [ 1: %8s] @' % ( |
- region[1]['committed'], region[1]['committed'])) |
- for address in bucket.stacktrace: |
- out.write(' 0x%016x' % address) |
- out.write('\n') |
- |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket or bucket.allocator_type == 'malloc': |
- component_match = policy.find_malloc(bucket) |
- elif bucket.allocator_type == 'mmap': |
- continue |
- else: |
- assert False |
- if (not bucket or |
- (component_name and component_name != component_match)): |
- continue |
- |
- out.write('%6d: %8s [%6d: %8s] @' % ( |
- int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
- words[COMMITTED], |
- int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
- words[COMMITTED])) |
- for address in bucket.stacktrace: |
- out.write(' 0x%016x' % address) |
- out.write('\n') |
- |
- |
-class UploadCommand(Command): |
- def __init__(self): |
- super(UploadCommand, self).__init__( |
- 'Usage: %prog upload [--gsutil path/to/gsutil] ' |
- '<first-dump> <destination-gs-path>') |
- self._parser.add_option('--gsutil', default='gsutil', |
- help='path to GSUTIL', metavar='GSUTIL') |
- |
- def do(self, sys_argv): |
- options, args = self._parse_args(sys_argv, 2) |
- dump_path = args[1] |
- gs_path = args[2] |
- |
- dump_files = Command._find_all_dumps(dump_path) |
- bucket_files = Command._find_all_buckets(dump_path) |
- prefix = Command._find_prefix(dump_path) |
- symbol_data_sources = SymbolDataSources(prefix) |
- symbol_data_sources.prepare() |
- symbol_path = symbol_data_sources.path() |
- |
- handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof') |
- os.close(handle_zip) |
- |
- try: |
- file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED) |
- for filename in dump_files: |
- file_zip.write(filename, os.path.basename(os.path.abspath(filename))) |
- for filename in bucket_files: |
- file_zip.write(filename, os.path.basename(os.path.abspath(filename))) |
- |
- symbol_basename = os.path.basename(os.path.abspath(symbol_path)) |
- for filename in os.listdir(symbol_path): |
- if not filename.startswith('.'): |
- file_zip.write(os.path.join(symbol_path, filename), |
- os.path.join(symbol_basename, os.path.basename( |
- os.path.abspath(filename)))) |
- file_zip.close() |
- |
- returncode = UploadCommand._run_gsutil( |
- options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path) |
- finally: |
- os.remove(filename_zip) |
- |
- return returncode |
- |
- @staticmethod |
- def _run_gsutil(gsutil, *args): |
- """Run gsutil as a subprocess. |
- |
- Args: |
- *args: Arguments to pass to gsutil. The first argument should be an |
- operation such as ls, cp or cat. |
- Returns: |
- The return code from the process. |
- """ |
- command = [gsutil] + list(args) |
- LOGGER.info("Running: %s", command) |
- |
- try: |
- return subprocess.call(command) |
- except OSError, e: |
-      LOGGER.error('Failed to run gsutil: %s', e) |
- |
- |
-class CatCommand(Command): |
- def __init__(self): |
- super(CatCommand, self).__init__('Usage: %prog cat <first-dump>') |
- self._parser.add_option('--alternative-dirs', dest='alternative_dirs', |
- metavar='/path/on/target@/path/on/host[:...]', |
- help='Read files in /path/on/host/ instead of ' |
- 'files in /path/on/target/.') |
- self._parser.add_option('--indent', dest='indent', action='store_true', |
- help='Indent the output.') |
- |
- def do(self, sys_argv): |
- options, args = self._parse_args(sys_argv, 1) |
- dump_path = args[1] |
- # TODO(dmikurube): Support shared memory. |
- alternative_dirs_dict = {} |
- if options.alternative_dirs: |
- for alternative_dir_pair in options.alternative_dirs.split(':'): |
- target_path, host_path = alternative_dir_pair.split('@', 1) |
- alternative_dirs_dict[target_path] = host_path |
- (bucket_set, dumps) = Command.load_basic_files( |
- dump_path, True, alternative_dirs=alternative_dirs_dict) |
- |
- json_root = OrderedDict() |
- json_root['version'] = 1 |
- json_root['run_id'] = None |
- for dump in dumps: |
- if json_root['run_id'] and json_root['run_id'] != dump.run_id: |
- LOGGER.error('Inconsistent heap profile dumps.') |
- json_root['run_id'] = '' |
- break |
- json_root['run_id'] = dump.run_id |
- json_root['snapshots'] = [] |
- |
- # Load all sorters. |
- sorters = SorterSet() |
- |
- for dump in dumps: |
- json_root['snapshots'].append( |
- self._fill_snapshot(dump, bucket_set, sorters)) |
- |
- if options.indent: |
- json.dump(json_root, sys.stdout, indent=2) |
- else: |
- json.dump(json_root, sys.stdout) |
- print '' |
- |
- @staticmethod |
- def _fill_snapshot(dump, bucket_set, sorters): |
- root = OrderedDict() |
- root['time'] = dump.time |
- root['worlds'] = OrderedDict() |
- root['worlds']['vm'] = CatCommand._fill_world( |
- dump, bucket_set, sorters, 'vm') |
- root['worlds']['malloc'] = CatCommand._fill_world( |
- dump, bucket_set, sorters, 'malloc') |
- return root |
- |
- @staticmethod |
- def _fill_world(dump, bucket_set, sorters, world): |
- root = OrderedDict() |
- |
- root['name'] = 'world' |
- if world == 'vm': |
- root['unit_fields'] = ['committed', 'reserved'] |
- elif world == 'malloc': |
- root['unit_fields'] = ['size', 'alloc_count', 'free_count'] |
- |
- # Make { vm | malloc } units with their sizes. |
- root['units'] = OrderedDict() |
- unit_set = UnitSet(world) |
- if world == 'vm': |
- for unit in CatCommand._iterate_vm_unit(dump, None, bucket_set): |
- unit_set.append(unit) |
- for unit in unit_set: |
- root['units'][unit.unit_id] = [unit.committed, unit.reserved] |
- elif world == 'malloc': |
- for unit in CatCommand._iterate_malloc_unit(dump, bucket_set): |
- unit_set.append(unit) |
- for unit in unit_set: |
- root['units'][unit.unit_id] = [ |
- unit.size, unit.alloc_count, unit.free_count] |
- |
- # Iterate for { vm | malloc } sorters. |
- root['breakdown'] = OrderedDict() |
- for sorter in sorters.iter_world(world): |
- breakdown = OrderedDict() |
- for unit in unit_set: |
- found = sorter.find(unit) |
- if found.name not in breakdown: |
- category = OrderedDict() |
- category['name'] = found.name |
- category['color'] = 'random' |
- subworlds = {} |
- for subworld in found.iter_subworld(): |
- subworlds[subworld] = False |
- if subworlds: |
- category['subworlds'] = subworlds |
- if found.hidden: |
- category['hidden'] = True |
- category['units'] = [] |
- breakdown[found.name] = category |
- breakdown[found.name]['units'].append(unit.unit_id) |
- root['breakdown'][sorter.name] = breakdown |
- |
- return root |
- |
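# An abbreviated sketch of the structure 'dmprof cat' builds in |
# CatCommand.do, _fill_snapshot and _fill_world before json.dump().  All |
# concrete values (run_id, times, unit ids, sizes, sorter and category |
# names) are hypothetical. |
EXAMPLE_CAT_OUTPUT = { |
    'version': 1, |
    'run_id': 'example-run', |
    'snapshots': [{ |
        'time': 1370000000.0, |
        'worlds': { |
            'vm': { |
                'name': 'world', |
                'unit_fields': ['committed', 'reserved'], |
                'units': {1: [4096, 8192]}, |
                'breakdown': { |
                    'example-vm-sorter': { |
                        'example-category': { |
                            'name': 'example-category', |
                            'color': 'random', |
                            'units': [1], |
                        }, |
                    }, |
                }, |
            }, |
            'malloc': { |
                'name': 'world', |
                'unit_fields': ['size', 'alloc_count', 'free_count'], |
                'units': {123: [2048, 10, 4]}, |
                'breakdown': {}, |
            }, |
        }, |
    }], |
} |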
- @staticmethod |
- def _iterate_vm_unit(dump, pfn_dict, bucket_set): |
- unit_id = 0 |
- for _, region in dump.iter_map: |
- unit_id += 1 |
- if region[0] == 'unhooked': |
- if pfn_dict and dump.pageframe_length: |
- for pageframe in region[1]['pageframe']: |
- yield UnhookedUnit(unit_id, pageframe.size, pageframe.size, |
- region, pageframe, pfn_dict) |
- else: |
- yield UnhookedUnit(unit_id, |
- int(region[1]['committed']), |
- int(region[1]['reserved']), |
- region) |
- elif region[0] == 'hooked': |
- if pfn_dict and dump.pageframe_length: |
- for pageframe in region[1]['pageframe']: |
- yield MMapUnit(unit_id, |
- pageframe.size, |
- pageframe.size, |
- region, bucket_set, pageframe, pfn_dict) |
- else: |
- yield MMapUnit(unit_id, |
- int(region[1]['committed']), |
- int(region[1]['reserved']), |
- region, |
- bucket_set) |
- else: |
- LOGGER.error('Unrecognized mapping status: %s' % region[0]) |
- |
- @staticmethod |
- def _iterate_malloc_unit(dump, bucket_set): |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if bucket and bucket.allocator_type == 'malloc': |
- yield MallocUnit(int(words[BUCKET_ID]), |
- int(words[COMMITTED]), |
- int(words[ALLOC_COUNT]), |
- int(words[FREE_COUNT]), |
- bucket) |
- elif not bucket: |
-        # Buckets that are not found are all assumed to be malloc buckets. |
- yield MallocUnit(int(words[BUCKET_ID]), |
- int(words[COMMITTED]), |
- int(words[ALLOC_COUNT]), |
- int(words[FREE_COUNT]), |
- None) |
def main(): |
COMMANDS = { |
- 'buckets': BucketsCommand, |
- 'cat': CatCommand, |
- 'csv': CSVCommand, |
- 'expand': ExpandCommand, |
- 'json': JSONCommand, |
- 'list': ListCommand, |
- 'map': MapCommand, |
- 'pprof': PProfCommand, |
- 'stacktrace': StacktraceCommand, |
- 'upload': UploadCommand, |
+ 'buckets': subcommands.BucketsCommand, |
+ 'cat': subcommands.CatCommand, |
+ 'csv': subcommands.CSVCommand, |
+ 'expand': subcommands.ExpandCommand, |
+ 'json': subcommands.JSONCommand, |
+ 'list': subcommands.ListCommand, |
+ 'map': subcommands.MapCommand, |
+ 'pprof': subcommands.PProfCommand, |
+ 'stacktrace': subcommands.StacktraceCommand, |
+ 'upload': subcommands.UploadCommand, |
} |
if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): |