Index: tools/deep_memory_profiler/dmprof.py |
diff --git a/tools/deep_memory_profiler/dmprof.py b/tools/deep_memory_profiler/dmprof.py |
index 7c14b80a3b71b8e25e5b47cf199c61cdb20a77ff..b868a7f69d1279704eb1db20cc1f52341ac96344 100644 |
--- a/tools/deep_memory_profiler/dmprof.py |
+++ b/tools/deep_memory_profiler/dmprof.py |
@@ -120,39 +120,39 @@ def skip_while(index, max_index, skipping_condition): |
return index, True |
-class SymbolMapping(object): |
- """Manages all symbol information on process memory mapping. |
+class SymbolDataSources(object): |
+ """Manages symbol data sources in a process. |
- The symbol information consists of all symbols in the binary files obtained |
- by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps, |
- nm and so on. It is minimum requisite information to run dmprof. |
+ The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and |
+ so on. They are collected into a directory '|prefix|.symmap' from the binary |
+ files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. |
- The information is prepared in a directory "|prefix|.symmap" by prepare(). |
- The directory is more portable than Chromium binaries. Users can save it |
- and re-analyze with the portable one. |
+ Binaries are not mandatory to profile. The prepared data sources work in |
+ place of the binary even if the binary has been overwritten with another |
+ binary. |
- Note that loading the symbol information takes a long time. It is very big |
- in general -- it doesn't know which functions are called and which types are |
- used actually. Used symbols can be cached in the "SymbolCache" class. |
+ Note that loading the symbol data sources takes a long time. They are often |
+ very big. So, the 'dmprof' profiler is designed to use 'SymbolMappingCache' |
+ which caches actually used symbols. |
""" |
def __init__(self, prefix): |
self._prefix = prefix |
- self._prepared_symbol_mapping_path = None |
- self._loaded_symbol_mapping = None |
+ self._prepared_symbol_data_sources_path = None |
+ self._loaded_symbol_data_sources = None |
def prepare(self): |
- """Extracts symbol mapping from binaries and prepares it to use. |
+ """Prepares symbol data sources by extracting mapping from a binary. |
- The symbol mapping is stored in a directory whose name is stored in |
- |self._prepared_symbol_mapping_path|. |
+ The prepared symbol data sources are stored in a directory. The directory |
+ name is stored in |self._prepared_symbol_data_sources_path|. |
Returns: |
True if succeeded. |
""" |
LOGGER.info('Preparing symbol mapping...') |
- self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info( |
+ self._prepared_symbol_data_sources_path, used_tempdir = prepare_symbol_info( |
self._prefix + '.maps', self._prefix + '.symmap', True) |
- if self._prepared_symbol_mapping_path: |
+ if self._prepared_symbol_data_sources_path: |
LOGGER.info(' Prepared symbol mapping.') |
if used_tempdir: |
LOGGER.warn(' Using a temporary directory for symbol mapping.') |
@@ -164,111 +164,116 @@ class SymbolMapping(object): |
return False |
def get(self): |
- """Returns symbol mapping. |
+ """Returns the prepared symbol data sources. |
Returns: |
- Loaded symbol mapping. None if failed. |
+ The prepared symbol data sources. None if failed. |
""" |
- if not self._prepared_symbol_mapping_path and not self.prepare(): |
+ if not self._prepared_symbol_data_sources_path and not self.prepare(): |
return None |
- if not self._loaded_symbol_mapping: |
+ if not self._loaded_symbol_data_sources: |
LOGGER.info('Loading symbol mapping...') |
- self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load( |
- self._prepared_symbol_mapping_path) |
- return self._loaded_symbol_mapping |
+ self._loaded_symbol_data_sources = RuntimeSymbolsInProcess.load( |
+ self._prepared_symbol_data_sources_path) |
+ return self._loaded_symbol_data_sources |
-class SymbolCache(object): |
- """Manages cache of used symbol mapping. |
+class SymbolFinder(object): |
+ """Finds corresponding symbols from addresses. |
- The original symbol mapping is by "SymbolMapping" (maps, nm and readelf for |
- examples), and "SymbolCache" just caches "how dmprof interprets the address" |
- to speed-up another analysis for the same binary and profile dumps. |
- Handling all symbol mapping takes a long time in "SymbolMapping". |
- "SymbolCache" caches used symbol mapping on memory and in files. |
+ This class does only 'find()' symbols from a specified |address_list|. |
+ It is introduced to make a finder mockable. |
""" |
- def __init__(self, prefix): |
- self._prefix = prefix |
- self._symbol_cache_paths = { |
- FUNCTION_ADDRESS: prefix + '.funcsym', |
- TYPEINFO_ADDRESS: prefix + '.typesym', |
- } |
- self._find_runtime_symbols_functions = { |
- FUNCTION_ADDRESS: find_runtime_symbols_list, |
- TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list, |
- } |
- self._symbol_caches = { |
+ _FIND_RUNTIME_SYMBOLS_FUNCTIONS = { |
+ FUNCTION_ADDRESS: find_runtime_symbols_list, |
+ TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list, |
+ } |
+ |
+ def __init__(self, address_type, symbol_data_sources): |
+ self._finder_function = self._FIND_RUNTIME_SYMBOLS_FUNCTIONS[address_type] |
+ self._symbol_data_sources = symbol_data_sources |
+ |
+ def find(self, address_list): |
+ return self._finder_function(self._symbol_data_sources.get(), address_list) |
+ |
+ |
+class SymbolMappingCache(object): |
+ """Caches mapping from actually used addresses to symbols. |
+ |
+ 'update()' updates the cache from the original symbol data sources via |
+ 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. |
+ """ |
+ def __init__(self): |
+ self._symbol_mapping_caches = { |
FUNCTION_ADDRESS: {}, |
TYPEINFO_ADDRESS: {}, |
} |
- def update(self, address_type, bucket_set, symbol_mapping): |
- """Updates symbol mapping on memory and in a ".*sym" cache file. |
+ def update(self, address_type, bucket_set, symbol_finder, cache_f): |
+ """Updates symbol mapping cache on memory and in a symbol cache file. |
- It reads cached symbol mapping from a ".*sym" file if it exists. Then, |
- it looks up unresolved addresses from a given "SymbolMapping". Finally, |
- both symbol mappings on memory and in the ".*sym" cache file are updated. |
+ It reads cached symbol mapping from a symbol cache file |cache_f| if it |
+ exists. Unresolved addresses are then resolved and added to the cache |
+ both on memory and in the symbol cache file with using 'SymbolFinder'. |
- Symbol files are formatted as follows: |
+ A cache file is formatted as follows: |
<Address> <Symbol> |
<Address> <Symbol> |
<Address> <Symbol> |
... |
Args: |
- address_type: A type of addresses to update. It should be one of |
- FUNCTION_ADDRESS or TYPEINFO_ADDRESS. |
+ address_type: A type of addresses to update. |
+ It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS. |
bucket_set: A BucketSet object. |
- symbol_mapping: A SymbolMapping object. |
+ symbol_finder: A SymbolFinder object to find symbols. |
+ cache_f: A readable and writable IO object of the symbol cache file. |
""" |
- self._load(address_type) |
+ cache_f.seek(0, os.SEEK_SET) |
+ self._load(cache_f, address_type) |
unresolved_addresses = sorted( |
address for address in bucket_set.iter_addresses(address_type) |
- if address not in self._symbol_caches[address_type]) |
+ if address not in self._symbol_mapping_caches[address_type]) |
if not unresolved_addresses: |
LOGGER.info('No need to resolve any more addresses.') |
return |
- symbol_cache_path = self._symbol_cache_paths[address_type] |
- with open(symbol_cache_path, mode='a+') as symbol_f: |
- LOGGER.info('Loading %d unresolved addresses.' % |
- len(unresolved_addresses)) |
- symbol_list = self._find_runtime_symbols_functions[address_type]( |
- symbol_mapping.get(), unresolved_addresses) |
+ cache_f.seek(0, os.SEEK_END) |
+ LOGGER.info('Loading %d unresolved addresses.' % |
+ len(unresolved_addresses)) |
+ symbol_list = symbol_finder.find(unresolved_addresses) |
- for address, symbol in zip(unresolved_addresses, symbol_list): |
- stripped_symbol = symbol.strip() or '??' |
- self._symbol_caches[address_type][address] = stripped_symbol |
- symbol_f.write('%x %s\n' % (address, stripped_symbol)) |
+ for address, symbol in zip(unresolved_addresses, symbol_list): |
+ stripped_symbol = symbol.strip() or '??' |
+ self._symbol_mapping_caches[address_type][address] = stripped_symbol |
+ cache_f.write('%x %s\n' % (address, stripped_symbol)) |
def lookup(self, address_type, address): |
"""Looks up a symbol for a given |address|. |
Args: |
- address_type: A type of addresses to lookup. It should be one of |
- FUNCTION_ADDRESS or TYPEINFO_ADDRESS. |
+ address_type: A type of addresses to lookup. |
+ It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS. |
address: An integer that represents an address. |
Returns: |
A string that represents a symbol. |
""" |
- return self._symbol_caches[address_type].get(address) |
+ return self._symbol_mapping_caches[address_type].get(address) |
- def _load(self, address_type): |
- symbol_cache_path = self._symbol_cache_paths[address_type] |
+ def _load(self, cache_f, address_type): |
try: |
- with open(symbol_cache_path, mode='r') as symbol_f: |
- for line in symbol_f: |
- items = line.rstrip().split(None, 1) |
- if len(items) == 1: |
- items.append('??') |
- self._symbol_caches[address_type][int(items[0], 16)] = items[1] |
+ for line in cache_f: |
+ items = line.rstrip().split(None, 1) |
+ if len(items) == 1: |
+ items.append('??') |
+ self._symbol_mapping_caches[address_type][int(items[0], 16)] = items[1] |
LOGGER.info('Loaded %d entries from symbol cache.' % |
- len(self._symbol_caches[address_type])) |
+ len(self._symbol_mapping_caches[address_type])) |
except IOError as e: |
- LOGGER.info('No valid symbol cache file is found: %s' % e) |
+ LOGGER.info('The symbol cache file is invalid: %s' % e) |
class Rule(object): |
@@ -483,21 +488,21 @@ class Bucket(object): |
self.component_cache = '' |
- def symbolize(self, symbol_cache): |
- """Makes a symbolized stacktrace and typeinfo with |symbol_cache|. |
+ def symbolize(self, symbol_mapping_cache): |
+ """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. |
Args: |
- symbol_cache: A SymbolCache object. |
+ symbol_mapping_cache: A SymbolMappingCache object. |
""" |
# TODO(dmikurube): Fill explicitly with numbers if symbol not found. |
self._symbolized_stacktrace = [ |
- symbol_cache.lookup(FUNCTION_ADDRESS, address) |
+ symbol_mapping_cache.lookup(FUNCTION_ADDRESS, address) |
for address in self._stacktrace] |
self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace) |
if not self._typeinfo: |
self._symbolized_typeinfo = 'no typeinfo' |
else: |
- self._symbolized_typeinfo = symbol_cache.lookup( |
+ self._symbolized_typeinfo = symbol_mapping_cache.lookup( |
TYPEINFO_ADDRESS, self._typeinfo) |
if not self._symbolized_typeinfo: |
self._symbolized_typeinfo = 'no typeinfo' |
@@ -597,9 +602,9 @@ class BucketSet(object): |
def get(self, bucket_id): |
return self._buckets.get(bucket_id) |
- def symbolize(self, symbol_cache): |
+ def symbolize(self, symbol_mapping_cache): |
for bucket_content in self._buckets.itervalues(): |
- bucket_content.symbolize(symbol_cache) |
+ bucket_content.symbolize(symbol_mapping_cache) |
def clear_component_cache(self): |
for bucket_content in self._buckets.itervalues(): |
@@ -815,18 +820,24 @@ class Command(object): |
@staticmethod |
def load_basic_files(dump_path, multiple): |
prefix = Command._find_prefix(dump_path) |
- symbol_mapping = SymbolMapping(prefix) |
- symbol_mapping.prepare() |
+ symbol_data_sources = SymbolDataSources(prefix) |
+ symbol_data_sources.prepare() |
bucket_set = BucketSet() |
bucket_set.load(prefix) |
if multiple: |
dump_list = DumpList.load(Command._find_all_dumps(dump_path)) |
else: |
dump = Dump.load(dump_path) |
- symbol_cache = SymbolCache(prefix) |
- symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping) |
- symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping) |
- bucket_set.symbolize(symbol_cache) |
+ symbol_mapping_cache = SymbolMappingCache() |
+ with open(prefix + '.funcsym', 'a+') as cache_f: |
+ symbol_mapping_cache.update( |
+ FUNCTION_ADDRESS, bucket_set, |
+ SymbolFinder(FUNCTION_ADDRESS, symbol_data_sources), cache_f) |
+ with open(prefix + '.typesym', 'a+') as cache_f: |
+ symbol_mapping_cache.update( |
+ TYPEINFO_ADDRESS, bucket_set, |
+ SymbolFinder(TYPEINFO_ADDRESS, symbol_data_sources), cache_f) |
+ bucket_set.symbolize(symbol_mapping_cache) |
if multiple: |
return (bucket_set, dump_list) |
else: |