| OLD | NEW |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """The deep heap profiler script for Chrome.""" | 5 """The deep heap profiler script for Chrome.""" |
| 6 | 6 |
| 7 from datetime import datetime | 7 from datetime import datetime |
| 8 import json | 8 import json |
| 9 import logging | 9 import logging |
| 10 import optparse | 10 import optparse |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 113 boolean value which is True if found a line which skipping_condition | 113 boolean value which is True if found a line which skipping_condition |
| 114 is False for. | 114 is False for. |
| 115 """ | 115 """ |
| 116 while skipping_condition(index): | 116 while skipping_condition(index): |
| 117 index += 1 | 117 index += 1 |
| 118 if index >= max_index: | 118 if index >= max_index: |
| 119 return index, False | 119 return index, False |
| 120 return index, True | 120 return index, True |
| 121 | 121 |
| 122 | 122 |
| 123 class SymbolMapping(object): | 123 class SymbolDataSources(object): |
| 124 """Manages all symbol information on process memory mapping. | 124 """Manages symbol data sources in a process. |
| 125 | 125 |
| 126 The symbol information consists of all symbols in the binary files obtained | 126 The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and |
| 127 by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps, | 127 so on. They are collected into a directory '|prefix|.symmap' from the binary |
| 128 nm and so on. It is minimum requisite information to run dmprof. | 128 files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. |
| 129 | 129 |
| 130 The information is prepared in a directory "|prefix|.symmap" by prepare(). | 130 Binaries are not mandatory to profile. The prepared data sources work in |
| 131 The directory is more portable than Chromium binaries. Users can save it | 131 place of the binary even if the binary has been overwritten with another |
| 132 and re-analyze with the portable one. | 132 binary. |
| 133 | 133 |
| 134 Note that loading the symbol information takes a long time. It is very big | 134 Note that loading the symbol data sources takes a long time. They are often |
| 135 in general -- it doesn't know which functions are called and which types are | 135 very big. So, the 'dmprof' profiler is designed to use 'SymbolMappingCache' |
| 136 used actually. Used symbols can be cached in the "SymbolCache" class. | 136 which caches actually used symbols. |
| 137 """ | 137 """ |
| 138 def __init__(self, prefix): | 138 def __init__(self, prefix): |
| 139 self._prefix = prefix | 139 self._prefix = prefix |
| 140 self._prepared_symbol_mapping_path = None | 140 self._prepared_symbol_data_sources_path = None |
| 141 self._loaded_symbol_mapping = None | 141 self._loaded_symbol_data_sources = None |
| 142 | 142 |
| 143 def prepare(self): | 143 def prepare(self): |
| 144 """Extracts symbol mapping from binaries and prepares it to use. | 144 """Prepares symbol data sources by extracting mapping from a binary. |
| 145 | 145 |
| 146 The symbol mapping is stored in a directory whose name is stored in | 146 The prepared symbol data sources are stored in a directory. The directory |
| 147 |self._prepared_symbol_mapping_path|. | 147 name is stored in |self._prepared_symbol_data_sources_path|. |
| 148 | 148 |
| 149 Returns: | 149 Returns: |
| 150 True if succeeded. | 150 True if succeeded. |
| 151 """ | 151 """ |
| 152 LOGGER.info('Preparing symbol mapping...') | 152 LOGGER.info('Preparing symbol mapping...') |
| 153 self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info( | 153 self._prepared_symbol_data_sources_path, used_tempdir = prepare_symbol_info( |
| 154 self._prefix + '.maps', self._prefix + '.symmap', True) | 154 self._prefix + '.maps', self._prefix + '.symmap', True) |
| 155 if self._prepared_symbol_mapping_path: | 155 if self._prepared_symbol_data_sources_path: |
| 156 LOGGER.info(' Prepared symbol mapping.') | 156 LOGGER.info(' Prepared symbol mapping.') |
| 157 if used_tempdir: | 157 if used_tempdir: |
| 158 LOGGER.warn(' Using a temporary directory for symbol mapping.') | 158 LOGGER.warn(' Using a temporary directory for symbol mapping.') |
| 159 LOGGER.warn(' Delete it by yourself.') | 159 LOGGER.warn(' Delete it by yourself.') |
| 160 LOGGER.warn(' Or, move the directory by yourself to use it later.') | 160 LOGGER.warn(' Or, move the directory by yourself to use it later.') |
| 161 return True | 161 return True |
| 162 else: | 162 else: |
| 163 LOGGER.warn(' Failed to prepare symbol mapping.') | 163 LOGGER.warn(' Failed to prepare symbol mapping.') |
| 164 return False | 164 return False |
| 165 | 165 |
| 166 def get(self): | 166 def get(self): |
| 167 """Returns symbol mapping. | 167 """Returns the prepared symbol data sources. |
| 168 | 168 |
| 169 Returns: | 169 Returns: |
| 170 Loaded symbol mapping. None if failed. | 170 The prepared symbol data sources. None if failed. |
| 171 """ | 171 """ |
| 172 if not self._prepared_symbol_mapping_path and not self.prepare(): | 172 if not self._prepared_symbol_data_sources_path and not self.prepare(): |
| 173 return None | 173 return None |
| 174 if not self._loaded_symbol_mapping: | 174 if not self._loaded_symbol_data_sources: |
| 175 LOGGER.info('Loading symbol mapping...') | 175 LOGGER.info('Loading symbol mapping...') |
| 176 self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load( | 176 self._loaded_symbol_data_sources = RuntimeSymbolsInProcess.load( |
| 177 self._prepared_symbol_mapping_path) | 177 self._prepared_symbol_data_sources_path) |
| 178 return self._loaded_symbol_mapping | 178 return self._loaded_symbol_data_sources |
| 179 | 179 |
| 180 | 180 |
| 181 class SymbolCache(object): | 181 class SymbolFinder(object): |
| 182 """Manages cache of used symbol mapping. | 182 """Finds corresponding symbols from addresses. |
| 183 | 183 |
| 184 The original symbol mapping is by "SymbolMapping" (maps, nm and readelf for | 184 This class does only 'find()' symbols from a specified |address_list|. |
| 185 examples), and "SymbolCache" just caches "how dmprof interprets the address" | 185 It is introduced to make a finder mockable. |
| 186 to speed-up another analysis for the same binary and profile dumps. | |
| 187 Handling all symbol mapping takes a long time in "SymbolMapping". | |
| 188 "SymbolCache" caches used symbol mapping on memory and in files. | |
| 189 """ | 186 """ |
| 190 def __init__(self, prefix): | 187 _FIND_RUNTIME_SYMBOLS_FUNCTIONS = { |
| 191 self._prefix = prefix | 188 FUNCTION_ADDRESS: find_runtime_symbols_list, |
| 192 self._symbol_cache_paths = { | 189 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list, |
| 193 FUNCTION_ADDRESS: prefix + '.funcsym', | 190 } |
| 194 TYPEINFO_ADDRESS: prefix + '.typesym', | 191 |
| 195 } | 192 def __init__(self, address_type, symbol_data_sources): |
| 196 self._find_runtime_symbols_functions = { | 193 self._finder_function = self._FIND_RUNTIME_SYMBOLS_FUNCTIONS[address_type] |
| 197 FUNCTION_ADDRESS: find_runtime_symbols_list, | 194 self._symbol_data_sources = symbol_data_sources |
| 198 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list, | 195 |
| 199 } | 196 def find(self, address_list): |
| 200 self._symbol_caches = { | 197 return self._finder_function(self._symbol_data_sources.get(), address_list) |
| 198 |
| 199 |
| 200 class SymbolMappingCache(object): |
| 201 """Caches mapping from actually used addresses to symbols. |
| 202 |
| 203 'update()' updates the cache from the original symbol data sources via |
| 204 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. |
| 205 """ |
| 206 def __init__(self): |
| 207 self._symbol_mapping_caches = { |
| 201 FUNCTION_ADDRESS: {}, | 208 FUNCTION_ADDRESS: {}, |
| 202 TYPEINFO_ADDRESS: {}, | 209 TYPEINFO_ADDRESS: {}, |
| 203 } | 210 } |
| 204 | 211 |
| 205 def update(self, address_type, bucket_set, symbol_mapping): | 212 def update(self, address_type, bucket_set, symbol_finder, cache_f): |
| 206 """Updates symbol mapping on memory and in a ".*sym" cache file. | 213 """Updates symbol mapping cache on memory and in a symbol cache file. |
| 207 | 214 |
| 208 It reads cached symbol mapping from a ".*sym" file if it exists. Then, | 215 It reads cached symbol mapping from a symbol cache file |cache_f| if it |
| 209 it looks up unresolved addresses from a given "SymbolMapping". Finally, | 216 exists. Unresolved addresses are then resolved and added to the cache |
| 210 both symbol mappings on memory and in the ".*sym" cache file are updated. | 217 both on memory and in the symbol cache file with using 'SymbolFinder'. |
| 211 | 218 |
| 212 Symbol files are formatted as follows: | 219 A cache file is formatted as follows: |
| 213 <Address> <Symbol> | 220 <Address> <Symbol> |
| 214 <Address> <Symbol> | 221 <Address> <Symbol> |
| 215 <Address> <Symbol> | 222 <Address> <Symbol> |
| 216 ... | 223 ... |
| 217 | 224 |
| 218 Args: | 225 Args: |
| 219 address_type: A type of addresses to update. It should be one of | 226 address_type: A type of addresses to update. |
| 220 FUNCTION_ADDRESS or TYPEINFO_ADDRESS. | 227 It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS. |
| 221 bucket_set: A BucketSet object. | 228 bucket_set: A BucketSet object. |
| 222 symbol_mapping: A SymbolMapping object. | 229 symbol_finder: A SymbolFinder object to find symbols. |
| 230 cache_f: A readable and writable IO object of the symbol cache file. |
| 223 """ | 231 """ |
| 224 self._load(address_type) | 232 cache_f.seek(0, os.SEEK_SET) |
| 233 self._load(cache_f, address_type) |
| 225 | 234 |
| 226 unresolved_addresses = sorted( | 235 unresolved_addresses = sorted( |
| 227 address for address in bucket_set.iter_addresses(address_type) | 236 address for address in bucket_set.iter_addresses(address_type) |
| 228 if address not in self._symbol_caches[address_type]) | 237 if address not in self._symbol_mapping_caches[address_type]) |
| 229 | 238 |
| 230 if not unresolved_addresses: | 239 if not unresolved_addresses: |
| 231 LOGGER.info('No need to resolve any more addresses.') | 240 LOGGER.info('No need to resolve any more addresses.') |
| 232 return | 241 return |
| 233 | 242 |
| 234 symbol_cache_path = self._symbol_cache_paths[address_type] | 243 cache_f.seek(0, os.SEEK_END) |
| 235 with open(symbol_cache_path, mode='a+') as symbol_f: | 244 LOGGER.info('Loading %d unresolved addresses.' % |
| 236 LOGGER.info('Loading %d unresolved addresses.' % | 245 len(unresolved_addresses)) |
| 237 len(unresolved_addresses)) | 246 symbol_list = symbol_finder.find(unresolved_addresses) |
| 238 symbol_list = self._find_runtime_symbols_functions[address_type]( | |
| 239 symbol_mapping.get(), unresolved_addresses) | |
| 240 | 247 |
| 241 for address, symbol in zip(unresolved_addresses, symbol_list): | 248 for address, symbol in zip(unresolved_addresses, symbol_list): |
| 242 stripped_symbol = symbol.strip() or '??' | 249 stripped_symbol = symbol.strip() or '??' |
| 243 self._symbol_caches[address_type][address] = stripped_symbol | 250 self._symbol_mapping_caches[address_type][address] = stripped_symbol |
| 244 symbol_f.write('%x %s\n' % (address, stripped_symbol)) | 251 cache_f.write('%x %s\n' % (address, stripped_symbol)) |
| 245 | 252 |
| 246 def lookup(self, address_type, address): | 253 def lookup(self, address_type, address): |
| 247 """Looks up a symbol for a given |address|. | 254 """Looks up a symbol for a given |address|. |
| 248 | 255 |
| 249 Args: | 256 Args: |
| 250 address_type: A type of addresses to lookup. It should be one of | 257 address_type: A type of addresses to lookup. |
| 251 FUNCTION_ADDRESS or TYPEINFO_ADDRESS. | 258 It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS. |
| 252 address: An integer that represents an address. | 259 address: An integer that represents an address. |
| 253 | 260 |
| 254 Returns: | 261 Returns: |
| 255 A string that represents a symbol. | 262 A string that represents a symbol. |
| 256 """ | 263 """ |
| 257 return self._symbol_caches[address_type].get(address) | 264 return self._symbol_mapping_caches[address_type].get(address) |
| 258 | 265 |
| 259 def _load(self, address_type): | 266 def _load(self, cache_f, address_type): |
| 260 symbol_cache_path = self._symbol_cache_paths[address_type] | |
| 261 try: | 267 try: |
| 262 with open(symbol_cache_path, mode='r') as symbol_f: | 268 for line in cache_f: |
| 263 for line in symbol_f: | 269 items = line.rstrip().split(None, 1) |
| 264 items = line.rstrip().split(None, 1) | 270 if len(items) == 1: |
| 265 if len(items) == 1: | 271 items.append('??') |
| 266 items.append('??') | 272 self._symbol_mapping_caches[address_type][int(items[0], 16)] = items[1] |
| 267 self._symbol_caches[address_type][int(items[0], 16)] = items[1] | |
| 268 LOGGER.info('Loaded %d entries from symbol cache.' % | 273 LOGGER.info('Loaded %d entries from symbol cache.' % |
| 269 len(self._symbol_caches[address_type])) | 274 len(self._symbol_mapping_caches[address_type])) |
| 270 except IOError as e: | 275 except IOError as e: |
| 271 LOGGER.info('No valid symbol cache file is found: %s' % e) | 276 LOGGER.info('The symbol cache file is invalid: %s' % e) |
| 272 | 277 |
| 273 | 278 |
| 274 class Rule(object): | 279 class Rule(object): |
| 275 """Represents one matching rule in a policy file.""" | 280 """Represents one matching rule in a policy file.""" |
| 276 | 281 |
| 277 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None): | 282 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None): |
| 278 self._name = name | 283 self._name = name |
| 279 self._mmap = mmap | 284 self._mmap = mmap |
| 280 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z') | 285 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z') |
| 281 if typeinfo_pattern: | 286 if typeinfo_pattern: |
| (...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 476 self._mmap = mmap | 481 self._mmap = mmap |
| 477 self._typeinfo = typeinfo | 482 self._typeinfo = typeinfo |
| 478 self._typeinfo_name = typeinfo_name | 483 self._typeinfo_name = typeinfo_name |
| 479 | 484 |
| 480 self._symbolized_stacktrace = stacktrace | 485 self._symbolized_stacktrace = stacktrace |
| 481 self._symbolized_joined_stacktrace = '' | 486 self._symbolized_joined_stacktrace = '' |
| 482 self._symbolized_typeinfo = typeinfo_name | 487 self._symbolized_typeinfo = typeinfo_name |
| 483 | 488 |
| 484 self.component_cache = '' | 489 self.component_cache = '' |
| 485 | 490 |
| 486 def symbolize(self, symbol_cache): | 491 def symbolize(self, symbol_mapping_cache): |
| 487 """Makes a symbolized stacktrace and typeinfo with |symbol_cache|. | 492 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. |
| 488 | 493 |
| 489 Args: | 494 Args: |
| 490 symbol_cache: A SymbolCache object. | 495 symbol_mapping_cache: A SymbolMappingCache object. |
| 491 """ | 496 """ |
| 492 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. | 497 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. |
| 493 self._symbolized_stacktrace = [ | 498 self._symbolized_stacktrace = [ |
| 494 symbol_cache.lookup(FUNCTION_ADDRESS, address) | 499 symbol_mapping_cache.lookup(FUNCTION_ADDRESS, address) |
| 495 for address in self._stacktrace] | 500 for address in self._stacktrace] |
| 496 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace) | 501 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace) |
| 497 if not self._typeinfo: | 502 if not self._typeinfo: |
| 498 self._symbolized_typeinfo = 'no typeinfo' | 503 self._symbolized_typeinfo = 'no typeinfo' |
| 499 else: | 504 else: |
| 500 self._symbolized_typeinfo = symbol_cache.lookup( | 505 self._symbolized_typeinfo = symbol_mapping_cache.lookup( |
| 501 TYPEINFO_ADDRESS, self._typeinfo) | 506 TYPEINFO_ADDRESS, self._typeinfo) |
| 502 if not self._symbolized_typeinfo: | 507 if not self._symbolized_typeinfo: |
| 503 self._symbolized_typeinfo = 'no typeinfo' | 508 self._symbolized_typeinfo = 'no typeinfo' |
| 504 | 509 |
| 505 def clear_component_cache(self): | 510 def clear_component_cache(self): |
| 506 self.component_cache = '' | 511 self.component_cache = '' |
| 507 | 512 |
| 508 @property | 513 @property |
| 509 def stacktrace(self): | 514 def stacktrace(self): |
| 510 return self._stacktrace | 515 return self._stacktrace |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 590 def __iter__(self): | 595 def __iter__(self): |
| 591 for bucket_id, bucket_content in self._buckets.iteritems(): | 596 for bucket_id, bucket_content in self._buckets.iteritems(): |
| 592 yield bucket_id, bucket_content | 597 yield bucket_id, bucket_content |
| 593 | 598 |
| 594 def __getitem__(self, bucket_id): | 599 def __getitem__(self, bucket_id): |
| 595 return self._buckets[bucket_id] | 600 return self._buckets[bucket_id] |
| 596 | 601 |
| 597 def get(self, bucket_id): | 602 def get(self, bucket_id): |
| 598 return self._buckets.get(bucket_id) | 603 return self._buckets.get(bucket_id) |
| 599 | 604 |
| 600 def symbolize(self, symbol_cache): | 605 def symbolize(self, symbol_mapping_cache): |
| 601 for bucket_content in self._buckets.itervalues(): | 606 for bucket_content in self._buckets.itervalues(): |
| 602 bucket_content.symbolize(symbol_cache) | 607 bucket_content.symbolize(symbol_mapping_cache) |
| 603 | 608 |
| 604 def clear_component_cache(self): | 609 def clear_component_cache(self): |
| 605 for bucket_content in self._buckets.itervalues(): | 610 for bucket_content in self._buckets.itervalues(): |
| 606 bucket_content.clear_component_cache() | 611 bucket_content.clear_component_cache() |
| 607 | 612 |
| 608 def iter_addresses(self, address_type): | 613 def iter_addresses(self, address_type): |
| 609 for function in self._addresses[address_type]: | 614 for function in self._addresses[address_type]: |
| 610 yield function | 615 yield function |
| 611 | 616 |
| 612 | 617 |
| (...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 808 """Subclasses are a subcommand for this executable. | 813 """Subclasses are a subcommand for this executable. |
| 809 | 814 |
| 810 See COMMANDS in main(). | 815 See COMMANDS in main(). |
| 811 """ | 816 """ |
| 812 def __init__(self, usage): | 817 def __init__(self, usage): |
| 813 self._parser = optparse.OptionParser(usage) | 818 self._parser = optparse.OptionParser(usage) |
| 814 | 819 |
| 815 @staticmethod | 820 @staticmethod |
| 816 def load_basic_files(dump_path, multiple): | 821 def load_basic_files(dump_path, multiple): |
| 817 prefix = Command._find_prefix(dump_path) | 822 prefix = Command._find_prefix(dump_path) |
| 818 symbol_mapping = SymbolMapping(prefix) | 823 symbol_data_sources = SymbolDataSources(prefix) |
| 819 symbol_mapping.prepare() | 824 symbol_data_sources.prepare() |
| 820 bucket_set = BucketSet() | 825 bucket_set = BucketSet() |
| 821 bucket_set.load(prefix) | 826 bucket_set.load(prefix) |
| 822 if multiple: | 827 if multiple: |
| 823 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) | 828 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) |
| 824 else: | 829 else: |
| 825 dump = Dump.load(dump_path) | 830 dump = Dump.load(dump_path) |
| 826 symbol_cache = SymbolCache(prefix) | 831 symbol_mapping_cache = SymbolMappingCache() |
| 827 symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping) | 832 with open(prefix + '.funcsym', 'a+') as cache_f: |
| 828 symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping) | 833 symbol_mapping_cache.update( |
| 829 bucket_set.symbolize(symbol_cache) | 834 FUNCTION_ADDRESS, bucket_set, |
| 835 SymbolFinder(FUNCTION_ADDRESS, symbol_data_sources), cache_f) |
| 836 with open(prefix + '.typesym', 'a+') as cache_f: |
| 837 symbol_mapping_cache.update( |
| 838 TYPEINFO_ADDRESS, bucket_set, |
| 839 SymbolFinder(TYPEINFO_ADDRESS, symbol_data_sources), cache_f) |
| 840 bucket_set.symbolize(symbol_mapping_cache) |
| 830 if multiple: | 841 if multiple: |
| 831 return (bucket_set, dump_list) | 842 return (bucket_set, dump_list) |
| 832 else: | 843 else: |
| 833 return (bucket_set, dump) | 844 return (bucket_set, dump) |
| 834 | 845 |
| 835 @staticmethod | 846 @staticmethod |
| 836 def _find_prefix(path): | 847 def _find_prefix(path): |
| 837 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) | 848 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
| 838 | 849 |
| 839 @staticmethod | 850 @staticmethod |
| (...skipping 491 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1331 errorcode = COMMANDS[action]().do(sys.argv) | 1342 errorcode = COMMANDS[action]().do(sys.argv) |
| 1332 except ParsingException, e: | 1343 except ParsingException, e: |
| 1333 errorcode = 1 | 1344 errorcode = 1 |
| 1334 sys.stderr.write('Exit by parsing error: %s\n' % e) | 1345 sys.stderr.write('Exit by parsing error: %s\n' % e) |
| 1335 | 1346 |
| 1336 return errorcode | 1347 return errorcode |
| 1337 | 1348 |
| 1338 | 1349 |
| 1339 if __name__ == '__main__': | 1350 if __name__ == '__main__': |
| 1340 sys.exit(main()) | 1351 sys.exit(main()) |
| OLD | NEW |