OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """The deep heap profiler script for Chrome.""" | 5 """The deep heap profiler script for Chrome.""" |
6 | 6 |
7 from datetime import datetime | 7 from datetime import datetime |
8 import json | 8 import json |
9 import logging | 9 import logging |
10 import optparse | 10 import optparse |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
113 boolean value which is True if found a line which skipping_condition | 113 boolean value which is True if found a line which skipping_condition |
114 is False for. | 114 is False for. |
115 """ | 115 """ |
116 while skipping_condition(index): | 116 while skipping_condition(index): |
117 index += 1 | 117 index += 1 |
118 if index >= max_index: | 118 if index >= max_index: |
119 return index, False | 119 return index, False |
120 return index, True | 120 return index, True |
121 | 121 |
122 | 122 |
class SymbolDataSources(object):
  """Manages symbol data sources in a process.

  The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and
  so on.  They are collected into a directory '|prefix|.symmap' from the binary
  files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py.

  Binaries are not mandatory to profile.  The prepared data sources work in
  place of the binary even if the binary has been overwritten with another
  binary.

  Note that loading the symbol data sources takes a long time.  They are often
  very big.  So, the 'dmprof' profiler is designed to use 'SymbolMappingCache'
  which caches actually used symbols.
  """

  def __init__(self, prefix):
    """Remembers |prefix|; nothing is prepared or loaded yet.

    Args:
        prefix: A path prefix.  '.maps' and '.symmap' are appended to it by
            prepare().
    """
    self._prefix = prefix
    self._prepared_symbol_data_sources_path = None
    self._loaded_symbol_data_sources = None

  def prepare(self):
    """Prepares symbol data sources by extracting mapping from a binary.

    The prepared symbol data sources are stored in a directory.  The directory
    name is stored in |self._prepared_symbol_data_sources_path|.

    Returns:
        True if succeeded.
    """
    LOGGER.info('Preparing symbol mapping...')
    # prepare_symbol_info() hands back a (path, used_tempdir) pair; a falsy
    # path is treated as failure below.
    self._prepared_symbol_data_sources_path, used_tempdir = prepare_symbol_info(
        self._prefix + '.maps', self._prefix + '.symmap', True)

    if not self._prepared_symbol_data_sources_path:
      LOGGER.warning(' Failed to prepare symbol mapping.')
      return False

    LOGGER.info(' Prepared symbol mapping.')
    if used_tempdir:
      # The temporary directory is not cleaned up here, so tell the user.
      LOGGER.warning(' Using a temporary directory for symbol mapping.')
      LOGGER.warning(' Delete it by yourself.')
      LOGGER.warning(' Or, move the directory by yourself to use it later.')
    return True

  def get(self):
    """Returns the prepared symbol data sources.

    Calls prepare() first if nothing has been prepared yet, and loads the
    prepared directory on the first call only; later calls reuse the loaded
    object.

    Returns:
        The prepared symbol data sources.  None if failed.
    """
    if not self._prepared_symbol_data_sources_path and not self.prepare():
      return None
    if not self._loaded_symbol_data_sources:
      LOGGER.info('Loading symbol mapping...')
      self._loaded_symbol_data_sources = RuntimeSymbolsInProcess.load(
          self._prepared_symbol_data_sources_path)
    return self._loaded_symbol_data_sources
179 | 179 |
180 | 180 |
class SymbolFinder(object):
  """Finds corresponding symbols from addresses.

  This class does only 'find()' symbols from a specified |address_list|.
  It is introduced to make a finder mockable.
  """

  # Maps an address type to the module-level function that resolves a list of
  # addresses of that type.
  _FIND_RUNTIME_SYMBOLS_FUNCTIONS = {
      FUNCTION_ADDRESS: find_runtime_symbols_list,
      TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list,
  }

  def __init__(self, address_type, symbol_data_sources):
    """Selects the resolver for |address_type|.

    Args:
        address_type: FUNCTION_ADDRESS or TYPEINFO_ADDRESS.  Any other value
            raises KeyError.
        symbol_data_sources: An object whose get() returns the loaded symbol
            data sources (e.g. a SymbolDataSources object).
    """
    self._finder_function = self._FIND_RUNTIME_SYMBOLS_FUNCTIONS[address_type]
    self._symbol_data_sources = symbol_data_sources

  def find(self, address_list):
    """Resolves every address in |address_list|.

    The symbol data sources are fetched with get() on each call, so they are
    only prepared/loaded once a lookup is actually needed.

    Args:
        address_list: A list of addresses to resolve.

    Returns:
        Whatever the selected resolver returns for |address_list|.
    """
    return self._finder_function(self._symbol_data_sources.get(), address_list)
| 198 |
| 199 |
class SymbolMappingCache(object):
  """Caches mapping from actually used addresses to symbols.

  'update()' updates the cache from the original symbol data sources via
  'SymbolFinder'.  Symbols can be looked up by the method 'lookup()'.
  """

  def __init__(self):
    """Creates one empty address-to-symbol dict per address type."""
    self._symbol_mapping_caches = {
        FUNCTION_ADDRESS: {},
        TYPEINFO_ADDRESS: {},
    }

  def update(self, address_type, bucket_set, symbol_finder, cache_f):
    """Updates symbol mapping cache on memory and in a symbol cache file.

    It reads cached symbol mapping from a symbol cache file |cache_f| if it
    exists.  Unresolved addresses are then resolved and added to the cache
    both on memory and in the symbol cache file with using 'SymbolFinder'.

    A cache file is formatted as follows:
      <Address> <Symbol>
      <Address> <Symbol>
      <Address> <Symbol>
      ...

    Args:
        address_type: A type of addresses to update.
            It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
        bucket_set: A BucketSet object.
        symbol_finder: A SymbolFinder object to find symbols.
        cache_f: A readable and writable IO object of the symbol cache file.
    """
    # Rewind first: the file may have been opened in append mode.
    cache_f.seek(0, os.SEEK_SET)
    self._load(cache_f, address_type)

    known = self._symbol_mapping_caches[address_type]
    unresolved_addresses = sorted(
        address for address in bucket_set.iter_addresses(address_type)
        if address not in known)

    if not unresolved_addresses:
      LOGGER.info('No need to resolve any more addresses.')
      return

    # Move to the end explicitly before switching from reading to writing.
    cache_f.seek(0, os.SEEK_END)
    LOGGER.info('Loading %d unresolved addresses.' %
                len(unresolved_addresses))
    symbol_list = symbol_finder.find(unresolved_addresses)

    for address, symbol in zip(unresolved_addresses, symbol_list):
      # An empty symbol is recorded as '??' so the line keeps two fields.
      stripped_symbol = symbol.strip() or '??'
      known[address] = stripped_symbol
      cache_f.write('%x %s\n' % (address, stripped_symbol))

  def lookup(self, address_type, address):
    """Looks up a symbol for a given |address|.

    Args:
        address_type: A type of addresses to lookup.
            It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
        address: An integer that represents an address.

    Returns:
        A string that represents a symbol.  None if |address| is not cached.
    """
    return self._symbol_mapping_caches[address_type].get(address)

  @staticmethod
  def _parse_cache_line(line):
    """Parses one '<hex address> <symbol>' line of a symbol cache file.

    Args:
        line: A line read from the cache file.

    Returns:
        An (address, symbol) tuple, where symbol is '??' if the line holds an
        address only.  None if the line is blank or the address is not
        hexadecimal.
    """
    items = line.rstrip().split(None, 1)
    if not items:
      return None
    try:
      address = int(items[0], 16)
    except ValueError:
      return None
    symbol = items[1] if len(items) > 1 else '??'
    return address, symbol

  def _load(self, cache_f, address_type):
    """Reads all entries of |cache_f| into the cache for |address_type|.

    Blank or malformed lines are skipped instead of aborting the whole load.

    Args:
        cache_f: A readable IO object positioned at the start of the cache.
        address_type: FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
    """
    cache = self._symbol_mapping_caches[address_type]
    skipped = 0
    try:
      for line in cache_f:
        entry = self._parse_cache_line(line)
        if entry is None:
          skipped += 1
          continue
        cache[entry[0]] = entry[1]
      if skipped:
        LOGGER.warning('Skipped %d malformed lines in symbol cache.' % skipped)
      LOGGER.info('Loaded %d entries from symbol cache.' % len(cache))
    except IOError as e:
      LOGGER.info('The symbol cache file is invalid: %s' % e)
272 | 277 |
273 | 278 |
274 class Rule(object): | 279 class Rule(object): |
275 """Represents one matching rule in a policy file.""" | 280 """Represents one matching rule in a policy file.""" |
276 | 281 |
277 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None): | 282 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None): |
278 self._name = name | 283 self._name = name |
279 self._mmap = mmap | 284 self._mmap = mmap |
280 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z') | 285 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z') |
281 if typeinfo_pattern: | 286 if typeinfo_pattern: |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
476 self._mmap = mmap | 481 self._mmap = mmap |
477 self._typeinfo = typeinfo | 482 self._typeinfo = typeinfo |
478 self._typeinfo_name = typeinfo_name | 483 self._typeinfo_name = typeinfo_name |
479 | 484 |
480 self._symbolized_stacktrace = stacktrace | 485 self._symbolized_stacktrace = stacktrace |
481 self._symbolized_joined_stacktrace = '' | 486 self._symbolized_joined_stacktrace = '' |
482 self._symbolized_typeinfo = typeinfo_name | 487 self._symbolized_typeinfo = typeinfo_name |
483 | 488 |
484 self.component_cache = '' | 489 self.component_cache = '' |
485 | 490 |
def symbolize(self, symbol_mapping_cache):
  """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|.

  Fills |self._symbolized_stacktrace|, |self._symbolized_joined_stacktrace|
  and |self._symbolized_typeinfo|.

  Args:
      symbol_mapping_cache: A SymbolMappingCache object.
  """
  # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
  # NOTE: lookup() yields None for an address that is not cached, and the
  # join below raises TypeError on None, so every address must be resolved
  # before this is called.
  self._symbolized_stacktrace = [
      symbol_mapping_cache.lookup(FUNCTION_ADDRESS, address)
      for address in self._stacktrace]
  self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace)

  typeinfo_symbol = None
  if self._typeinfo:
    typeinfo_symbol = symbol_mapping_cache.lookup(
        TYPEINFO_ADDRESS, self._typeinfo)
  # Both "no typeinfo address" and "address not resolved" end up here.
  self._symbolized_typeinfo = typeinfo_symbol or 'no typeinfo'
504 | 509 |
def clear_component_cache(self):
  """Resets |self.component_cache| to an empty string."""
  self.component_cache = ''
507 | 512 |
@property
def stacktrace(self):
  """The value stored in |self._stacktrace| (read-only accessor)."""
  return self._stacktrace
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
590 def __iter__(self): | 595 def __iter__(self): |
591 for bucket_id, bucket_content in self._buckets.iteritems(): | 596 for bucket_id, bucket_content in self._buckets.iteritems(): |
592 yield bucket_id, bucket_content | 597 yield bucket_id, bucket_content |
593 | 598 |
594 def __getitem__(self, bucket_id): | 599 def __getitem__(self, bucket_id): |
595 return self._buckets[bucket_id] | 600 return self._buckets[bucket_id] |
596 | 601 |
def get(self, bucket_id):
  """Returns the bucket stored under |bucket_id|, or None if there is none."""
  return self._buckets.get(bucket_id)
599 | 604 |
def symbolize(self, symbol_mapping_cache):
  """Symbolizes every stored bucket with |symbol_mapping_cache|.

  Args:
      symbol_mapping_cache: Passed through unchanged to each bucket's
          symbolize().
  """
  buckets = self._buckets
  # Iterating keys stays lazy and works on both Python 2 and Python 3 dicts.
  for bucket_id in buckets:
    buckets[bucket_id].symbolize(symbol_mapping_cache)
603 | 608 |
def clear_component_cache(self):
  """Calls clear_component_cache() on every stored bucket."""
  buckets = self._buckets
  # Iterating keys stays lazy and works on both Python 2 and Python 3 dicts.
  for bucket_id in buckets:
    buckets[bucket_id].clear_component_cache()
607 | 612 |
def iter_addresses(self, address_type):
  """Yields every address recorded for |address_type|.

  Args:
      address_type: A key of |self._addresses|.

  Yields:
      Each element of |self._addresses[address_type]|, in iteration order.
  """
  for address in self._addresses[address_type]:
    yield address
611 | 616 |
612 | 617 |
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
808 """Subclasses are a subcommand for this executable. | 813 """Subclasses are a subcommand for this executable. |
809 | 814 |
810 See COMMANDS in main(). | 815 See COMMANDS in main(). |
811 """ | 816 """ |
812 def __init__(self, usage): | 817 def __init__(self, usage): |
813 self._parser = optparse.OptionParser(usage) | 818 self._parser = optparse.OptionParser(usage) |
814 | 819 |
@staticmethod
def load_basic_files(dump_path, multiple):
  """Loads buckets and dump(s) for |dump_path| and symbolizes the buckets.

  Symbols are resolved through the cache files '|prefix|.funcsym' and
  '|prefix|.typesym', which are created if missing and appended to when new
  addresses have to be resolved.

  Args:
      dump_path: A path to a heap dump file.  The prefix shared by the
          related files is derived from it with Command._find_prefix().
      multiple: If true, loads every dump found by Command._find_all_dumps()
          as a DumpList.  Otherwise loads only |dump_path| as a Dump.

  Returns:
      A tuple (bucket_set, dump_list) if |multiple|, else (bucket_set, dump).
  """
  prefix = Command._find_prefix(dump_path)
  symbol_data_sources = SymbolDataSources(prefix)
  # The result is deliberately ignored here: SymbolDataSources.get() retries
  # prepare() itself if nothing has been prepared.
  symbol_data_sources.prepare()
  bucket_set = BucketSet()
  bucket_set.load(prefix)
  if multiple:
    dumps = DumpList.load(Command._find_all_dumps(dump_path))
  else:
    dumps = Dump.load(dump_path)

  symbol_mapping_cache = SymbolMappingCache()
  cache_suffixes = (
      (FUNCTION_ADDRESS, '.funcsym'),
      (TYPEINFO_ADDRESS, '.typesym'),
  )
  for address_type, suffix in cache_suffixes:
    # 'a+' creates the cache file if needed and never truncates it.
    with open(prefix + suffix, 'a+') as cache_f:
      symbol_mapping_cache.update(
          address_type, bucket_set,
          SymbolFinder(address_type, symbol_data_sources), cache_f)
  bucket_set.symbolize(symbol_mapping_cache)
  return (bucket_set, dumps)
834 | 845 |
835 @staticmethod | 846 @staticmethod |
836 def _find_prefix(path): | 847 def _find_prefix(path): |
837 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) | 848 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
838 | 849 |
839 @staticmethod | 850 @staticmethod |
(...skipping 491 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1331 errorcode = COMMANDS[action]().do(sys.argv) | 1342 errorcode = COMMANDS[action]().do(sys.argv) |
1332 except ParsingException, e: | 1343 except ParsingException, e: |
1333 errorcode = 1 | 1344 errorcode = 1 |
1334 sys.stderr.write('Exit by parsing error: %s\n' % e) | 1345 sys.stderr.write('Exit by parsing error: %s\n' % e) |
1335 | 1346 |
1336 return errorcode | 1347 return errorcode |
1337 | 1348 |
1338 | 1349 |
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |