Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(342)

Side by Side Diff: tools/deep_memory_profiler/dmprof.py

Issue 11418130: Add a test for SymbolMappingCache in tools/deep_memory_profiler/dmprof.py. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: fixed | Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/deep_memory_profiler/tests/dmprof_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """The deep heap profiler script for Chrome.""" 5 """The deep heap profiler script for Chrome."""
6 6
7 from datetime import datetime 7 from datetime import datetime
8 import json 8 import json
9 import logging 9 import logging
10 import optparse 10 import optparse
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 boolean value which is True if found a line which skipping_condition 113 boolean value which is True if found a line which skipping_condition
114 is False for. 114 is False for.
115 """ 115 """
116 while skipping_condition(index): 116 while skipping_condition(index):
117 index += 1 117 index += 1
118 if index >= max_index: 118 if index >= max_index:
119 return index, False 119 return index, False
120 return index, True 120 return index, True
121 121
122 122
123 class SymbolMapping(object): 123 class SymbolDataSources(object):
124 """Manages all symbol information on process memory mapping. 124 """Manages symbol data sources in a process.
125 125
126 The symbol information consists of all symbols in the binary files obtained 126 The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and
127 by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps, 127 so on. They are collected into a directory '|prefix|.symmap' from the binary
128 nm and so on. It is minimum requisite information to run dmprof. 128 files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py.
129 129
130 The information is prepared in a directory "|prefix|.symmap" by prepare(). 130 Binaries are not mandatory to profile. The prepared data sources work in
131 The directory is more portable than Chromium binaries. Users can save it 131 place of the binary even if the binary has been overwritten with another
132 and re-analyze with the portable one. 132 binary.
133 133
134 Note that loading the symbol information takes a long time. It is very big 134 Note that loading the symbol data sources takes a long time. They are often
135 in general -- it doesn't know which functions are called and which types are 135 very big. So, the 'dmprof' profiler is designed to use 'SymbolMappingCache'
136 used actually. Used symbols can be cached in the "SymbolCache" class. 136 which caches actually used symbols.
137 """ 137 """
138 def __init__(self, prefix): 138 def __init__(self, prefix):
139 self._prefix = prefix 139 self._prefix = prefix
140 self._prepared_symbol_mapping_path = None 140 self._prepared_symbol_data_sources_path = None
141 self._loaded_symbol_mapping = None 141 self._loaded_symbol_data_sources = None
142 142
143 def prepare(self): 143 def prepare(self):
144 """Extracts symbol mapping from binaries and prepares it to use. 144 """Prepares symbol data sources by extracting mapping from a binary.
145 145
146 The symbol mapping is stored in a directory whose name is stored in 146 The prepared symbol data sources are stored in a directory. The directory
147 |self._prepared_symbol_mapping_path|. 147 name is stored in |self._prepared_symbol_data_sources_path|.
148 148
149 Returns: 149 Returns:
150 True if succeeded. 150 True if succeeded.
151 """ 151 """
152 LOGGER.info('Preparing symbol mapping...') 152 LOGGER.info('Preparing symbol mapping...')
153 self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info( 153 self._prepared_symbol_data_sources_path, used_tempdir = prepare_symbol_info(
154 self._prefix + '.maps', self._prefix + '.symmap', True) 154 self._prefix + '.maps', self._prefix + '.symmap', True)
155 if self._prepared_symbol_mapping_path: 155 if self._prepared_symbol_data_sources_path:
156 LOGGER.info(' Prepared symbol mapping.') 156 LOGGER.info(' Prepared symbol mapping.')
157 if used_tempdir: 157 if used_tempdir:
158 LOGGER.warn(' Using a temporary directory for symbol mapping.') 158 LOGGER.warn(' Using a temporary directory for symbol mapping.')
159 LOGGER.warn(' Delete it by yourself.') 159 LOGGER.warn(' Delete it by yourself.')
160 LOGGER.warn(' Or, move the directory by yourself to use it later.') 160 LOGGER.warn(' Or, move the directory by yourself to use it later.')
161 return True 161 return True
162 else: 162 else:
163 LOGGER.warn(' Failed to prepare symbol mapping.') 163 LOGGER.warn(' Failed to prepare symbol mapping.')
164 return False 164 return False
165 165
166 def get(self): 166 def get(self):
167 """Returns symbol mapping. 167 """Returns the prepared symbol data sources.
168 168
169 Returns: 169 Returns:
170 Loaded symbol mapping. None if failed. 170 The prepared symbol data sources. None if failed.
171 """ 171 """
172 if not self._prepared_symbol_mapping_path and not self.prepare(): 172 if not self._prepared_symbol_data_sources_path and not self.prepare():
173 return None 173 return None
174 if not self._loaded_symbol_mapping: 174 if not self._loaded_symbol_data_sources:
175 LOGGER.info('Loading symbol mapping...') 175 LOGGER.info('Loading symbol mapping...')
176 self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load( 176 self._loaded_symbol_data_sources = RuntimeSymbolsInProcess.load(
177 self._prepared_symbol_mapping_path) 177 self._prepared_symbol_data_sources_path)
178 return self._loaded_symbol_mapping 178 return self._loaded_symbol_data_sources
179 179
180 180
181 class SymbolCache(object): 181 class SymbolFinder(object):
182 """Manages cache of used symbol mapping. 182 """Finds corresponding symbols from addresses.
183 183
184 The original symbol mapping is by "SymbolMapping" (maps, nm and readelf for 184 This class does only 'find()' symbols from a specified |address_list|.
185 examples), and "SymbolCache" just caches "how dmprof interprets the address" 185 It is introduced to make a finder mockable.
186 to speed-up another analysis for the same binary and profile dumps.
187 Handling all symbol mapping takes a long time in "SymbolMapping".
188 "SymbolCache" caches used symbol mapping on memory and in files.
189 """ 186 """
190 def __init__(self, prefix): 187 _FIND_RUNTIME_SYMBOLS_FUNCTIONS = {
191 self._prefix = prefix 188 FUNCTION_ADDRESS: find_runtime_symbols_list,
192 self._symbol_cache_paths = { 189 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list,
193 FUNCTION_ADDRESS: prefix + '.funcsym', 190 }
194 TYPEINFO_ADDRESS: prefix + '.typesym', 191
195 } 192 def __init__(self, address_type, symbol_data_sources):
196 self._find_runtime_symbols_functions = { 193 self._finder_function = self._FIND_RUNTIME_SYMBOLS_FUNCTIONS[address_type]
197 FUNCTION_ADDRESS: find_runtime_symbols_list, 194 self._symbol_data_sources = symbol_data_sources
198 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list, 195
199 } 196 def find(self, address_list):
200 self._symbol_caches = { 197 return self._finder_function(self._symbol_data_sources.get(), address_list)
198
199
200 class SymbolMappingCache(object):
201 """Caches mapping from actually used addresses to symbols.
202
203 'update()' updates the cache from the original symbol data sources via
204 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'.
205 """
206 def __init__(self):
207 self._symbol_mapping_caches = {
201 FUNCTION_ADDRESS: {}, 208 FUNCTION_ADDRESS: {},
202 TYPEINFO_ADDRESS: {}, 209 TYPEINFO_ADDRESS: {},
203 } 210 }
204 211
205 def update(self, address_type, bucket_set, symbol_mapping): 212 def update(self, address_type, bucket_set, symbol_finder, cache_f):
206 """Updates symbol mapping on memory and in a ".*sym" cache file. 213 """Updates symbol mapping cache on memory and in a symbol cache file.
207 214
208 It reads cached symbol mapping from a ".*sym" file if it exists. Then, 215 It reads cached symbol mapping from a symbol cache file |cache_f| if it
209 it looks up unresolved addresses from a given "SymbolMapping". Finally, 216 exists. Unresolved addresses are then resolved and added to the cache
210 both symbol mappings on memory and in the ".*sym" cache file are updated. 217 both on memory and in the symbol cache file with using 'SymbolFinder'.
211 218
212 Symbol files are formatted as follows: 219 A cache file is formatted as follows:
213 <Address> <Symbol> 220 <Address> <Symbol>
214 <Address> <Symbol> 221 <Address> <Symbol>
215 <Address> <Symbol> 222 <Address> <Symbol>
216 ... 223 ...
217 224
218 Args: 225 Args:
219 address_type: A type of addresses to update. It should be one of 226 address_type: A type of addresses to update.
220 FUNCTION_ADDRESS or TYPEINFO_ADDRESS. 227 It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
221 bucket_set: A BucketSet object. 228 bucket_set: A BucketSet object.
222 symbol_mapping: A SymbolMapping object. 229 symbol_finder: A SymbolFinder object to find symbols.
230 cache_f: A readable and writable IO object of the symbol cache file.
223 """ 231 """
224 self._load(address_type) 232 cache_f.seek(0, os.SEEK_SET)
233 self._load(cache_f, address_type)
225 234
226 unresolved_addresses = sorted( 235 unresolved_addresses = sorted(
227 address for address in bucket_set.iter_addresses(address_type) 236 address for address in bucket_set.iter_addresses(address_type)
228 if address not in self._symbol_caches[address_type]) 237 if address not in self._symbol_mapping_caches[address_type])
229 238
230 if not unresolved_addresses: 239 if not unresolved_addresses:
231 LOGGER.info('No need to resolve any more addresses.') 240 LOGGER.info('No need to resolve any more addresses.')
232 return 241 return
233 242
234 symbol_cache_path = self._symbol_cache_paths[address_type] 243 cache_f.seek(0, os.SEEK_END)
235 with open(symbol_cache_path, mode='a+') as symbol_f: 244 LOGGER.info('Loading %d unresolved addresses.' %
236 LOGGER.info('Loading %d unresolved addresses.' % 245 len(unresolved_addresses))
237 len(unresolved_addresses)) 246 symbol_list = symbol_finder.find(unresolved_addresses)
238 symbol_list = self._find_runtime_symbols_functions[address_type](
239 symbol_mapping.get(), unresolved_addresses)
240 247
241 for address, symbol in zip(unresolved_addresses, symbol_list): 248 for address, symbol in zip(unresolved_addresses, symbol_list):
242 stripped_symbol = symbol.strip() or '??' 249 stripped_symbol = symbol.strip() or '??'
243 self._symbol_caches[address_type][address] = stripped_symbol 250 self._symbol_mapping_caches[address_type][address] = stripped_symbol
244 symbol_f.write('%x %s\n' % (address, stripped_symbol)) 251 cache_f.write('%x %s\n' % (address, stripped_symbol))
245 252
246 def lookup(self, address_type, address): 253 def lookup(self, address_type, address):
247 """Looks up a symbol for a given |address|. 254 """Looks up a symbol for a given |address|.
248 255
249 Args: 256 Args:
250 address_type: A type of addresses to lookup. It should be one of 257 address_type: A type of addresses to lookup.
251 FUNCTION_ADDRESS or TYPEINFO_ADDRESS. 258 It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
252 address: An integer that represents an address. 259 address: An integer that represents an address.
253 260
254 Returns: 261 Returns:
255 A string that represents a symbol. 262 A string that represents a symbol.
256 """ 263 """
257 return self._symbol_caches[address_type].get(address) 264 return self._symbol_mapping_caches[address_type].get(address)
258 265
259 def _load(self, address_type): 266 def _load(self, cache_f, address_type):
260 symbol_cache_path = self._symbol_cache_paths[address_type]
261 try: 267 try:
262 with open(symbol_cache_path, mode='r') as symbol_f: 268 for line in cache_f:
263 for line in symbol_f: 269 items = line.rstrip().split(None, 1)
264 items = line.rstrip().split(None, 1) 270 if len(items) == 1:
265 if len(items) == 1: 271 items.append('??')
266 items.append('??') 272 self._symbol_mapping_caches[address_type][int(items[0], 16)] = items[1]
267 self._symbol_caches[address_type][int(items[0], 16)] = items[1]
268 LOGGER.info('Loaded %d entries from symbol cache.' % 273 LOGGER.info('Loaded %d entries from symbol cache.' %
269 len(self._symbol_caches[address_type])) 274 len(self._symbol_mapping_caches[address_type]))
270 except IOError as e: 275 except IOError as e:
271 LOGGER.info('No valid symbol cache file is found: %s' % e) 276 LOGGER.info('The symbol cache file is invalid: %s' % e)
272 277
273 278
274 class Rule(object): 279 class Rule(object):
275 """Represents one matching rule in a policy file.""" 280 """Represents one matching rule in a policy file."""
276 281
277 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None): 282 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None):
278 self._name = name 283 self._name = name
279 self._mmap = mmap 284 self._mmap = mmap
280 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z') 285 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z')
281 if typeinfo_pattern: 286 if typeinfo_pattern:
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 self._mmap = mmap 481 self._mmap = mmap
477 self._typeinfo = typeinfo 482 self._typeinfo = typeinfo
478 self._typeinfo_name = typeinfo_name 483 self._typeinfo_name = typeinfo_name
479 484
480 self._symbolized_stacktrace = stacktrace 485 self._symbolized_stacktrace = stacktrace
481 self._symbolized_joined_stacktrace = '' 486 self._symbolized_joined_stacktrace = ''
482 self._symbolized_typeinfo = typeinfo_name 487 self._symbolized_typeinfo = typeinfo_name
483 488
484 self.component_cache = '' 489 self.component_cache = ''
485 490
486 def symbolize(self, symbol_cache): 491 def symbolize(self, symbol_mapping_cache):
487 """Makes a symbolized stacktrace and typeinfo with |symbol_cache|. 492 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|.
488 493
489 Args: 494 Args:
490 symbol_cache: A SymbolCache object. 495 symbol_mapping_cache: A SymbolMappingCache object.
491 """ 496 """
492 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. 497 # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
493 self._symbolized_stacktrace = [ 498 self._symbolized_stacktrace = [
494 symbol_cache.lookup(FUNCTION_ADDRESS, address) 499 symbol_mapping_cache.lookup(FUNCTION_ADDRESS, address)
495 for address in self._stacktrace] 500 for address in self._stacktrace]
496 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace) 501 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace)
497 if not self._typeinfo: 502 if not self._typeinfo:
498 self._symbolized_typeinfo = 'no typeinfo' 503 self._symbolized_typeinfo = 'no typeinfo'
499 else: 504 else:
500 self._symbolized_typeinfo = symbol_cache.lookup( 505 self._symbolized_typeinfo = symbol_mapping_cache.lookup(
501 TYPEINFO_ADDRESS, self._typeinfo) 506 TYPEINFO_ADDRESS, self._typeinfo)
502 if not self._symbolized_typeinfo: 507 if not self._symbolized_typeinfo:
503 self._symbolized_typeinfo = 'no typeinfo' 508 self._symbolized_typeinfo = 'no typeinfo'
504 509
505 def clear_component_cache(self): 510 def clear_component_cache(self):
506 self.component_cache = '' 511 self.component_cache = ''
507 512
508 @property 513 @property
509 def stacktrace(self): 514 def stacktrace(self):
510 return self._stacktrace 515 return self._stacktrace
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
590 def __iter__(self): 595 def __iter__(self):
591 for bucket_id, bucket_content in self._buckets.iteritems(): 596 for bucket_id, bucket_content in self._buckets.iteritems():
592 yield bucket_id, bucket_content 597 yield bucket_id, bucket_content
593 598
594 def __getitem__(self, bucket_id): 599 def __getitem__(self, bucket_id):
595 return self._buckets[bucket_id] 600 return self._buckets[bucket_id]
596 601
597 def get(self, bucket_id): 602 def get(self, bucket_id):
598 return self._buckets.get(bucket_id) 603 return self._buckets.get(bucket_id)
599 604
600 def symbolize(self, symbol_cache): 605 def symbolize(self, symbol_mapping_cache):
601 for bucket_content in self._buckets.itervalues(): 606 for bucket_content in self._buckets.itervalues():
602 bucket_content.symbolize(symbol_cache) 607 bucket_content.symbolize(symbol_mapping_cache)
603 608
604 def clear_component_cache(self): 609 def clear_component_cache(self):
605 for bucket_content in self._buckets.itervalues(): 610 for bucket_content in self._buckets.itervalues():
606 bucket_content.clear_component_cache() 611 bucket_content.clear_component_cache()
607 612
608 def iter_addresses(self, address_type): 613 def iter_addresses(self, address_type):
609 for function in self._addresses[address_type]: 614 for function in self._addresses[address_type]:
610 yield function 615 yield function
611 616
612 617
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
808 """Subclasses are a subcommand for this executable. 813 """Subclasses are a subcommand for this executable.
809 814
810 See COMMANDS in main(). 815 See COMMANDS in main().
811 """ 816 """
812 def __init__(self, usage): 817 def __init__(self, usage):
813 self._parser = optparse.OptionParser(usage) 818 self._parser = optparse.OptionParser(usage)
814 819
815 @staticmethod 820 @staticmethod
816 def load_basic_files(dump_path, multiple): 821 def load_basic_files(dump_path, multiple):
817 prefix = Command._find_prefix(dump_path) 822 prefix = Command._find_prefix(dump_path)
818 symbol_mapping = SymbolMapping(prefix) 823 symbol_data_sources = SymbolDataSources(prefix)
819 symbol_mapping.prepare() 824 symbol_data_sources.prepare()
820 bucket_set = BucketSet() 825 bucket_set = BucketSet()
821 bucket_set.load(prefix) 826 bucket_set.load(prefix)
822 if multiple: 827 if multiple:
823 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) 828 dump_list = DumpList.load(Command._find_all_dumps(dump_path))
824 else: 829 else:
825 dump = Dump.load(dump_path) 830 dump = Dump.load(dump_path)
826 symbol_cache = SymbolCache(prefix) 831 symbol_mapping_cache = SymbolMappingCache()
827 symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping) 832 with open(prefix + '.funcsym', 'a+') as cache_f:
828 symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping) 833 symbol_mapping_cache.update(
829 bucket_set.symbolize(symbol_cache) 834 FUNCTION_ADDRESS, bucket_set,
835 SymbolFinder(FUNCTION_ADDRESS, symbol_data_sources), cache_f)
836 with open(prefix + '.typesym', 'a+') as cache_f:
837 symbol_mapping_cache.update(
838 TYPEINFO_ADDRESS, bucket_set,
839 SymbolFinder(TYPEINFO_ADDRESS, symbol_data_sources), cache_f)
840 bucket_set.symbolize(symbol_mapping_cache)
830 if multiple: 841 if multiple:
831 return (bucket_set, dump_list) 842 return (bucket_set, dump_list)
832 else: 843 else:
833 return (bucket_set, dump) 844 return (bucket_set, dump)
834 845
835 @staticmethod 846 @staticmethod
836 def _find_prefix(path): 847 def _find_prefix(path):
837 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) 848 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
838 849
839 @staticmethod 850 @staticmethod
(...skipping 491 matching lines...) Expand 10 before | Expand all | Expand 10 after
1331 errorcode = COMMANDS[action]().do(sys.argv) 1342 errorcode = COMMANDS[action]().do(sys.argv)
1332 except ParsingException, e: 1343 except ParsingException, e:
1333 errorcode = 1 1344 errorcode = 1
1334 sys.stderr.write('Exit by parsing error: %s\n' % e) 1345 sys.stderr.write('Exit by parsing error: %s\n' % e)
1335 1346
1336 return errorcode 1347 return errorcode
1337 1348
1338 1349
1339 if __name__ == '__main__': 1350 if __name__ == '__main__':
1340 sys.exit(main()) 1351 sys.exit(main())
OLD | NEW
« no previous file with comments | « no previous file | tools/deep_memory_profiler/tests/dmprof_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698