OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """The deep heap profiler script for Chrome.""" | 5 """The deep heap profiler script for Chrome.""" |
6 | 6 |
7 from datetime import datetime | 7 import datetime |
8 import json | 8 import json |
9 import logging | 9 import logging |
10 import optparse | 10 import optparse |
11 import os | 11 import os |
12 import re | 12 import re |
13 import subprocess | 13 import subprocess |
14 import sys | 14 import sys |
15 import tempfile | 15 import tempfile |
16 import zipfile | 16 import zipfile |
17 | 17 |
18 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | 18 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) |
19 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | 19 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( |
20 BASE_PATH, os.pardir, 'find_runtime_symbols') | 20 BASE_PATH, os.pardir, 'find_runtime_symbols') |
21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | 21 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) |
22 | 22 |
23 from find_runtime_symbols import find_runtime_symbols_list | 23 import find_runtime_symbols |
24 from find_runtime_symbols import find_runtime_typeinfo_symbols_list | 24 import prepare_symbol_info |
25 from find_runtime_symbols import RuntimeSymbolsInProcess | 25 |
26 from prepare_symbol_info import prepare_symbol_info | 26 from find_runtime_symbols import FUNCTION_SYMBOLS |
| 27 from find_runtime_symbols import SOURCEFILE_SYMBOLS |
| 28 from find_runtime_symbols import TYPEINFO_SYMBOLS |
27 | 29 |
28 BUCKET_ID = 5 | 30 BUCKET_ID = 5 |
29 VIRTUAL = 0 | 31 VIRTUAL = 0 |
30 COMMITTED = 1 | 32 COMMITTED = 1 |
31 ALLOC_COUNT = 2 | 33 ALLOC_COUNT = 2 |
32 FREE_COUNT = 3 | 34 FREE_COUNT = 3 |
33 NULL_REGEX = re.compile('') | 35 NULL_REGEX = re.compile('') |
34 | 36 |
35 LOGGER = logging.getLogger('dmprof') | 37 LOGGER = logging.getLogger('dmprof') |
36 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') | 38 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') |
37 FUNCTION_ADDRESS = 'function' | |
38 TYPEINFO_ADDRESS = 'typeinfo' | |
39 | 39 |
40 | 40 |
41 # Heap Profile Dump versions | 41 # Heap Profile Dump versions |
42 | 42 |
43 # DUMP_DEEP_[1-4] are obsolete. | 43 # DUMP_DEEP_[1-4] are obsolete. |
44 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. | 44 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. |
45 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. | 45 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. |
46 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". | 46 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". |
47 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. | 47 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. |
48 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. | 48 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. |
(...skipping 99 matching lines...) |
148 def prepare(self): | 148 def prepare(self): |
149 """Prepares symbol data sources by extracting mapping from a binary. | 149 """Prepares symbol data sources by extracting mapping from a binary. |
150 | 150 |
151 The prepared symbol data sources are stored in a directory. The directory | 151 The prepared symbol data sources are stored in a directory. The directory |
152 name is stored in |self._prepared_symbol_data_sources_path|. | 152 name is stored in |self._prepared_symbol_data_sources_path|. |
153 | 153 |
154 Returns: | 154 Returns: |
155 True if succeeded. | 155 True if succeeded. |
156 """ | 156 """ |
157 LOGGER.info('Preparing symbol mapping...') | 157 LOGGER.info('Preparing symbol mapping...') |
158 self._prepared_symbol_data_sources_path, used_tempdir = prepare_symbol_info( | 158 self._prepared_symbol_data_sources_path, used_tempdir = ( |
159 self._prefix + '.maps', self._prefix + '.symmap', True) | 159 prepare_symbol_info.prepare_symbol_info( |
| 160 self._prefix + '.maps', |
| 161 output_dir_path=self._prefix + '.symmap', |
| 162 use_tempdir=True, |
| 163 use_source_file_name=True)) |
160 if self._prepared_symbol_data_sources_path: | 164 if self._prepared_symbol_data_sources_path: |
161 LOGGER.info(' Prepared symbol mapping.') | 165 LOGGER.info(' Prepared symbol mapping.') |
162 if used_tempdir: | 166 if used_tempdir: |
163 LOGGER.warn(' Using a temporary directory for symbol mapping.') | 167 LOGGER.warn(' Using a temporary directory for symbol mapping.') |
164 LOGGER.warn(' Delete it by yourself.') | 168 LOGGER.warn(' Delete it by yourself.') |
165 LOGGER.warn(' Or, move the directory by yourself to use it later.') | 169 LOGGER.warn(' Or, move the directory by yourself to use it later.') |
166 return True | 170 return True |
167 else: | 171 else: |
168 LOGGER.warn(' Failed to prepare symbol mapping.') | 172 LOGGER.warn(' Failed to prepare symbol mapping.') |
169 return False | 173 return False |
170 | 174 |
171 def get(self): | 175 def get(self): |
172 """Returns the prepared symbol data sources. | 176 """Returns the prepared symbol data sources. |
173 | 177 |
174 Returns: | 178 Returns: |
175 The prepared symbol data sources. None if failed. | 179 The prepared symbol data sources. None if failed. |
176 """ | 180 """ |
177 if not self._prepared_symbol_data_sources_path and not self.prepare(): | 181 if not self._prepared_symbol_data_sources_path and not self.prepare(): |
178 return None | 182 return None |
179 if not self._loaded_symbol_data_sources: | 183 if not self._loaded_symbol_data_sources: |
180 LOGGER.info('Loading symbol mapping...') | 184 LOGGER.info('Loading symbol mapping...') |
181 self._loaded_symbol_data_sources = RuntimeSymbolsInProcess.load( | 185 self._loaded_symbol_data_sources = ( |
182 self._prepared_symbol_data_sources_path) | 186 find_runtime_symbols.RuntimeSymbolsInProcess.load( |
| 187 self._prepared_symbol_data_sources_path)) |
183 return self._loaded_symbol_data_sources | 188 return self._loaded_symbol_data_sources |
184 | 189 |
185 def path(self): | 190 def path(self): |
186 """Returns the path of the prepared symbol data sources if possible.""" | 191 """Returns the path of the prepared symbol data sources if possible.""" |
187 if not self._prepared_symbol_data_sources_path and not self.prepare(): | 192 if not self._prepared_symbol_data_sources_path and not self.prepare(): |
188 return None | 193 return None |
189 return self._prepared_symbol_data_sources_path | 194 return self._prepared_symbol_data_sources_path |
190 | 195 |
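As an aside, the prepare() call above spells out the new keyword arguments. A minimal sketch of an equivalent standalone call, assuming the find_runtime_symbols checkout is on sys.path and using a hypothetical dump prefix 'chrome.12345':

    import prepare_symbol_info

    prefix = 'chrome.12345'  # hypothetical dump prefix
    # Returns a (prepared_data_path, used_tempdir) pair, as unpacked above.
    path, used_tempdir = prepare_symbol_info.prepare_symbol_info(
        prefix + '.maps',
        output_dir_path=prefix + '.symmap',
        use_tempdir=True,
        use_source_file_name=True)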
191 | 196 |
192 class SymbolFinder(object): | 197 class SymbolFinder(object): |
193 """Finds corresponding symbols from addresses. | 198 """Finds corresponding symbols from addresses. |
194 | 199 |
195 This class only looks up ('find()') symbols for a specified |address_list|. | 200 This class only looks up ('find()') symbols for a specified |address_list|. |
196 It is introduced to make a finder mockable. | 201 It is introduced to make a finder mockable. |
197 """ | 202 """ |
198 _FIND_RUNTIME_SYMBOLS_FUNCTIONS = { | 203 def __init__(self, symbol_type, symbol_data_sources): |
199 FUNCTION_ADDRESS: find_runtime_symbols_list, | 204 self._symbol_type = symbol_type |
200 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list, | |
201 } | |
202 | |
203 def __init__(self, address_type, symbol_data_sources): | |
204 self._finder_function = self._FIND_RUNTIME_SYMBOLS_FUNCTIONS[address_type] | |
205 self._symbol_data_sources = symbol_data_sources | 205 self._symbol_data_sources = symbol_data_sources |
206 | 206 |
207 def find(self, address_list): | 207 def find(self, address_list): |
208 return self._finder_function(self._symbol_data_sources.get(), address_list) | 208 return find_runtime_symbols.find_runtime_symbols( |
| 209 self._symbol_type, self._symbol_data_sources.get(), address_list) |
209 | 210 |
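Since SymbolFinder exists to be mockable, a test double only needs to honor the contract used by SymbolMappingCache.update() below: accept a list of integer addresses and return a dict mapping each address to a symbol string. A minimal sketch (names are illustrative):

    class StubSymbolFinder(object):
      """Test double for SymbolFinder; resolves every address to '?'."""
      def find(self, address_list):
        return dict((address, '?') for address in address_list)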
210 | 211 |
211 class SymbolMappingCache(object): | 212 class SymbolMappingCache(object): |
212 """Caches mapping from actually used addresses to symbols. | 213 """Caches mapping from actually used addresses to symbols. |
213 | 214 |
214 'update()' updates the cache from the original symbol data sources via | 215 'update()' updates the cache from the original symbol data sources via |
215 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. | 216 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. |
216 """ | 217 """ |
217 def __init__(self): | 218 def __init__(self): |
218 self._symbol_mapping_caches = { | 219 self._symbol_mapping_caches = { |
219 FUNCTION_ADDRESS: {}, | 220 FUNCTION_SYMBOLS: {}, |
220 TYPEINFO_ADDRESS: {}, | 221 SOURCEFILE_SYMBOLS: {}, |
| 222 TYPEINFO_SYMBOLS: {}, |
221 } | 223 } |
222 | 224 |
223 def update(self, address_type, bucket_set, symbol_finder, cache_f): | 225 def update(self, symbol_type, bucket_set, symbol_finder, cache_f): |
224 """Updates symbol mapping cache on memory and in a symbol cache file. | 226 """Updates symbol mapping cache on memory and in a symbol cache file. |
225 | 227 |
226 It reads cached symbol mapping from a symbol cache file |cache_f| if it | 228 It reads cached symbol mapping from a symbol cache file |cache_f| if it |
227 exists. Unresolved addresses are then resolved and added to the cache | 229 exists. Unresolved addresses are then resolved and added to the cache |
228 both in memory and in the symbol cache file using 'SymbolFinder'. | 230 both in memory and in the symbol cache file using 'SymbolFinder'. |
229 | 231 |
230 A cache file is formatted as follows: | 232 A cache file is formatted as follows: |
231 <Address> <Symbol> | 233 <Address> <Symbol> |
232 <Address> <Symbol> | 234 <Address> <Symbol> |
233 <Address> <Symbol> | 235 <Address> <Symbol> |
234 ... | 236 ... |
235 | 237 |
236 Args: | 238 Args: |
237 address_type: A type of addresses to update. | 239 symbol_type: A type of symbols to update. It should be one of |
238 It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS. | 240 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS or TYPEINFO_SYMBOLS. |
239 bucket_set: A BucketSet object. | 241 bucket_set: A BucketSet object. |
240 symbol_finder: A SymbolFinder object to find symbols. | 242 symbol_finder: A SymbolFinder object to find symbols. |
241 cache_f: A readable and writable IO object of the symbol cache file. | 243 cache_f: A readable and writable IO object of the symbol cache file. |
242 """ | 244 """ |
243 cache_f.seek(0, os.SEEK_SET) | 245 cache_f.seek(0, os.SEEK_SET) |
244 self._load(cache_f, address_type) | 246 self._load(cache_f, symbol_type) |
245 | 247 |
246 unresolved_addresses = sorted( | 248 unresolved_addresses = sorted( |
247 address for address in bucket_set.iter_addresses(address_type) | 249 address for address in bucket_set.iter_addresses(symbol_type) |
248 if address not in self._symbol_mapping_caches[address_type]) | 250 if address not in self._symbol_mapping_caches[symbol_type]) |
249 | 251 |
250 if not unresolved_addresses: | 252 if not unresolved_addresses: |
251 LOGGER.info('No need to resolve any more addresses.') | 253 LOGGER.info('No need to resolve any more addresses.') |
252 return | 254 return |
253 | 255 |
254 cache_f.seek(0, os.SEEK_END) | 256 cache_f.seek(0, os.SEEK_END) |
255 LOGGER.info('Loading %d unresolved addresses.' % | 257 LOGGER.info('Loading %d unresolved addresses.' % |
256 len(unresolved_addresses)) | 258 len(unresolved_addresses)) |
257 symbol_list = symbol_finder.find(unresolved_addresses) | 259 symbol_dict = symbol_finder.find(unresolved_addresses) |
258 | 260 |
259 for address, symbol in zip(unresolved_addresses, symbol_list): | 261 for address, symbol in symbol_dict.iteritems(): |
260 stripped_symbol = symbol.strip() or '??' | 262 stripped_symbol = symbol.strip() or '?' |
261 self._symbol_mapping_caches[address_type][address] = stripped_symbol | 263 self._symbol_mapping_caches[symbol_type][address] = stripped_symbol |
262 cache_f.write('%x %s\n' % (address, stripped_symbol)) | 264 cache_f.write('%x %s\n' % (address, stripped_symbol)) |
263 | 265 |
264 def lookup(self, address_type, address): | 266 def lookup(self, symbol_type, address): |
265 """Looks up a symbol for a given |address|. | 267 """Looks up a symbol for a given |address|. |
266 | 268 |
267 Args: | 269 Args: |
268 address_type: A type of addresses to look up. | 270 symbol_type: A type of symbols to look up. It should be one of |
269 It should be one of FUNCTION_ADDRESS or TYPEINFO_ADDRESS. | 271 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS or TYPEINFO_SYMBOLS. |
270 address: An integer that represents an address. | 272 address: An integer that represents an address. |
271 | 273 |
272 Returns: | 274 Returns: |
273 A string that represents a symbol. | 275 A string that represents a symbol. |
274 """ | 276 """ |
275 return self._symbol_mapping_caches[address_type].get(address) | 277 return self._symbol_mapping_caches[symbol_type].get(address) |
276 | 278 |
277 def _load(self, cache_f, address_type): | 279 def _load(self, cache_f, symbol_type): |
278 try: | 280 try: |
279 for line in cache_f: | 281 for line in cache_f: |
280 items = line.rstrip().split(None, 1) | 282 items = line.rstrip().split(None, 1) |
281 if len(items) == 1: | 283 if len(items) == 1: |
282 items.append('??') | 284 items.append('??') |
283 self._symbol_mapping_caches[address_type][int(items[0], 16)] = items[1] | 285 self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1] |
284 LOGGER.info('Loaded %d entries from symbol cache.' % | 286 LOGGER.info('Loaded %d entries from symbol cache.' % |
285 len(self._symbol_mapping_caches[address_type])) | 287 len(self._symbol_mapping_caches[symbol_type])) |
286 except IOError as e: | 288 except IOError as e: |
287 LOGGER.info('The symbol cache file is invalid: %s' % e) | 289 LOGGER.info('The symbol cache file is invalid: %s' % e) |
288 | 290 |
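The cache file written and read above holds one '<hex-address> <symbol>' pair per line: update() appends entries with '%x %s\n' and _load() parses them back with int(..., 16). A round-trip sketch, using an in-memory file and a made-up address for illustration:

    import StringIO

    cache_f = StringIO.StringIO()
    cache_f.write('%x %s\n' % (0x7f3e01, 'MessageLoop::Run'))
    cache_f.seek(0)
    for line in cache_f:
      items = line.rstrip().split(None, 1)
      print int(items[0], 16), items[1]  # prints: 8338945 MessageLoop::Run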
289 | 291 |
290 class Rule(object): | 292 class Rule(object): |
291 """Represents one matching rule in a policy file.""" | 293 """Represents one matching rule in a policy file.""" |
292 | 294 |
293 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None): | 295 def __init__(self, |
| 296 name, |
| 297 mmap, |
| 298 stackfunction_pattern=None, |
| 299 stacksourcefile_pattern=None, |
| 300 typeinfo_pattern=None): |
294 self._name = name | 301 self._name = name |
295 self._mmap = mmap | 302 self._mmap = mmap |
296 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z') | 303 |
| 304 self._stackfunction_pattern = None |
| 305 if stackfunction_pattern: |
| 306 self._stackfunction_pattern = re.compile( |
| 307 stackfunction_pattern + r'\Z') |
| 308 |
| 309 self._stacksourcefile_pattern = None |
| 310 if stacksourcefile_pattern: |
| 311 self._stacksourcefile_pattern = re.compile( |
| 312 stacksourcefile_pattern + r'\Z') |
| 313 |
| 314 self._typeinfo_pattern = None |
297 if typeinfo_pattern: | 315 if typeinfo_pattern: |
298 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') | 316 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') |
299 else: | |
300 self._typeinfo_pattern = None | |
301 | 317 |
302 @property | 318 @property |
303 def name(self): | 319 def name(self): |
304 return self._name | 320 return self._name |
305 | 321 |
306 @property | 322 @property |
307 def mmap(self): | 323 def mmap(self): |
308 return self._mmap | 324 return self._mmap |
309 | 325 |
310 @property | 326 @property |
311 def stacktrace_pattern(self): | 327 def stackfunction_pattern(self): |
312 return self._stacktrace_pattern | 328 return self._stackfunction_pattern |
| 329 |
| 330 @property |
| 331 def stacksourcefile_pattern(self): |
| 332 return self._stacksourcefile_pattern |
313 | 333 |
314 @property | 334 @property |
315 def typeinfo_pattern(self): | 335 def typeinfo_pattern(self): |
316 return self._typeinfo_pattern | 336 return self._typeinfo_pattern |
317 | 337 |
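Note the r'\Z' appended to every pattern above: combined with match(), it requires the pattern to cover the whole joined string rather than just a prefix. A small sketch of the difference (patterns are illustrative):

    import re

    print bool(re.compile('MessageLoop').match('MessageLoop::Run'))            # True: prefix suffices
    print bool(re.compile('MessageLoop' + r'\Z').match('MessageLoop::Run'))    # False: must span the string
    print bool(re.compile('MessageLoop.*' + r'\Z').match('MessageLoop::Run'))  # True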
318 | 338 |
319 class Policy(object): | 339 class Policy(object): |
320 """Represents a policy, a content of a policy file.""" | 340 """Represents a policy, a content of a policy file.""" |
321 | 341 |
322 def __init__(self, rules, version, components): | 342 def __init__(self, rules, version, components): |
(...skipping 20 matching lines...) |
343 bucket: A Bucket object to be searched for. | 363 bucket: A Bucket object to be searched for. |
344 | 364 |
345 Returns: | 365 Returns: |
346 A string representing a component name. | 366 A string representing a component name. |
347 """ | 367 """ |
348 if not bucket: | 368 if not bucket: |
349 return 'no-bucket' | 369 return 'no-bucket' |
350 if bucket.component_cache: | 370 if bucket.component_cache: |
351 return bucket.component_cache | 371 return bucket.component_cache |
352 | 372 |
353 stacktrace = bucket.symbolized_joined_stacktrace | 373 stackfunction = bucket.symbolized_joined_stackfunction |
| 374 stacksourcefile = bucket.symbolized_joined_stacksourcefile |
354 typeinfo = bucket.symbolized_typeinfo | 375 typeinfo = bucket.symbolized_typeinfo |
355 if typeinfo.startswith('0x'): | 376 if typeinfo.startswith('0x'): |
356 typeinfo = bucket.typeinfo_name | 377 typeinfo = bucket.typeinfo_name |
357 | 378 |
358 for rule in self._rules: | 379 for rule in self._rules: |
359 if (bucket.mmap == rule.mmap and | 380 if (bucket.mmap == rule.mmap and |
360 rule.stacktrace_pattern.match(stacktrace) and | 381 (not rule.stackfunction_pattern or |
| 382 rule.stackfunction_pattern.match(stackfunction)) and |
| 383 (not rule.stacksourcefile_pattern or |
| 384 rule.stacksourcefile_pattern.match(stacksourcefile)) and |
361 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): | 385 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): |
362 bucket.component_cache = rule.name | 386 bucket.component_cache = rule.name |
363 return rule.name | 387 return rule.name |
364 | 388 |
365 assert False | 389 assert False |
366 | 390 |
367 @staticmethod | 391 @staticmethod |
368 def load(filename, filetype): | 392 def load(filename, filetype): |
369 """Loads a policy file of |filename| in a |format|. | 393 """Loads a policy file of |filename| in a |format|. |
370 | 394 |
(...skipping 36 matching lines...) |
407 Args: | 431 Args: |
408 policy_f: A File/IO object to read. | 432 policy_f: A File/IO object to read. |
409 | 433 |
410 Returns: | 434 Returns: |
411 A loaded policy object. | 435 A loaded policy object. |
412 """ | 436 """ |
413 policy = json.load(policy_f) | 437 policy = json.load(policy_f) |
414 | 438 |
415 rules = [] | 439 rules = [] |
416 for rule in policy['rules']: | 440 for rule in policy['rules']: |
| 441 stackfunction = rule.get('stackfunction') or rule.get('stacktrace') |
| 442 stacksourcefile = rule.get('stacksourcefile') |
417 rules.append(Rule( | 443 rules.append(Rule( |
418 rule['name'], | 444 rule['name'], |
419 rule['allocator'] == 'mmap', | 445 rule['allocator'] == 'mmap', |
420 rule['stacktrace'], | 446 stackfunction, |
| 447 stacksourcefile, |
421 rule['typeinfo'] if 'typeinfo' in rule else None)) | 448 rule['typeinfo'] if 'typeinfo' in rule else None)) |
| 449 |
422 return Policy(rules, policy['version'], policy['components']) | 450 return Policy(rules, policy['version'], policy['components']) |
423 | 451 |
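Given the parser above, a policy file is a JSON object with 'rules', 'version' and 'components' keys; each rule may use the new 'stackfunction' key (with the old 'stacktrace' still accepted as a fallback) and the new optional 'stacksourcefile'. A hypothetical minimal policy illustrating the accepted keys; note that real policies also need catch-all rules at the end, since find() asserts that some rule always matches:

    {
      "components": ["mmap-v8", "tc-webkit"],
      "rules": [
        {
          "name": "mmap-v8",
          "allocator": "mmap",
          "stacksourcefile": ".*v8.*"
        },
        {
          "name": "tc-webkit",
          "allocator": "malloc",
          "stackfunction": ".*WebKit.*",
          "typeinfo": ".*WebCore.*"
        }
      ],
      "version": "POLICY_DEEP_3"
    }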
424 | 452 |
425 class PolicySet(object): | 453 class PolicySet(object): |
426 """Represents a set of policies.""" | 454 """Represents a set of policies.""" |
427 | 455 |
428 def __init__(self, policy_directory): | 456 def __init__(self, policy_directory): |
429 self._policy_directory = policy_directory | 457 self._policy_directory = policy_directory |
430 | 458 |
431 @staticmethod | 459 @staticmethod |
(...skipping 54 matching lines...) |
486 | 514 |
487 class Bucket(object): | 515 class Bucket(object): |
488 """Represents a bucket, which is a unit of memory block classification.""" | 516 """Represents a bucket, which is a unit of memory block classification.""" |
489 | 517 |
490 def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name): | 518 def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name): |
491 self._stacktrace = stacktrace | 519 self._stacktrace = stacktrace |
492 self._mmap = mmap | 520 self._mmap = mmap |
493 self._typeinfo = typeinfo | 521 self._typeinfo = typeinfo |
494 self._typeinfo_name = typeinfo_name | 522 self._typeinfo_name = typeinfo_name |
495 | 523 |
496 self._symbolized_stacktrace = stacktrace | 524 self._symbolized_stackfunction = stacktrace |
497 self._symbolized_joined_stacktrace = '' | 525 self._symbolized_joined_stackfunction = '' |
| 526 self._symbolized_stacksourcefile = stacktrace |
| 527 self._symbolized_joined_stacksourcefile = '' |
498 self._symbolized_typeinfo = typeinfo_name | 528 self._symbolized_typeinfo = typeinfo_name |
499 | 529 |
500 self.component_cache = '' | 530 self.component_cache = '' |
501 | 531 |
502 def symbolize(self, symbol_mapping_cache): | 532 def symbolize(self, symbol_mapping_cache): |
503 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. | 533 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. |
504 | 534 |
505 Args: | 535 Args: |
506 symbol_mapping_cache: A SymbolMappingCache object. | 536 symbol_mapping_cache: A SymbolMappingCache object. |
507 """ | 537 """ |
508 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. | 538 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. |
509 self._symbolized_stacktrace = [ | 539 self._symbolized_stackfunction = [ |
510 symbol_mapping_cache.lookup(FUNCTION_ADDRESS, address) | 540 symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address) |
511 for address in self._stacktrace] | 541 for address in self._stacktrace] |
512 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace) | 542 self._symbolized_joined_stackfunction = ' '.join( |
| 543 self._symbolized_stackfunction) |
| 544 self._symbolized_stacksourcefile = [ |
| 545 symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address) |
| 546 for address in self._stacktrace] |
| 547 self._symbolized_joined_stacksourcefile = ' '.join( |
| 548 self._symbolized_stacksourcefile) |
513 if not self._typeinfo: | 549 if not self._typeinfo: |
514 self._symbolized_typeinfo = 'no typeinfo' | 550 self._symbolized_typeinfo = 'no typeinfo' |
515 else: | 551 else: |
516 self._symbolized_typeinfo = symbol_mapping_cache.lookup( | 552 self._symbolized_typeinfo = symbol_mapping_cache.lookup( |
517 TYPEINFO_ADDRESS, self._typeinfo) | 553 TYPEINFO_SYMBOLS, self._typeinfo) |
518 if not self._symbolized_typeinfo: | 554 if not self._symbolized_typeinfo: |
519 self._symbolized_typeinfo = 'no typeinfo' | 555 self._symbolized_typeinfo = 'no typeinfo' |
520 | 556 |
521 def clear_component_cache(self): | 557 def clear_component_cache(self): |
522 self.component_cache = '' | 558 self.component_cache = '' |
523 | 559 |
524 @property | 560 @property |
525 def stacktrace(self): | 561 def stacktrace(self): |
526 return self._stacktrace | 562 return self._stacktrace |
527 | 563 |
528 @property | 564 @property |
529 def mmap(self): | 565 def mmap(self): |
530 return self._mmap | 566 return self._mmap |
531 | 567 |
532 @property | 568 @property |
533 def typeinfo(self): | 569 def typeinfo(self): |
534 return self._typeinfo | 570 return self._typeinfo |
535 | 571 |
536 @property | 572 @property |
537 def typeinfo_name(self): | 573 def typeinfo_name(self): |
538 return self._typeinfo_name | 574 return self._typeinfo_name |
539 | 575 |
540 @property | 576 @property |
541 def symbolized_stacktrace(self): | 577 def symbolized_stackfunction(self): |
542 return self._symbolized_stacktrace | 578 return self._symbolized_stackfunction |
543 | 579 |
544 @property | 580 @property |
545 def symbolized_joined_stacktrace(self): | 581 def symbolized_joined_stackfunction(self): |
546 return self._symbolized_joined_stacktrace | 582 return self._symbolized_joined_stackfunction |
| 583 |
| 584 @property |
| 585 def symbolized_stacksourcefile(self): |
| 586 return self._symbolized_stacksourcefile |
| 587 |
| 588 @property |
| 589 def symbolized_joined_stacksourcefile(self): |
| 590 return self._symbolized_joined_stacksourcefile |
547 | 591 |
548 @property | 592 @property |
549 def symbolized_typeinfo(self): | 593 def symbolized_typeinfo(self): |
550 return self._symbolized_typeinfo | 594 return self._symbolized_typeinfo |
551 | 595 |
552 | 596 |
553 class BucketSet(object): | 597 class BucketSet(object): |
554 """Represents a set of bucket.""" | 598 """Represents a set of bucket.""" |
555 def __init__(self): | 599 def __init__(self): |
556 self._buckets = {} | 600 self._buckets = {} |
557 self._addresses = { | 601 self._code_addresses = set() |
558 FUNCTION_ADDRESS: set(), | 602 self._typeinfo_addresses = set() |
559 TYPEINFO_ADDRESS: set(), | |
560 } | |
561 | 603 |
562 def load(self, prefix): | 604 def load(self, prefix): |
563 """Loads all related bucket files. | 605 """Loads all related bucket files. |
564 | 606 |
565 Args: | 607 Args: |
566 prefix: A prefix string for bucket file names. | 608 prefix: A prefix string for bucket file names. |
567 """ | 609 """ |
568 LOGGER.info('Loading bucket files.') | 610 LOGGER.info('Loading bucket files.') |
569 | 611 |
570 n = 0 | 612 n = 0 |
(...skipping 13 matching lines...) |
584 for line in bucket_f: | 626 for line in bucket_f: |
585 words = line.split() | 627 words = line.split() |
586 typeinfo = None | 628 typeinfo = None |
587 typeinfo_name = '' | 629 typeinfo_name = '' |
588 stacktrace_begin = 2 | 630 stacktrace_begin = 2 |
589 for index, word in enumerate(words): | 631 for index, word in enumerate(words): |
590 if index < 2: | 632 if index < 2: |
591 continue | 633 continue |
592 if word[0] == 't': | 634 if word[0] == 't': |
593 typeinfo = int(word[1:], 16) | 635 typeinfo = int(word[1:], 16) |
594 self._addresses[TYPEINFO_ADDRESS].add(typeinfo) | 636 self._typeinfo_addresses.add(typeinfo) |
595 elif word[0] == 'n': | 637 elif word[0] == 'n': |
596 typeinfo_name = word[1:] | 638 typeinfo_name = word[1:] |
597 else: | 639 else: |
598 stacktrace_begin = index | 640 stacktrace_begin = index |
599 break | 641 break |
600 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] | 642 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] |
601 for frame in stacktrace: | 643 for frame in stacktrace: |
602 self._addresses[FUNCTION_ADDRESS].add(frame) | 644 self._code_addresses.add(frame) |
603 self._buckets[int(words[0])] = Bucket( | 645 self._buckets[int(words[0])] = Bucket( |
604 stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name) | 646 stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name) |
605 | 647 |
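Working backwards from the parser above, each bucket file line is: a bucket id, the allocator word ('mmap' or not), optional 't<hex>' (typeinfo address) and 'n<name>' (typeinfo name) words, then the stacktrace as hexadecimal addresses. A hypothetical line and how it decomposes:

    line = '123 malloc t7f3e00 nWebCore::Node 7f3e01 7f3e02'
    # words[0]         -> bucket id 123
    # words[1]         -> 'malloc', so mmap is False
    # 't7f3e00'        -> typeinfo address 0x7f3e00 (recorded as a typeinfo address)
    # 'nWebCore::Node' -> typeinfo name
    # '7f3e01 7f3e02'  -> stacktrace [0x7f3e01, 0x7f3e02] (recorded as code addresses)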
606 def __iter__(self): | 648 def __iter__(self): |
607 for bucket_id, bucket_content in self._buckets.iteritems(): | 649 for bucket_id, bucket_content in self._buckets.iteritems(): |
608 yield bucket_id, bucket_content | 650 yield bucket_id, bucket_content |
609 | 651 |
610 def __getitem__(self, bucket_id): | 652 def __getitem__(self, bucket_id): |
611 return self._buckets[bucket_id] | 653 return self._buckets[bucket_id] |
612 | 654 |
613 def get(self, bucket_id): | 655 def get(self, bucket_id): |
614 return self._buckets.get(bucket_id) | 656 return self._buckets.get(bucket_id) |
615 | 657 |
616 def symbolize(self, symbol_mapping_cache): | 658 def symbolize(self, symbol_mapping_cache): |
617 for bucket_content in self._buckets.itervalues(): | 659 for bucket_content in self._buckets.itervalues(): |
618 bucket_content.symbolize(symbol_mapping_cache) | 660 bucket_content.symbolize(symbol_mapping_cache) |
619 | 661 |
620 def clear_component_cache(self): | 662 def clear_component_cache(self): |
621 for bucket_content in self._buckets.itervalues(): | 663 for bucket_content in self._buckets.itervalues(): |
622 bucket_content.clear_component_cache() | 664 bucket_content.clear_component_cache() |
623 | 665 |
624 def iter_addresses(self, address_type): | 666 def iter_addresses(self, symbol_type): |
625 for function in self._addresses[address_type]: | 667 if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]: |
626 yield function | 668 for function in self._code_addresses: |
| 669 yield function |
| 670 else: |
| 671 for function in self._typeinfo_addresses: |
| 672 yield function |
627 | 673 |
628 | 674 |
629 class Dump(object): | 675 class Dump(object): |
630 """Represents a heap profile dump.""" | 676 """Represents a heap profile dump.""" |
631 | 677 |
632 def __init__(self, path, time): | 678 def __init__(self, path, time): |
633 self._path = path | 679 self._path = path |
634 self._time = time | 680 self._time = time |
635 self._stacktrace_lines = [] | 681 self._stacktrace_lines = [] |
636 self._global_stats = {} # used only in apply_policy | 682 self._global_stats = {} # used only in apply_policy |
(...skipping 196 matching lines...) |
833 prefix = Command._find_prefix(dump_path) | 879 prefix = Command._find_prefix(dump_path) |
834 symbol_data_sources = SymbolDataSources(prefix) | 880 symbol_data_sources = SymbolDataSources(prefix) |
835 symbol_data_sources.prepare() | 881 symbol_data_sources.prepare() |
836 bucket_set = BucketSet() | 882 bucket_set = BucketSet() |
837 bucket_set.load(prefix) | 883 bucket_set.load(prefix) |
838 if multiple: | 884 if multiple: |
839 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) | 885 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) |
840 else: | 886 else: |
841 dump = Dump.load(dump_path) | 887 dump = Dump.load(dump_path) |
842 symbol_mapping_cache = SymbolMappingCache() | 888 symbol_mapping_cache = SymbolMappingCache() |
843 with open(prefix + '.funcsym', 'a+') as cache_f: | 889 with open(prefix + '.cache.function', 'a+') as cache_f: |
844 symbol_mapping_cache.update( | 890 symbol_mapping_cache.update( |
845 FUNCTION_ADDRESS, bucket_set, | 891 FUNCTION_SYMBOLS, bucket_set, |
846 SymbolFinder(FUNCTION_ADDRESS, symbol_data_sources), cache_f) | 892 SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f) |
847 with open(prefix + '.typesym', 'a+') as cache_f: | 893 with open(prefix + '.cache.typeinfo', 'a+') as cache_f: |
848 symbol_mapping_cache.update( | 894 symbol_mapping_cache.update( |
849 TYPEINFO_ADDRESS, bucket_set, | 895 TYPEINFO_SYMBOLS, bucket_set, |
850 SymbolFinder(TYPEINFO_ADDRESS, symbol_data_sources), cache_f) | 896 SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f) |
| 897 with open(prefix + '.cache.sourcefile', 'a+') as cache_f: |
| 898 symbol_mapping_cache.update( |
| 899 SOURCEFILE_SYMBOLS, bucket_set, |
| 900 SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f) |
851 bucket_set.symbolize(symbol_mapping_cache) | 901 bucket_set.symbolize(symbol_mapping_cache) |
852 if multiple: | 902 if multiple: |
853 return (bucket_set, dump_list) | 903 return (bucket_set, dump_list) |
854 else: | 904 else: |
855 return (bucket_set, dump) | 905 return (bucket_set, dump) |
856 | 906 |
857 @staticmethod | 907 @staticmethod |
858 def _find_prefix(path): | 908 def _find_prefix(path): |
859 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) | 909 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
860 | 910 |
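_find_prefix() strips the numbered '.NNNN.heap' suffix so that sibling files (.maps, .cache.function, and so on) can be derived from one dump path; with a hypothetical dump name:

    >>> re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', 'chrome.12345.0001.heap')
    'chrome.12345'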
(...skipping 68 matching lines...) |
929 bucket_set: A BucketSet object. | 979 bucket_set: A BucketSet object. |
930 out: A file object to output. | 980 out: A file object to output. |
931 """ | 981 """ |
932 for line in dump.iter_stacktrace: | 982 for line in dump.iter_stacktrace: |
933 words = line.split() | 983 words = line.split() |
934 bucket = bucket_set.get(int(words[BUCKET_ID])) | 984 bucket = bucket_set.get(int(words[BUCKET_ID])) |
935 if not bucket: | 985 if not bucket: |
936 continue | 986 continue |
937 for i in range(0, BUCKET_ID - 1): | 987 for i in range(0, BUCKET_ID - 1): |
938 out.write(words[i] + ' ') | 988 out.write(words[i] + ' ') |
939 for frame in bucket.symbolized_stacktrace: | 989 for frame in bucket.symbolized_stackfunction: |
940 out.write(frame + ' ') | 990 out.write(frame + ' ') |
941 out.write('\n') | 991 out.write('\n') |
942 | 992 |
943 | 993 |
944 class PolicyCommands(Command): | 994 class PolicyCommands(Command): |
945 def __init__(self, command): | 995 def __init__(self, command): |
946 super(PolicyCommands, self).__init__( | 996 super(PolicyCommands, self).__init__( |
947 'Usage: %%prog %s [-p POLICY] <first-dump>' % command) | 997 'Usage: %%prog %s [-p POLICY] <first-dump>' % command) |
948 self._parser.add_option('-p', '--policy', type='string', dest='policy', | 998 self._parser.add_option('-p', '--policy', type='string', dest='policy', |
949 help='profile with POLICY', metavar='POLICY') | 999 help='profile with POLICY', metavar='POLICY') |
(...skipping 164 matching lines...) |
1114 json_base['policies'][label] = { | 1164 json_base['policies'][label] = { |
1115 'legends': policy_set[label].components, | 1165 'legends': policy_set[label].components, |
1116 'snapshots': [], | 1166 'snapshots': [], |
1117 } | 1167 } |
1118 | 1168 |
1119 LOGGER.info('Applying a policy %s to...' % label) | 1169 LOGGER.info('Applying a policy %s to...' % label) |
1120 for dump in dumps: | 1170 for dump in dumps: |
1121 component_sizes = PolicyCommands._apply_policy( | 1171 component_sizes = PolicyCommands._apply_policy( |
1122 dump, policy_set[label], bucket_set, dumps[0].time) | 1172 dump, policy_set[label], bucket_set, dumps[0].time) |
1123 component_sizes['dump_path'] = dump.path | 1173 component_sizes['dump_path'] = dump.path |
1124 component_sizes['dump_time'] = datetime.fromtimestamp( | 1174 component_sizes['dump_time'] = datetime.datetime.fromtimestamp( |
1125 dump.time).strftime('%Y-%m-%d %H:%M:%S') | 1175 dump.time).strftime('%Y-%m-%d %H:%M:%S') |
1126 json_base['policies'][label]['snapshots'].append(component_sizes) | 1176 json_base['policies'][label]['snapshots'].append(component_sizes) |
1127 | 1177 |
1128 bucket_set.clear_component_cache() | 1178 bucket_set.clear_component_cache() |
1129 | 1179 |
1130 json.dump(json_base, out, indent=2, sort_keys=True) | 1180 json.dump(json_base, out, indent=2, sort_keys=True) |
1131 | 1181 |
1132 return 0 | 1182 return 0 |
1133 | 1183 |
1134 | 1184 |
(...skipping 55 matching lines...) |
1190 out: An IO object to output. | 1240 out: An IO object to output. |
1191 """ | 1241 """ |
1192 sizes = {} | 1242 sizes = {} |
1193 | 1243 |
1194 ExpandCommand._accumulate( | 1244 ExpandCommand._accumulate( |
1195 dump, policy, bucket_set, component_name, depth, sizes) | 1245 dump, policy, bucket_set, component_name, depth, sizes) |
1196 | 1246 |
1197 sorted_sizes_list = sorted( | 1247 sorted_sizes_list = sorted( |
1198 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | 1248 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) |
1199 total = 0 | 1249 total = 0 |
| 1250 # TODO(dmikurube): Better formatting. |
1200 for size_pair in sorted_sizes_list: | 1251 for size_pair in sorted_sizes_list: |
1201 out.write('%10d %s\n' % (size_pair[1], size_pair[0])) | 1252 out.write('%10d %s\n' % (size_pair[1], size_pair[0])) |
1202 total += size_pair[1] | 1253 total += size_pair[1] |
1203 LOGGER.info('total: %d\n' % total) | 1254 LOGGER.info('total: %d\n' % total) |
1204 | 1255 |
1205 @staticmethod | 1256 @staticmethod |
1206 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): | 1257 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): |
1207 for line in dump.iter_stacktrace: | 1258 for line in dump.iter_stacktrace: |
1208 words = line.split() | 1259 words = line.split() |
1209 bucket = bucket_set.get(int(words[BUCKET_ID])) | 1260 bucket = bucket_set.get(int(words[BUCKET_ID])) |
1210 component_match = policy.find(bucket) | 1261 component_match = policy.find(bucket) |
1211 if component_match == component_name: | 1262 if component_match == component_name: |
1212 stacktrace_sequence = '' | 1263 stacktrace_sequence = '' |
1213 if bucket.typeinfo: | 1264 if bucket.typeinfo: |
1214 stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo | 1265 stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo |
1215 stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name | 1266 stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name |
1216 for stack in bucket.symbolized_stacktrace[ | 1267 for function, sourcefile in zip( |
1217 0 : min(len(bucket.symbolized_stacktrace), 1 + depth)]: | 1268 bucket.symbolized_stackfunction[ |
1218 stacktrace_sequence += stack + ' ' | 1269 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)], |
| 1270 bucket.symbolized_stacksourcefile[ |
| 1271 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]): |
| 1272 stacktrace_sequence += '%s(@%s) ' % (function, sourcefile) |
1219 if not stacktrace_sequence in sizes: | 1273 if not stacktrace_sequence in sizes: |
1220 sizes[stacktrace_sequence] = 0 | 1274 sizes[stacktrace_sequence] = 0 |
1221 sizes[stacktrace_sequence] += int(words[COMMITTED]) | 1275 sizes[stacktrace_sequence] += int(words[COMMITTED]) |
1222 | 1276 |
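With the zip above, each accumulated key now interleaves the function and its source file per frame. For a hypothetical bucket with typeinfo and two symbolized frames, the key built by _accumulate would look like:

    (type=WebCore::Node) (type.name=Node) MessageLoop::Run(@base/message_loop.cc) main(@chrome/app/chrome_main.cc)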
1223 | 1277 |
1224 class PProfCommand(Command): | 1278 class PProfCommand(Command): |
1225 def __init__(self): | 1279 def __init__(self): |
1226 super(PProfCommand, self).__init__( | 1280 super(PProfCommand, self).__init__( |
1227 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') | 1281 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') |
1228 self._parser.add_option('-c', '--component', type='string', | 1282 self._parser.add_option('-c', '--component', type='string', |
(...skipping 210 matching lines...) |
1439 errorcode = COMMANDS[action]().do(sys.argv) | 1493 errorcode = COMMANDS[action]().do(sys.argv) |
1440 except ParsingException, e: | 1494 except ParsingException, e: |
1441 errorcode = 1 | 1495 errorcode = 1 |
1442 sys.stderr.write('Exit by parsing error: %s\n' % e) | 1496 sys.stderr.write('Exit by parsing error: %s\n' % e) |
1443 | 1497 |
1444 return errorcode | 1498 return errorcode |
1445 | 1499 |
1446 | 1500 |
1447 if __name__ == '__main__': | 1501 if __name__ == '__main__': |
1448 sys.exit(main()) | 1502 sys.exit(main()) |