OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env bash |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """The deep heap profiler script for Chrome.""" | 6 # Re-direct the arguments to dmprof.py. |
7 | 7 |
8 from datetime import datetime | 8 BASEDIR=$(dirname "$0") |
9 import json | 9 ARGV=("$@") |
10 import logging | |
11 import optparse | |
12 import os | |
13 import re | |
14 import shutil | |
15 import subprocess | |
16 import sys | |
17 import tempfile | |
18 | 10 |
19 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | 11 PYTHONPATH="$BASEDIR/../python/google" python \ |
20 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | 12 "$BASEDIR/dmprof.py" "${ARGV[@]}" |
21 BASE_PATH, os.pardir, 'find_runtime_symbols') | |
22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | |
23 | |
24 from find_runtime_symbols import find_runtime_symbols_list | |
25 from find_runtime_symbols import find_runtime_typeinfo_symbols_list | |
26 from find_runtime_symbols import RuntimeSymbolsInProcess | |
27 from prepare_symbol_info import prepare_symbol_info | |
28 | |
29 BUCKET_ID = 5 | |
30 VIRTUAL = 0 | |
31 COMMITTED = 1 | |
32 ALLOC_COUNT = 2 | |
33 FREE_COUNT = 3 | |
34 NULL_REGEX = re.compile('') | |
35 | |
36 LOGGER = logging.getLogger('dmprof') | |
37 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') | |
38 FUNCTION_ADDRESS = 'function' | |
39 TYPEINFO_ADDRESS = 'typeinfo' | |
40 | |
41 | |
42 # Heap Profile Dump versions | |
43 | |
44 # DUMP_DEEP_[1-4] are obsolete. | |
45 # DUMP_DEEP_2+ distinguish mmap regions from malloc chunks. | |
46 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. | |
47 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". | |
48 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. | |
49 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. | |
50 DUMP_DEEP_1 = 'DUMP_DEEP_1' | |
51 DUMP_DEEP_2 = 'DUMP_DEEP_2' | |
52 DUMP_DEEP_3 = 'DUMP_DEEP_3' | |
53 DUMP_DEEP_4 = 'DUMP_DEEP_4' | |
54 | |
55 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) | |
56 | |
57 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap. | |
58 # malloc and mmap are identified in bucket files. | |
59 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4. | |
60 DUMP_DEEP_5 = 'DUMP_DEEP_5' | |
61 | |
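# A minimal sketch of a DUMP_DEEP_5 file, inferred from the parsers below | |
# (_parse_version, _parse_global_stats and _extract_stacktrace_lines); all | |
# numbers are made up for illustration: | |
# | |
#   heap profile: DUMP_DEEP_5 | |
#   GLOBAL_STATS: | |
#   total            102400     81920 | |
#   ... (one line per global stat: name ... virtual committed) | |
#   STACKTRACES: | |
#   8192 4096 10 2 @ 1234 | |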
62 | |
63 # Heap Profile Policy versions | |
64 | |
65 # POLICY_DEEP_1 DOES NOT include allocation_type columns. | |
66 # mmap regions are distinguished by mmap frames in the pattern column. | |
67 POLICY_DEEP_1 = 'POLICY_DEEP_1' | |
68 | |
69 # POLICY_DEEP_2 DOES include allocation_type columns. | |
70 # mmap regions are distinguished by the allocation_type column. | |
71 POLICY_DEEP_2 = 'POLICY_DEEP_2' | |
72 | |
73 # POLICY_DEEP_3 is in JSON format. | |
74 POLICY_DEEP_3 = 'POLICY_DEEP_3' | |
75 | |
76 # POLICY_DEEP_4 contains typeinfo. | |
77 POLICY_DEEP_4 = 'POLICY_DEEP_4' | |
78 | |
79 | |
80 class EmptyDumpException(Exception): | |
81 def __init__(self, value): | |
82 self.value = value | |
83 def __str__(self): | |
84 return repr(self.value) | |
85 | |
86 | |
87 class ParsingException(Exception): | |
88 def __init__(self, value): | |
89 self.value = value | |
90 def __str__(self): | |
91 return repr(self.value) | |
92 | |
93 | |
94 class InvalidDumpException(ParsingException): | |
95 def __init__(self, value): | |
96 self.value = value | |
97 def __str__(self): | |
98 return "invalid heap profile dump: %s" % repr(self.value) | |
99 | |
100 | |
101 class ObsoleteDumpVersionException(ParsingException): | |
102 def __init__(self, value): | |
103 self.value = value | |
104 def __str__(self): | |
105 return "obsolete heap profile dump version: %s" % repr(self.value) | |
106 | |
107 | |
108 def skip_while(index, max_index, skipping_condition): | |
109 """Increments |index| until |skipping_condition|(|index|) is False. | |
110 | |
111 Returns: | |
112 A pair of an integer indicating the line number after skipping, and a | |
113 boolean which is True if a line for which skipping_condition is False | |
114 was found. | |
115 """ | |
116 while skipping_condition(index): | |
117 index += 1 | |
118 if index >= max_index: | |
119 return index, False | |
120 return index, True | |
121 | |
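# An illustrative use of skip_while; the lines below are made up: | |
# | |
#   lines = ['', '', 'GLOBAL_STATS:', 'total 2 1'] | |
#   index, found = skip_while(0, len(lines), lambda n: not lines[n]) | |
#   # => index == 2, found == True | |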
122 | |
123 class SymbolMapping(object): | |
124 """Manages all symbol information on process memory mapping. | |
125 | |
126 The symbol information consists of all symbols in the binary files obtained | |
127 by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps, | |
128 nm and so on. It is the minimum information required to run dmprof. | |
129 | |
130 The information is prepared in a directory "|prefix|.symmap" by prepare(). | |
131 The directory is more portable than Chromium binaries. Users can save it | |
132 and re-analyze with the portable one. | |
133 | |
134 Note that loading the symbol information takes a long time. It is very big | |
135 in general since it cannot know in advance which functions are called and | |
136 which types are actually used. Used symbols can be cached by "SymbolCache". | |
137 """ | |
138 def __init__(self, prefix): | |
139 self._prefix = prefix | |
140 self._prepared_symbol_mapping_path = None | |
141 self._loaded_symbol_mapping = None | |
142 | |
143 def prepare(self): | |
144 """Extracts symbol mapping from binaries and prepares it to use. | |
145 | |
146 The symbol mapping is stored in a directory whose name is stored in | |
147 |self._prepared_symbol_mapping_path|. | |
148 | |
149 Returns: | |
150 True if succeeded. | |
151 """ | |
152 LOGGER.info('Preparing symbol mapping...') | |
153 self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info( | |
154 self._prefix + '.maps', self._prefix + '.symmap', True) | |
155 if self._prepared_symbol_mapping_path: | |
156 LOGGER.info(' Prepared symbol mapping.') | |
157 if used_tempdir: | |
158 LOGGER.warn(' Using a temporary directory for symbol mapping.') | |
159 LOGGER.warn(' Delete it yourself when it is no longer needed,') | |
160 LOGGER.warn(' or move the directory somewhere to use it later.') | |
161 return True | |
162 else: | |
163 LOGGER.warn(' Failed to prepare symbol mapping.') | |
164 return False | |
165 | |
166 def get(self): | |
167 """Returns symbol mapping. | |
168 | |
169 Returns: | |
170 Loaded symbol mapping. None if failed. | |
171 """ | |
172 if not self._prepared_symbol_mapping_path and not self.prepare(): | |
173 return None | |
174 if not self._loaded_symbol_mapping: | |
175 LOGGER.info('Loading symbol mapping...') | |
176 self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load( | |
177 self._prepared_symbol_mapping_path) | |
178 return self._loaded_symbol_mapping | |
179 | |
180 | |
181 class SymbolCache(object): | |
182 """Manages cache of used symbol mapping. | |
183 | |
184 The original symbol mapping is provided by "SymbolMapping" (built from | |
185 maps, nm, readelf and the like), and "SymbolCache" just caches how dmprof | |
186 interprets each address to speed up later analyses of the same binary and | |
187 profile dumps. Handling the full symbol mapping in "SymbolMapping" takes a | |
188 long time; "SymbolCache" caches the used symbols in memory and in files. | |
189 """ | |
190 def __init__(self, prefix): | |
191 self._prefix = prefix | |
192 self._symbol_cache_paths = { | |
193 FUNCTION_ADDRESS: prefix + '.funcsym', | |
194 TYPEINFO_ADDRESS: prefix + '.typesym', | |
195 } | |
196 self._find_runtime_symbols_functions = { | |
197 FUNCTION_ADDRESS: find_runtime_symbols_list, | |
198 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list, | |
199 } | |
200 self._symbol_caches = { | |
201 FUNCTION_ADDRESS: {}, | |
202 TYPEINFO_ADDRESS: {}, | |
203 } | |
204 | |
205 def update(self, address_type, bucket_set, symbol_mapping): | |
206 """Updates symbol mapping on memory and in a ".*sym" cache file. | |
207 | |
208 It reads cached symbol mapping from a ".*sym" file if it exists. Then, | |
209 it looks up unresolved addresses from a given "SymbolMapping". Finally, | |
210 both symbol mappings, in memory and in the ".*sym" cache file, are updated. | |
211 | |
212 Symbol files are formatted as follows: | |
213 <Address> <Symbol> | |
214 <Address> <Symbol> | |
215 <Address> <Symbol> | |
216 ... | |
217 | |
218 Args: | |
219 address_type: A type of addresses to update. It should be one of | |
220 FUNCTION_ADDRESS or TYPEINFO_ADDRESS. | |
221 bucket_set: A BucketSet object. | |
222 symbol_mapping: A SymbolMapping object. | |
223 """ | |
224 self._load(address_type) | |
225 | |
226 unresolved_addresses = sorted( | |
227 address for address in bucket_set.iter_addresses(address_type) | |
228 if address not in self._symbol_caches[address_type]) | |
229 | |
230 if not unresolved_addresses: | |
231 LOGGER.info('No need to resolve any more addresses.') | |
232 return | |
233 | |
234 symbol_cache_path = self._symbol_cache_paths[address_type] | |
235 with open(symbol_cache_path, mode='a+') as symbol_f: | |
236 LOGGER.info('Resolving %d unresolved addresses.' % | |
237 len(unresolved_addresses)) | |
238 symbol_list = self._find_runtime_symbols_functions[address_type]( | |
239 symbol_mapping.get(), unresolved_addresses) | |
240 | |
241 for address, symbol in zip(unresolved_addresses, symbol_list): | |
242 stripped_symbol = symbol.strip() or '??' | |
243 self._symbol_caches[address_type][address] = stripped_symbol | |
244 symbol_f.write('%x %s\n' % (address, stripped_symbol)) | |
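# A sketch of one line in the ".funcsym"/".typesym" cache file written | |
# above; the address and symbol are made up: | |
# | |
#   7f1234abcd00 v8::internal::Heap::AllocateRaw | |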
245 | |
246 def lookup(self, address_type, address): | |
247 """Looks up a symbol for a given |address|. | |
248 | |
249 Args: | |
250 address_type: A type of addresses to look up. It should be one of | |
251 FUNCTION_ADDRESS or TYPEINFO_ADDRESS. | |
252 address: An integer that represents an address. | |
253 | |
254 Returns: | |
255 A string that represents a symbol. | |
256 """ | |
257 return self._symbol_caches[address_type].get(address) | |
258 | |
259 def _load(self, address_type): | |
260 symbol_cache_path = self._symbol_cache_paths[address_type] | |
261 try: | |
262 with open(symbol_cache_path, mode='r') as symbol_f: | |
263 for line in symbol_f: | |
264 items = line.rstrip().split(None, 1) | |
265 if len(items) == 1: | |
266 items.append('??') | |
267 self._symbol_caches[address_type][int(items[0], 16)] = items[1] | |
268 LOGGER.info('Loaded %d entries from symbol cache.' % | |
269 len(self._symbol_caches[address_type])) | |
270 except IOError: | |
271 LOGGER.info('No valid symbol cache file is found.') | |
272 | |
273 | |
274 class Rule(object): | |
275 """Represents one matching rule in a policy file.""" | |
276 | |
277 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None): | |
278 self._name = name | |
279 self._mmap = mmap | |
280 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z') | |
281 if typeinfo_pattern: | |
282 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') | |
283 else: | |
284 self._typeinfo_pattern = None | |
285 | |
286 @property | |
287 def name(self): | |
288 return self._name | |
289 | |
290 @property | |
291 def mmap(self): | |
292 return self._mmap | |
293 | |
294 @property | |
295 def stacktrace_pattern(self): | |
296 return self._stacktrace_pattern | |
297 | |
298 @property | |
299 def typeinfo_pattern(self): | |
300 return self._typeinfo_pattern | |
301 | |
302 | |
303 class Policy(object): | |
304 """Represents a policy, a content of a policy file.""" | |
305 | |
306 def __init__(self, rules, version, components): | |
307 self._rules = rules | |
308 self._version = version | |
309 self._components = components | |
310 | |
311 @property | |
312 def rules(self): | |
313 return self._rules | |
314 | |
315 @property | |
316 def version(self): | |
317 return self._version | |
318 | |
319 @property | |
320 def components(self): | |
321 return self._components | |
322 | |
323 def find(self, bucket): | |
324 """Finds a matching component name which a given |bucket| belongs to. | |
325 | |
326 Args: | |
327 bucket: A Bucket object to be searched for. | |
328 | |
329 Returns: | |
330 A string representing a component name. | |
331 """ | |
332 if not bucket: | |
333 return 'no-bucket' | |
334 if bucket.component_cache: | |
335 return bucket.component_cache | |
336 | |
337 stacktrace = bucket.symbolized_joined_stacktrace | |
338 typeinfo = bucket.symbolized_typeinfo | |
339 if typeinfo.startswith('0x'): | |
340 typeinfo = bucket.typeinfo_name | |
341 | |
342 for rule in self._rules: | |
343 if (bucket.mmap == rule.mmap and | |
344 rule.stacktrace_pattern.match(stacktrace) and | |
345 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): | |
346 bucket.component_cache = rule.name | |
347 return rule.name | |
348 | |
349 assert False | |
350 | |
351 @staticmethod | |
352 def load(filename, format): | |
353 """Loads a policy file of |filename| in a |format|. | |
354 | |
355 Args: | |
356 filename: A filename to be loaded. | |
357 format: A string to specify a format of the file. Only 'json' is | |
358 supported for now. | |
359 | |
360 Returns: | |
361 A loaded Policy object. | |
362 """ | |
363 with open(os.path.join(BASE_PATH, filename)) as policy_f: | |
364 return Policy.parse(policy_f, format) | |
365 | |
366 @staticmethod | |
367 def parse(policy_f, format): | |
368 """Parses a policy file content in a |format|. | |
369 | |
370 Args: | |
371 policy_f: An IO object to be loaded. | |
372 format: A string to specify a format of the file. Only 'json' is | |
373 supported for now. | |
374 | |
375 Returns: | |
376 A loaded Policy object. | |
377 """ | |
378 if format == 'json': | |
379 return Policy._parse_json(policy_f) | |
380 else: | |
381 return None | |
382 | |
383 @staticmethod | |
384 def _parse_json(policy_f): | |
385 """Parses policy file in json format. | |
386 | |
387 A policy file contains component names and their stacktrace patterns | |
388 written as regular expressions. Those patterns are matched against the | |
389 symbols of each stacktrace in the order written in the policy file. | |
390 | |
391 Args: | |
392 policy_f: A File/IO object to read. | |
393 | |
394 Returns: | |
395 A loaded policy object. | |
396 """ | |
397 policy = json.load(policy_f) | |
398 | |
399 rules = [] | |
400 for rule in policy['rules']: | |
401 rules.append(Rule( | |
402 rule['name'], | |
403 rule['allocator'] == 'mmap', | |
404 rule['stacktrace'], | |
405 rule['typeinfo'] if 'typeinfo' in rule else None)) | |
406 return Policy(rules, policy['version'], policy['components']) | |
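# A minimal sketch of the JSON policy format accepted above; the component | |
# and rule names are made up for illustration. Policies normally end with | |
# catch-all rules, since Policy.find() asserts that some rule matches: | |
# | |
#   { | |
#     "version": "POLICY_DEEP_4", | |
#     "components": ["mmap-v8", "tc-std-string", "unknown"], | |
#     "rules": [ | |
#       {"name": "mmap-v8", "allocator": "mmap", "stacktrace": ".*v8::.*"}, | |
#       {"name": "tc-std-string", "allocator": "malloc", | |
#        "stacktrace": ".*std::basic_string.*"}, | |
#       {"name": "unknown", "allocator": "mmap", "stacktrace": ".*"}, | |
#       {"name": "unknown", "allocator": "malloc", "stacktrace": ".*"} | |
#     ] | |
#   } | |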
407 | |
408 | |
409 class PolicySet(object): | |
410 """Represents a set of policies.""" | |
411 | |
412 def __init__(self, policy_directory): | |
413 self._policy_directory = policy_directory | |
414 | |
415 @staticmethod | |
416 def load(labels=None): | |
417 """Loads a set of policies via the "default policy directory". | |
418 | |
419 The "default policy directory" contains pairs of policies and their labels. | |
420 For example, a policy "policy.l0.json" is labeled "l0" in the default | |
421 policy directory "policies.json". | |
422 | |
423 All policies in the directory are loaded by default. Policies can be | |
424 limited by |labels|. | |
425 | |
426 Args: | |
427 labels: An array that contains policy labels to be loaded. | |
428 | |
429 Returns: | |
430 A PolicySet object. | |
431 """ | |
432 default_policy_directory = PolicySet._load_default_policy_directory() | |
433 if labels: | |
434 specified_policy_directory = {} | |
435 for label in labels: | |
436 if label in default_policy_directory: | |
437 specified_policy_directory[label] = default_policy_directory[label] | |
438 # TODO(dmikurube): Load an un-labeled policy file. | |
439 return PolicySet._load_policies(specified_policy_directory) | |
440 else: | |
441 return PolicySet._load_policies(default_policy_directory) | |
442 | |
443 def __len__(self): | |
444 return len(self._policy_directory) | |
445 | |
446 def __iter__(self): | |
447 for label in self._policy_directory: | |
448 yield label | |
449 | |
450 def __getitem__(self, label): | |
451 return self._policy_directory[label] | |
452 | |
453 @staticmethod | |
454 def _load_default_policy_directory(): | |
455 with open(POLICIES_JSON_PATH, mode='r') as policies_f: | |
456 default_policy_directory = json.load(policies_f) | |
457 return default_policy_directory | |
458 | |
459 @staticmethod | |
460 def _load_policies(directory): | |
461 LOGGER.info('Loading policy files.') | |
462 policies = {} | |
463 for label in directory: | |
464 LOGGER.info(' %s: %s' % (label, directory[label]['file'])) | |
465 loaded = Policy.load(directory[label]['file'], directory[label]['format']) | |
466 if loaded: | |
467 policies[label] = loaded | |
468 return PolicySet(policies) | |
469 | |
470 | |
471 class Bucket(object): | |
472 """Represents a bucket, which is a unit of memory block classification.""" | |
473 | |
474 def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name): | |
475 self._stacktrace = stacktrace | |
476 self._mmap = mmap | |
477 self._typeinfo = typeinfo | |
478 self._typeinfo_name = typeinfo_name | |
479 | |
480 self._symbolized_stacktrace = stacktrace | |
481 self._symbolized_joined_stacktrace = '' | |
482 self._symbolized_typeinfo = typeinfo_name | |
483 | |
484 self.component_cache = '' | |
485 | |
486 def symbolize(self, symbol_cache): | |
487 """Makes a symbolized stacktrace and typeinfo with |symbol_cache|. | |
488 | |
489 Args: | |
490 symbol_cache: A SymbolCache object. | |
491 """ | |
492 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. | |
493 self._symbolized_stacktrace = [ | |
494 symbol_cache.lookup(FUNCTION_ADDRESS, address) | |
495 for address in self._stacktrace] | |
496 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace) | |
497 if not self._typeinfo: | |
498 self._symbolized_typeinfo = 'no typeinfo' | |
499 else: | |
500 self._symbolized_typeinfo = symbol_cache.lookup( | |
501 TYPEINFO_ADDRESS, self._typeinfo) | |
502 if not self._symbolized_typeinfo: | |
503 self._symbolized_typeinfo = 'no typeinfo' | |
504 | |
505 def clear_component_cache(self): | |
506 self.component_cache = '' | |
507 | |
508 @property | |
509 def stacktrace(self): | |
510 return self._stacktrace | |
511 | |
512 @property | |
513 def mmap(self): | |
514 return self._mmap | |
515 | |
516 @property | |
517 def typeinfo(self): | |
518 return self._typeinfo | |
519 | |
520 @property | |
521 def typeinfo_name(self): | |
522 return self._typeinfo_name | |
523 | |
524 @property | |
525 def symbolized_stacktrace(self): | |
526 return self._symbolized_stacktrace | |
527 | |
528 @property | |
529 def symbolized_joined_stacktrace(self): | |
530 return self._symbolized_joined_stacktrace | |
531 | |
532 @property | |
533 def symbolized_typeinfo(self): | |
534 return self._symbolized_typeinfo | |
535 | |
536 | |
537 class BucketSet(object): | |
538 """Represents a set of bucket.""" | |
539 def __init__(self): | |
540 self._buckets = {} | |
541 self._addresses = { | |
542 FUNCTION_ADDRESS: set(), | |
543 TYPEINFO_ADDRESS: set(), | |
544 } | |
545 | |
546 @staticmethod | |
547 def load(prefix): | |
548 """Loads all related bucket files. | |
549 | |
550 Args: | |
551 prefix: A prefix string for bucket file names. | |
552 | |
553 Returns: | |
554 A loaded BucketSet object. | |
555 """ | |
556 LOGGER.info('Loading bucket files.') | |
557 bucket_set = BucketSet() | |
558 | |
559 n = 0 | |
560 while True: | |
561 path = '%s.%04d.buckets' % (prefix, n) | |
562 if not os.path.exists(path): | |
563 if n > 10: | |
564 break | |
565 n += 1 | |
566 continue | |
567 LOGGER.info(' %s' % path) | |
568 with open(path, 'r') as f: | |
569 bucket_set._load_file(f) | |
570 n += 1 | |
571 | |
572 return bucket_set | |
573 | |
574 def _load_file(self, bucket_f): | |
575 for line in bucket_f: | |
576 words = line.split() | |
577 typeinfo = None | |
578 typeinfo_name = '' | |
579 stacktrace_begin = 2 | |
580 for index, word in enumerate(words): | |
581 if index < 2: | |
582 continue | |
583 if word[0] == 't': | |
584 typeinfo = int(word[1:], 16) | |
585 self._addresses[TYPEINFO_ADDRESS].add(typeinfo) | |
586 elif word[0] == 'n': | |
587 typeinfo_name = word[1:] | |
588 else: | |
589 stacktrace_begin = index | |
590 break | |
591 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] | |
592 for frame in stacktrace: | |
593 self._addresses[FUNCTION_ADDRESS].add(frame) | |
594 self._buckets[int(words[0])] = Bucket( | |
595 stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name) | |
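# A sketch of one bucket file line as parsed above; the ID, addresses and | |
# type name are made up: | |
# | |
#   <bucket_id> <mmap|malloc> [t<typeinfo_addr>] [n<typeinfo_name>] <frames> | |
#   1234 malloc t7f12cd00 nstd::string 7f00001000 7f00002000 | |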
596 | |
597 def __iter__(self): | |
598 for bucket_id, bucket_content in self._buckets.iteritems(): | |
599 yield bucket_id, bucket_content | |
600 | |
601 def __getitem__(self, bucket_id): | |
602 return self._buckets[bucket_id] | |
603 | |
604 def get(self, bucket_id): | |
605 return self._buckets.get(bucket_id) | |
606 | |
607 def symbolize(self, symbol_cache): | |
608 for bucket_content in self._buckets.itervalues(): | |
609 bucket_content.symbolize(symbol_cache) | |
610 | |
611 def clear_component_cache(self): | |
612 for bucket_content in self._buckets.itervalues(): | |
613 bucket_content.clear_component_cache() | |
614 | |
615 def iter_addresses(self, address_type): | |
616 for function in self._addresses[address_type]: | |
617 yield function | |
618 | |
619 | |
620 class Dump(object): | |
621 """Represents a heap profile dump.""" | |
622 | |
623 def __init__(self): | |
624 self._path = '' | |
625 self._time = None | |
626 self._stacktrace_lines = [] | |
627 self._global_stats = {} # used only in apply_policy | |
628 | |
629 self._version = '' | |
630 self._lines = [] | |
631 | |
632 @property | |
633 def path(self): | |
634 return self._path | |
635 | |
636 @property | |
637 def time(self): | |
638 return self._time | |
639 | |
640 @property | |
641 def iter_stacktrace(self): | |
642 for line in self._stacktrace_lines: | |
643 yield line | |
644 | |
645 def global_stat(self, name): | |
646 return self._global_stats[name] | |
647 | |
648 @staticmethod | |
649 def load(path, log_header='Loading a heap profile dump: '): | |
650 """Loads a heap profile dump. | |
651 | |
652 Args: | |
653 path: A file path string to load. | |
654 log_header: A preceding string for log messages. | |
655 | |
656 Returns: | |
657 A loaded Dump object. | |
658 | |
659 Raises: | |
660 ParsingException for invalid heap profile dumps. | |
661 """ | |
662 dump = Dump() | |
663 dump._path = path | |
664 dump._time = os.stat(dump._path).st_mtime | |
665 dump._version = '' | |
666 | |
667 dump._lines = [line for line in open(dump._path, 'r') | |
668 if line and not line.startswith('#')] | |
669 | |
670 try: | |
671 dump._version, ln = dump._parse_version() | |
672 dump._parse_global_stats() | |
673 dump._extract_stacktrace_lines(ln) | |
674 except EmptyDumpException: | |
675 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, path)) | |
676 except ParsingException as e: | |
677 LOGGER.error('%s%s ...error %s' % (log_header, path, e)) | |
678 raise | |
679 else: | |
680 LOGGER.info('%s%s (version: %s)' % (log_header, path, dump._version)) | |
681 | |
682 return dump | |
683 | |
684 def _parse_version(self): | |
685 """Parses a version string in self._lines. | |
686 | |
687 Returns: | |
688 A pair of (a string representing a version of the stacktrace dump, | |
689 and an integer indicating a line number next to the version string). | |
690 | |
691 Raises: | |
692 ParsingException for invalid dump versions. | |
693 """ | |
694 version = '' | |
695 | |
696 # Skip until an identifiable line. | |
697 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | |
698 if not self._lines: | |
699 raise EmptyDumpException('Empty heap dump file.') | |
700 (ln, found) = skip_while( | |
701 0, len(self._lines), | |
702 lambda n: not self._lines[n].startswith(headers)) | |
703 if not found: | |
704 raise InvalidDumpException('No version header.') | |
705 | |
706 # Identify a version. | |
707 if self._lines[ln].startswith('heap profile: '): | |
708 version = self._lines[ln][13:].strip() | |
709 if version == DUMP_DEEP_5: | |
710 (ln, _) = skip_while( | |
711 ln, len(self._lines), | |
712 lambda n: self._lines[n] != 'STACKTRACES:\n') | |
713 elif version in DUMP_DEEP_OBSOLETE: | |
714 raise ObsoleteDumpVersionException(version) | |
715 else: | |
716 raise InvalidDumpException('Invalid version: %s' % version) | |
717 elif self._lines[ln] == 'STACKTRACES:\n': | |
718 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | |
719 elif self._lines[ln] == 'MMAP_STACKTRACES:\n': | |
720 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | |
721 | |
722 return (version, ln) | |
723 | |
724 def _parse_global_stats(self): | |
725 """Parses lines in self._lines as global stats.""" | |
726 (ln, _) = skip_while( | |
727 0, len(self._lines), | |
728 lambda n: self._lines[n] != 'GLOBAL_STATS:\n') | |
729 | |
730 global_stat_names = [ | |
731 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other', | |
732 'nonprofiled-absent', 'nonprofiled-anonymous', | |
733 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | |
734 'nonprofiled-stack', 'nonprofiled-other', | |
735 'profiled-mmap', 'profiled-malloc'] | |
736 | |
737 for prefix in global_stat_names: | |
738 (ln, _) = skip_while( | |
739 ln, len(self._lines), | |
740 lambda n: self._lines[n].split()[0] != prefix) | |
741 words = self._lines[ln].split() | |
742 self._global_stats[prefix + '_virtual'] = int(words[-2]) | |
743 self._global_stats[prefix + '_committed'] = int(words[-1]) | |
744 | |
745 def _extract_stacktrace_lines(self, line_number): | |
746 """Extracts the position of stacktrace lines. | |
747 | |
748 Valid stacktrace lines are stored into self._stacktrace_lines. | |
749 | |
750 Args: | |
751 line_number: A line number to start parsing in lines. | |
752 | |
753 Raises: | |
754 ParsingException for invalid dump versions. | |
755 """ | |
756 if self._version == DUMP_DEEP_5: | |
757 (line_number, _) = skip_while( | |
758 line_number, len(self._lines), | |
759 lambda n: not self._lines[n].split()[0].isdigit()) | |
760 stacktrace_start = line_number | |
761 (line_number, _) = skip_while( | |
762 line_number, len(self._lines), | |
763 lambda n: self._check_stacktrace_line(self._lines[n])) | |
764 self._stacktrace_lines = self._lines[stacktrace_start:line_number] | |
765 | |
766 elif self._version in DUMP_DEEP_OBSOLETE: | |
767 raise ObsoleteDumpVersionException(self._version) | |
768 | |
769 else: | |
770 raise InvalidDumpException('Invalid version: %s' % self._version) | |
771 | |
772 @staticmethod | |
773 def _check_stacktrace_line(stacktrace_line): | |
774 """Checks if a given stacktrace_line is valid as stacktrace. | |
775 | |
776 Args: | |
777 stacktrace_line: A string to be checked. | |
778 | |
779 Returns: | |
780 True if the given stacktrace_line is valid. | |
781 """ | |
782 words = stacktrace_line.split() | |
783 if len(words) < BUCKET_ID + 1: | |
784 return False | |
785 if words[BUCKET_ID - 1] != '@': | |
786 return False | |
787 return True | |
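# A sketch of a valid stacktrace line with made-up numbers; the fields map | |
# to VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, an '@' separator, and the | |
# bucket ID at index BUCKET_ID: | |
# | |
#   8192 4096 10 2 @ 1234 | |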
788 | |
789 | |
790 class DumpList(object): | |
791 """Represents a sequence of heap profile dumps.""" | |
792 | |
793 def __init__(self, dump_list): | |
794 self._dump_list = dump_list | |
795 | |
796 @staticmethod | |
797 def load(path_list): | |
798 LOGGER.info('Loading heap dump profiles.') | |
799 dump_list = [] | |
800 for path in path_list: | |
801 dump_list.append(Dump.load(path, ' ')) | |
802 return DumpList(dump_list) | |
803 | |
804 def __len__(self): | |
805 return len(self._dump_list) | |
806 | |
807 def __iter__(self): | |
808 for dump in self._dump_list: | |
809 yield dump | |
810 | |
811 def __getitem__(self, index): | |
812 return self._dump_list[index] | |
813 | |
814 | |
815 class Command(object): | |
816 """Subclasses are a subcommand for this executable. | |
817 | |
818 See COMMANDS in main(). | |
819 """ | |
820 def __init__(self, usage): | |
821 self._parser = optparse.OptionParser(usage) | |
822 | |
823 @staticmethod | |
824 def load_basic_files(dump_path, multiple): | |
825 prefix = Command._find_prefix(dump_path) | |
826 symbol_mapping = SymbolMapping(prefix) | |
827 symbol_mapping.prepare() | |
828 bucket_set = BucketSet.load(prefix) | |
829 if multiple: | |
830 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) | |
831 else: | |
832 dump = Dump.load(dump_path) | |
833 symbol_cache = SymbolCache(prefix) | |
834 symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping) | |
835 symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping) | |
836 bucket_set.symbolize(symbol_cache) | |
837 if multiple: | |
838 return (bucket_set, dump_list) | |
839 else: | |
840 return (bucket_set, dump) | |
841 | |
842 @staticmethod | |
843 def _find_prefix(path): | |
844 return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path) | |
845 | |
846 @staticmethod | |
847 def _find_all_dumps(dump_path): | |
848 prefix = Command._find_prefix(dump_path) | |
849 dump_path_list = [dump_path] | |
850 | |
851 n = int(dump_path[-9:-5]) | |
852 n += 1 | |
853 while True: | |
854 p = '%s.%04d.heap' % (prefix, n) | |
855 if os.path.exists(p): | |
856 dump_path_list.append(p) | |
857 else: | |
858 break | |
859 n += 1 | |
860 | |
861 return dump_path_list | |
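# An illustrative example of the naming convention assumed above: given | |
# "dmprof.0001.heap", _find_prefix returns "dmprof", and _find_all_dumps | |
# collects "dmprof.0001.heap", "dmprof.0002.heap", ... until a file in the | |
# sequence is missing. | |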
862 | |
863 def _parse_args(self, sys_argv, required): | |
864 options, args = self._parser.parse_args(sys_argv) | |
865 if len(args) != required + 1: | |
866 self._parser.error('needs %d argument(s).\n' % required) | |
867 return None | |
868 return (options, args) | |
869 | |
870 def _parse_policy_list(self, options_policy): | |
871 if options_policy: | |
872 return options_policy.split(',') | |
873 else: | |
874 return None | |
875 | |
876 | |
877 class StacktraceCommand(Command): | |
878 def __init__(self): | |
879 super(StacktraceCommand, self).__init__( | |
880 'Usage: %prog stacktrace <dump>') | |
881 | |
882 def do(self, sys_argv): | |
883 options, args = self._parse_args(sys_argv, 1) | |
884 dump_path = args[1] | |
885 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
886 | |
887 StacktraceCommand._output(dump, bucket_set, sys.stdout) | |
888 return 0 | |
889 | |
890 @staticmethod | |
891 def _output(dump, bucket_set, out): | |
892 """Outputs a given stacktrace. | |
893 | |
894 Args: | |
895 bucket_set: A BucketSet object. | |
896 out: A file object to output. | |
897 """ | |
898 for line in dump.iter_stacktrace: | |
899 words = line.split() | |
900 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
901 if not bucket: | |
902 continue | |
903 for i in range(0, BUCKET_ID - 1): | |
904 out.write(words[i] + ' ') | |
905 for frame in bucket.symbolized_stacktrace: | |
906 out.write(frame + ' ') | |
907 out.write('\n') | |
908 | |
909 | |
910 class PolicyCommands(Command): | |
911 def __init__(self, command): | |
912 super(PolicyCommands, self).__init__( | |
913 'Usage: %%prog %s [-p POLICY] <first-dump>' % command) | |
914 self._parser.add_option('-p', '--policy', type='string', dest='policy', | |
915 help='profile with POLICY', metavar='POLICY') | |
916 | |
917 def _set_up(self, sys_argv): | |
918 options, args = self._parse_args(sys_argv, 1) | |
919 dump_path = args[1] | |
920 (bucket_set, dumps) = Command.load_basic_files(dump_path, True) | |
921 | |
922 policy_set = PolicySet.load(self._parse_policy_list(options.policy)) | |
923 return policy_set, dumps, bucket_set | |
924 | |
925 def _apply_policy(self, dump, policy, bucket_set, first_dump_time): | |
926 """Aggregates the total memory size of each component. | |
927 | |
928 Iterates through all stacktraces and attributes each to one of the | |
929 components based on the policy. Rules are applied in the order written. | |
930 | |
931 Args: | |
932 dump: A Dump object. | |
933 policy: A Policy object. | |
934 bucket_set: A BucketSet object. | |
935 first_dump_time: An integer representing the time when the first dump | |
936 was taken. | |
937 | |
938 Returns: | |
939 A dict mapping component names to their corresponding sizes. | |
940 """ | |
941 LOGGER.info(' %s' % dump.path) | |
942 sizes = dict((c, 0) for c in policy.components) | |
943 | |
944 PolicyCommands._accumulate(dump, policy, bucket_set, sizes) | |
945 | |
946 sizes['mmap-no-log'] = ( | |
947 dump.global_stat('profiled-mmap_committed') - | |
948 sizes['mmap-total-log']) | |
949 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') | |
950 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') | |
951 | |
952 sizes['tc-no-log'] = ( | |
953 dump.global_stat('profiled-malloc_committed') - | |
954 sizes['tc-total-log']) | |
955 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') | |
956 sizes['tc-unused'] = ( | |
957 sizes['mmap-tcmalloc'] - | |
958 dump.global_stat('profiled-malloc_committed')) | |
959 sizes['tc-total'] = sizes['mmap-tcmalloc'] | |
960 | |
961 for key, value in { | |
962 'total': 'total_committed', | |
963 'filemapped': 'file_committed', | |
964 'file-exec': 'file-exec_committed', | |
965 'file-nonexec': 'file-nonexec_committed', | |
966 'anonymous': 'anonymous_committed', | |
967 'stack': 'stack_committed', | |
968 'other': 'other_committed', | |
969 'unhooked-absent': 'nonprofiled-absent_committed', | |
970 'unhooked-anonymous': 'nonprofiled-anonymous_committed', | |
971 'unhooked-file-exec': 'nonprofiled-file-exec_committed', | |
972 'unhooked-file-nonexec': 'nonprofiled-file-nonexec_committed', | |
973 'unhooked-stack': 'nonprofiled-stack_committed', | |
974 'unhooked-other': 'nonprofiled-other_committed', | |
975 'total-vm': 'total_virtual', | |
976 'filemapped-vm': 'file_virtual', | |
977 'anonymous-vm': 'anonymous_virtual', | |
978 'other-vm': 'other_virtual' }.iteritems(): | |
979 if key in sizes: | |
980 sizes[key] = dump.global_stat(value) | |
981 | |
982 if 'mustbezero' in sizes: | |
983 removed_list = ( | |
984 'profiled-mmap_committed', | |
985 'nonprofiled-absent_committed', | |
986 'nonprofiled-anonymous_committed', | |
987 'nonprofiled-file-exec_committed', | |
988 'nonprofiled-file-nonexec_committed', | |
989 'nonprofiled-stack_committed', | |
990 'nonprofiled-other_committed') | |
991 sizes['mustbezero'] = ( | |
992 dump.global_stat('total_committed') - | |
993 sum(dump.global_stat(removed) for removed in removed_list)) | |
994 if 'total-exclude-profiler' in sizes: | |
995 sizes['total-exclude-profiler'] = ( | |
996 dump.global_stat('total_committed') - | |
997 (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) | |
998 if 'hour' in sizes: | |
999 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 | |
1000 if 'minute' in sizes: | |
1001 sizes['minute'] = (dump.time - first_dump_time) / 60.0 | |
1002 if 'second' in sizes: | |
1003 sizes['second'] = dump.time - first_dump_time | |
1004 | |
1005 return sizes | |
1006 | |
1007 @staticmethod | |
1008 def _accumulate(dump, policy, bucket_set, sizes): | |
1009 for line in dump.iter_stacktrace: | |
1010 words = line.split() | |
1011 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
1012 component_match = policy.find(bucket) | |
1013 sizes[component_match] += int(words[COMMITTED]) | |
1014 | |
1015 if component_match.startswith('tc-'): | |
1016 sizes['tc-total-log'] += int(words[COMMITTED]) | |
1017 elif component_match.startswith('mmap-'): | |
1018 sizes['mmap-total-log'] += int(words[COMMITTED]) | |
1019 else: | |
1020 sizes['other-total-log'] += int(words[COMMITTED]) | |
1021 | |
1022 | |
1023 class CSVCommand(PolicyCommands): | |
1024 def __init__(self): | |
1025 super(CSVCommand, self).__init__('csv') | |
1026 | |
1027 def do(self, sys_argv): | |
1028 policy_set, dumps, bucket_set = self._set_up(sys_argv) | |
1029 return self._output(policy_set, dumps, bucket_set, sys.stdout) | |
1030 | |
1031 def _output(self, policy_set, dumps, bucket_set, out): | |
1032 max_components = 0 | |
1033 for label in policy_set: | |
1034 max_components = max(max_components, len(policy_set[label].components)) | |
1035 | |
1036 for label in sorted(policy_set): | |
1037 components = policy_set[label].components | |
1038 if len(policy_set) > 1: | |
1039 out.write('%s%s\n' % (label, ',' * (max_components - 1))) | |
1040 out.write('%s%s\n' % ( | |
1041 ','.join(components), ',' * (max_components - len(components)))) | |
1042 | |
1043 LOGGER.info('Applying a policy %s to...' % label) | |
1044 for dump in dumps: | |
1045 component_sizes = self._apply_policy( | |
1046 dump, policy_set[label], bucket_set, dumps[0].time) | |
1047 s = [] | |
1048 for c in components: | |
1049 if c in ('hour', 'minute', 'second'): | |
1050 s.append('%05.5f' % (component_sizes[c])) | |
1051 else: | |
1052 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) | |
1053 out.write('%s%s\n' % ( | |
1054 ','.join(s), ',' * (max_components - len(components)))) | |
1055 | |
1056 bucket_set.clear_component_cache() | |
1057 | |
1058 return 0 | |
1059 | |
1060 | |
1061 class JSONCommand(PolicyCommands): | |
1062 def __init__(self): | |
1063 super(JSONCommand, self).__init__('json') | |
1064 | |
1065 def do(self, sys_argv): | |
1066 policy_set, dumps, bucket_set = self._set_up(sys_argv) | |
1067 return self._output(policy_set, dumps, bucket_set, sys.stdout) | |
1068 | |
1069 def _output(self, policy_set, dumps, bucket_set, out): | |
1070 json_base = { | |
1071 'version': 'JSON_DEEP_2', | |
1072 'policies': {}, | |
1073 } | |
1074 | |
1075 for label in sorted(policy_set): | |
1076 json_base['policies'][label] = { | |
1077 'legends': policy_set[label].components, | |
1078 'snapshots': [], | |
1079 } | |
1080 | |
1081 LOGGER.info('Applying a policy %s to...' % label) | |
1082 for dump in dumps: | |
1083 component_sizes = self._apply_policy( | |
1084 dump, policy_set[label], bucket_set, dumps[0].time) | |
1085 component_sizes['dump_path'] = dump.path | |
1086 component_sizes['dump_time'] = datetime.fromtimestamp( | |
1087 dump.time).strftime('%Y-%m-%d %H:%M:%S') | |
1088 json_base['policies'][label]['snapshots'].append(component_sizes) | |
1089 | |
1090 bucket_set.clear_component_cache() | |
1091 | |
1092 json.dump(json_base, out, indent=2, sort_keys=True) | |
1093 | |
1094 return 0 | |
1095 | |
1096 | |
1097 class ListCommand(PolicyCommands): | |
1098 def __init__(self): | |
1099 super(ListCommand, self).__init__('list') | |
1100 | |
1101 def do(self, sys_argv): | |
1102 policy_set, dumps, bucket_set = self._set_up(sys_argv) | |
1103 return self._output(policy_set, dumps, bucket_set, sys.stdout) | |
1104 | |
1105 def _output(self, policy_set, dumps, bucket_set, out): | |
1106 for label in sorted(policy_set): | |
1107 LOGGER.info('Applying a policy %s to...' % label) | |
1108 for dump in dumps: | |
1109 component_sizes = self._apply_policy( | |
1110 dump, policy_set[label], bucket_set, dump.time) | |
1111 out.write('%s for %s:\n' % (label, dump.path)) | |
1112 for c in policy_set[label].components: | |
1113 if c in ['hour', 'minute', 'second']: | |
1114 out.write('%40s %12.3f\n' % (c, component_sizes[c])) | |
1115 else: | |
1116 out.write('%40s %12d\n' % (c, component_sizes[c])) | |
1117 | |
1118 bucket_set.clear_component_cache() | |
1119 | |
1120 return 0 | |
1121 | |
1122 | |
1123 class ExpandCommand(Command): | |
1124 def __init__(self): | |
1125 super(ExpandCommand, self).__init__( | |
1126 'Usage: %prog expand <dump> <policy> <component> <depth>') | |
1127 | |
1128 def do(self, sys_argv): | |
1129 options, args = self._parse_args(sys_argv, 4) | |
1130 dump_path = args[1] | |
1131 target_policy = args[2] | |
1132 component_name = args[3] | |
1133 depth = args[4] | |
1134 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
1135 policy_set = PolicySet.load(self._parse_policy_list(target_policy)) | |
1136 | |
1137 self._output(dump, policy_set[target_policy], bucket_set, | |
1138 component_name, int(depth), sys.stdout) | |
1139 return 0 | |
1140 | |
1141 def _output(self, dump, policy, bucket_set, component_name, depth, out): | |
1142 """Prints all stacktraces in a given component of given depth. | |
1143 | |
1144 Args: | |
1145 dump: A Dump object. | |
1146 policy: A Policy object. | |
1147 bucket_set: A BucketSet object. | |
1148 component_name: A name of component for filtering. | |
1149 depth: An integer representing depth to be printed. | |
1150 out: An IO object to output. | |
1151 """ | |
1152 sizes = {} | |
1153 | |
1154 ExpandCommand._accumulate( | |
1155 dump, policy, bucket_set, component_name, depth, sizes) | |
1156 | |
1157 sorted_sizes_list = sorted( | |
1158 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | |
1159 total = 0 | |
1160 for size_pair in sorted_sizes_list: | |
1161 out.write('%10d %s\n' % (size_pair[1], size_pair[0])) | |
1162 total += size_pair[1] | |
1163 LOGGER.info('total: %d\n' % total) | |
1164 | |
1165 @staticmethod | |
1166 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): | |
1167 for line in dump.iter_stacktrace: | |
1168 words = line.split() | |
1169 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
1170 component_match = policy.find(bucket) | |
1171 if component_match == component_name: | |
1172 stacktrace_sequence = '' | |
1173 if bucket.typeinfo: | |
1174 stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo | |
1175 stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name | |
1176 for stack in bucket.symbolized_stacktrace[ | |
1177 0 : min(len(bucket.symbolized_stacktrace), 1 + depth)]: | |
1178 stacktrace_sequence += stack + ' ' | |
1179 if stacktrace_sequence not in sizes: | |
1180 sizes[stacktrace_sequence] = 0 | |
1181 sizes[stacktrace_sequence] += int(words[COMMITTED]) | |
1182 | |
1183 | |
1184 class PProfCommand(Command): | |
1185 def __init__(self): | |
1186 super(PProfCommand, self).__init__( | |
1187 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') | |
1188 self._parser.add_option('-c', '--component', type='string', | |
1189 dest='component', | |
1190 help='restrict to COMPONENT', metavar='COMPONENT') | |
1191 | |
1192 def do(self, sys_argv): | |
1193 options, args = self._parse_args(sys_argv, 2) | |
1194 | |
1195 dump_path = args[1] | |
1196 target_policy = args[2] | |
1197 component = options.component | |
1198 | |
1199 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
1200 policy_set = PolicySet.load(self._parse_policy_list(target_policy)) | |
1201 | |
1202 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f: | |
1203 maps_lines = maps_f.readlines() | |
1204 PProfCommand._output( | |
1205 dump, policy_set[target_policy], bucket_set, maps_lines, component, | |
1206 sys.stdout) | |
1207 | |
1208 return 0 | |
1209 | |
1210 @staticmethod | |
1211 def _output(dump, policy, bucket_set, maps_lines, component_name, out): | |
1212 """Converts the heap profile dump so it can be processed by pprof. | |
1213 | |
1214 Args: | |
1215 dump: A Dump object. | |
1216 policy: A Policy object. | |
1217 bucket_set: A BucketSet object. | |
1218 maps_lines: A list of strings containing /proc/.../maps. | |
1219 component_name: A name of component for filtering. | |
1220 out: An IO object to output. | |
1221 """ | |
1222 out.write('heap profile: ') | |
1223 com_committed, com_allocs = PProfCommand._accumulate( | |
1224 dump, policy, bucket_set, component_name) | |
1225 | |
1226 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( | |
1227 com_allocs, com_committed, com_allocs, com_committed)) | |
1228 | |
1229 PProfCommand._output_stacktrace_lines( | |
1230 dump, policy, bucket_set, component_name, out) | |
1231 | |
1232 out.write('MAPPED_LIBRARIES:\n') | |
1233 for line in maps_lines: | |
1234 out.write(line) | |
1235 | |
1236 @staticmethod | |
1237 def _accumulate(dump, policy, bucket_set, component_name): | |
1238 """Accumulates size of committed chunks and the number of allocated chunks. | |
1239 | |
1240 Args: | |
1241 dump: A Dump object. | |
1242 policy: A Policy object. | |
1243 bucket_set: A BucketSet object. | |
1244 component_name: A name of component for filtering. | |
1245 | |
1246 Returns: | |
1247 Two integers which are the accumulated size of committed regions and the | |
1248 number of allocated chunks, respectively. | |
1249 """ | |
1250 com_committed = 0 | |
1251 com_allocs = 0 | |
1252 for line in dump.iter_stacktrace: | |
1253 words = line.split() | |
1254 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
1255 if (not bucket or | |
1256 (component_name and component_name != policy.find(bucket))): | |
1257 continue | |
1258 | |
1259 com_committed += int(words[COMMITTED]) | |
1260 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) | |
1261 | |
1262 return com_committed, com_allocs | |
1263 | |
1264 @staticmethod | |
1265 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): | |
1266 """Prints information of stacktrace lines for pprof. | |
1267 | |
1268 Args: | |
1269 dump: A Dump object. | |
1270 policy: A Policy object. | |
1271 bucket_set: A BucketSet object. | |
1272 component_name: A name of component for filtering. | |
1273 out: An IO object to output. | |
1274 """ | |
1275 for line in dump.iter_stacktrace: | |
1276 words = line.split() | |
1277 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
1278 if (not bucket or | |
1279 (component_name and component_name != policy.find(bucket))): | |
1280 continue | |
1281 | |
1282 out.write('%6d: %8s [%6d: %8s] @' % ( | |
1283 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
1284 words[COMMITTED], | |
1285 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
1286 words[COMMITTED])) | |
1287 for address in bucket.stacktrace: | |
1288 out.write(' 0x%016x' % address) | |
1289 out.write('\n') | |
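# A sketch of the pprof-compatible output produced above, with made-up | |
# numbers and addresses: | |
# | |
#   heap profile:      8:     4096 [     8:     4096] @ heapprofile | |
#        8:     4096 [     8:     4096] @ 0x00007f0000001000 | |
#   MAPPED_LIBRARIES: | |
#   ... (the /proc/<pid>/maps lines, verbatim) | |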
1290 | |
1291 | |
1292 def main(): | |
1293 COMMANDS = { | |
1294 'csv': CSVCommand, | |
1295 'expand': ExpandCommand, | |
1296 'json': JSONCommand, | |
1297 'list': ListCommand, | |
1298 'pprof': PProfCommand, | |
1299 'stacktrace': StacktraceCommand, | |
1300 } | |
1301 | |
1302 if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS: | |
1303 sys.stderr.write("""Usage: %s <command> [options] [<args>] | |
1304 | |
1305 Commands: | |
1306 csv Classify memory usage in CSV | |
1307 expand Show all stacktraces contained in the specified component | |
1308 json Classify memory usage in JSON | |
1309 list Classify memory usage in simple listing format | |
1310 pprof Format the profile dump so that it can be processed by pprof | |
1311 stacktrace Convert runtime addresses to symbol names | |
1312 | |
1313 Quick Reference: | |
1314 dmprof csv [-p POLICY] <first-dump> | |
1315 dmprof expand <dump> <policy> <component> <depth> | |
1316 dmprof json [-p POLICY] <first-dump> | |
1317 dmprof list [-p POLICY] <first-dump> | |
1318 dmprof pprof [-c COMPONENT] <dump> <policy> | |
1319 dmprof stacktrace <dump> | |
1320 """ % (sys.argv[0])) | |
1321 sys.exit(1) | |
1322 action = sys.argv.pop(1) | |
1323 | |
1324 LOGGER.setLevel(logging.DEBUG) | |
1325 handler = logging.StreamHandler() | |
1326 handler.setLevel(logging.INFO) | |
1327 formatter = logging.Formatter('%(message)s') | |
1328 handler.setFormatter(formatter) | |
1329 LOGGER.addHandler(handler) | |
1330 | |
1331 try: | |
1332 errorcode = COMMANDS[action]().do(sys.argv) | |
1333 except ParsingException as e: | |
1334 errorcode = 1 | |
1335 sys.stderr.write('Exiting due to a parsing error: %s\n' % e) | |
1336 | |
1337 return errorcode | |
1338 | |
1339 | |
1340 if __name__ == '__main__': | |
1341 sys.exit(main()) | |