Chromium Code Reviews

Side by Side Diff: tools/deep_memory_profiler/dmprof

Issue 11417048: Retry: Add a first test for tools/deep_memory_profiler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 1 month ago
NEW: tools/deep_memory_profiler/dmprof (a bash wrapper that re-directs to dmprof.py)

1 #!/usr/bin/env bash
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 # Re-direct the arguments to dmprof.py.
7
8 BASEDIR=`dirname $0`
9 ARGV="$@"
10
11 PYTHONPATH=$BASEDIR/../python/google python \
12 "$BASEDIR/dmprof.py" $ARGV

OLD: tools/deep_memory_profiler/dmprof (the original Python script, continued below)

1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """The deep heap profiler script for Chrome."""
7
8 from datetime import datetime
9 import json
10 import logging
11 import optparse
12 import os
13 import re
14 import shutil
15 import subprocess
16 import sys
17 import tempfile
18
19 BASE_PATH = os.path.dirname(os.path.abspath(__file__))
20 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
21 BASE_PATH, os.pardir, 'find_runtime_symbols')
22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
23
24 from find_runtime_symbols import find_runtime_symbols_list
25 from find_runtime_symbols import find_runtime_typeinfo_symbols_list
26 from find_runtime_symbols import RuntimeSymbolsInProcess
27 from prepare_symbol_info import prepare_symbol_info
28
29 BUCKET_ID = 5
30 VIRTUAL = 0
31 COMMITTED = 1
32 ALLOC_COUNT = 2
33 FREE_COUNT = 3
34 NULL_REGEX = re.compile('')
35
36 LOGGER = logging.getLogger('dmprof')
37 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')
38 FUNCTION_ADDRESS = 'function'
39 TYPEINFO_ADDRESS = 'typeinfo'
40
41
42 # Heap Profile Dump versions
43
44 # DUMP_DEEP_[1-4] are obsolete.
45 # DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
46 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
47 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
48 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
49 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
50 DUMP_DEEP_1 = 'DUMP_DEEP_1'
51 DUMP_DEEP_2 = 'DUMP_DEEP_2'
52 DUMP_DEEP_3 = 'DUMP_DEEP_3'
53 DUMP_DEEP_4 = 'DUMP_DEEP_4'
54
55 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
56
57 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
58 # malloc and mmap are identified in bucket files.
59 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
60 DUMP_DEEP_5 = 'DUMP_DEEP_5'
61
62
63 # Heap Profile Policy versions
64
65 # POLICY_DEEP_1 DOES NOT include allocation_type columns.
66 # mmap regions are distinguished with mmap frames in the pattern column.
67 POLICY_DEEP_1 = 'POLICY_DEEP_1'
68
69 # POLICY_DEEP_2 DOES include allocation_type columns.
70 # mmap regions are distinguished with the allocation_type column.
71 POLICY_DEEP_2 = 'POLICY_DEEP_2'
72
73 # POLICY_DEEP_3 is in JSON format.
74 POLICY_DEEP_3 = 'POLICY_DEEP_3'
75
76 # POLICY_DEEP_4 contains typeinfo.
77 POLICY_DEEP_4 = 'POLICY_DEEP_4'
78
79
80 class EmptyDumpException(Exception):
81 def __init__(self, value):
82 self.value = value
83 def __str__(self):
84 return repr(self.value)
85
86
87 class ParsingException(Exception):
88 def __init__(self, value):
89 self.value = value
90 def __str__(self):
91 return repr(self.value)
92
93
94 class InvalidDumpException(ParsingException):
95 def __init__(self, value):
96 self.value = value
97 def __str__(self):
98 return "invalid heap profile dump: %s" % repr(self.value)
99
100
101 class ObsoleteDumpVersionException(ParsingException):
102 def __init__(self, value):
103 self.value = value
104 def __str__(self):
105 return "obsolete heap profile dump version: %s" % repr(self.value)
106
107
108 def skip_while(index, max_index, skipping_condition):
109 """Increments |index| until |skipping_condition|(|index|) is False.
110
111 Returns:
112 A pair of (an integer index of the first line after the skipped ones,
113 and a boolean which is True if a line for which |skipping_condition|
114 is False was found before reaching |max_index|).
115 """
116 while skipping_condition(index):
117 index += 1
118 if index >= max_index:
119 return index, False
120 return index, True
121
122
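A minimal usage sketch of skip_while; the lines and the resulting values in the trailing comment are invented for illustration:

  lines = ['# comment\n', '# comment\n', 'data\n']
  index, found = skip_while(0, len(lines),
                            lambda n: lines[n].startswith('#'))
  # index == 2 (the first non-matching line), found == True
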
123 class SymbolMapping(object):
124 """Manages all symbol information on process memory mapping.
125
126 The symbol information consists of all symbols in the binary files obtained
127 by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps,
128 nm and so on. It is minimum requisite information to run dmprof.
129
130 The information is prepared in a directory "|prefix|.symmap" by prepare().
131 The directory is more portable than the Chromium binaries themselves;
132 users can save it and re-run the analysis later with the saved directory.
133
134 Note that loading the symbol information takes a long time since it is
135 generally very large -- it covers all symbols in the binaries, not only
136 the functions and types actually used. Used symbols can be cached in the "SymbolCache" class.
137 """
138 def __init__(self, prefix):
139 self._prefix = prefix
140 self._prepared_symbol_mapping_path = None
141 self._loaded_symbol_mapping = None
142
143 def prepare(self):
144 """Extracts symbol mapping from binaries and prepares it to use.
145
146 The symbol mapping is stored in a directory whose name is stored in
147 |self._prepared_symbol_mapping_path|.
148
149 Returns:
150 True if succeeded.
151 """
152 LOGGER.info('Preparing symbol mapping...')
153 self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info(
154 self._prefix + '.maps', self._prefix + '.symmap', True)
155 if self._prepared_symbol_mapping_path:
156 LOGGER.info(' Prepared symbol mapping.')
157 if used_tempdir:
158 LOGGER.warn(' Using a temporary directory for symbol mapping.')
159 LOGGER.warn(' Delete it by yourself.')
160 LOGGER.warn(' Or, move the directory by yourself to use it later.')
161 return True
162 else:
163 LOGGER.warn(' Failed to prepare symbol mapping.')
164 return False
165
166 def get(self):
167 """Returns symbol mapping.
168
169 Returns:
170 Loaded symbol mapping. None if failed.
171 """
172 if not self._prepared_symbol_mapping_path and not self.prepare():
173 return None
174 if not self._loaded_symbol_mapping:
175 LOGGER.info('Loading symbol mapping...')
176 self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load(
177 self._prepared_symbol_mapping_path)
178 return self._loaded_symbol_mapping
179
180
181 class SymbolCache(object):
182 """Manages cache of used symbol mapping.
183
184 The original symbol mapping is provided by "SymbolMapping" (built with
185 tools such as maps, nm and readelf), and "SymbolCache" just caches how
186 dmprof interprets each address, to speed up repeated analyses of the
187 same binary and profile dumps. Handling the full symbol mapping in
188 "SymbolMapping" takes a long time; "SymbolCache" keeps only the used mappings, in memory and in files.
189 """
190 def __init__(self, prefix):
191 self._prefix = prefix
192 self._symbol_cache_paths = {
193 FUNCTION_ADDRESS: prefix + '.funcsym',
194 TYPEINFO_ADDRESS: prefix + '.typesym',
195 }
196 self._find_runtime_symbols_functions = {
197 FUNCTION_ADDRESS: find_runtime_symbols_list,
198 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list,
199 }
200 self._symbol_caches = {
201 FUNCTION_ADDRESS: {},
202 TYPEINFO_ADDRESS: {},
203 }
204
205 def update(self, address_type, bucket_set, symbol_mapping):
206 """Updates symbol mapping on memory and in a ".*sym" cache file.
207
208 It reads cached symbol mapping from a ".*sym" file if it exists. Then,
209 it looks up unresolved addresses from a given "SymbolMapping". Finally,
210 both symbol mappings on memory and in the ".*sym" cache file are updated.
211
212 Symbol files are formatted as follows:
213 <Address> <Symbol>
214 <Address> <Symbol>
215 <Address> <Symbol>
216 ...
217
218 Args:
219 address_type: A type of addresses to update. It should be one of
220 FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
221 bucket_set: A BucketSet object.
222 symbol_mapping: A SymbolMapping object.
223 """
224 self._load(address_type)
225
226 unresolved_addresses = sorted(
227 address for address in bucket_set.iter_addresses(address_type)
228 if address not in self._symbol_caches[address_type])
229
230 if not unresolved_addresses:
231 LOGGER.info('No need to resolve any more addresses.')
232 return
233
234 symbol_cache_path = self._symbol_cache_paths[address_type]
235 with open(symbol_cache_path, mode='a+') as symbol_f:
236 LOGGER.info('Resolving %d unresolved addresses.' %
237 len(unresolved_addresses))
238 symbol_list = self._find_runtime_symbols_functions[address_type](
239 symbol_mapping.get(), unresolved_addresses)
240
241 for address, symbol in zip(unresolved_addresses, symbol_list):
242 stripped_symbol = symbol.strip() or '??'
243 self._symbol_caches[address_type][address] = stripped_symbol
244 symbol_f.write('%x %s\n' % (address, stripped_symbol))
245
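A sketch of a ".funcsym" cache file in the "<Address> <Symbol>" format described above; the addresses and symbols are invented:

  7f6b1c2d3e40 v8::internal::Heap::AllocateRaw
  7f6b1c2d4f80 WTF::fastMalloc
  7f6b1c2d5aa0 ??

Each line maps a hexadecimal address to the resolved symbol ('??' when no symbol was found), matching what update() writes and _load() reads back.
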
246 def lookup(self, address_type, address):
247 """Looks up a symbol for a given |address|.
248
249 Args:
250 address_type: A type of addresses to lookup. It should be one of
251 FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
252 address: An integer that represents an address.
253
254 Returns:
255 A string that represents a symbol.
256 """
257 return self._symbol_caches[address_type].get(address)
258
259 def _load(self, address_type):
260 symbol_cache_path = self._symbol_cache_paths[address_type]
261 try:
262 with open(symbol_cache_path, mode='r') as symbol_f:
263 for line in symbol_f:
264 items = line.rstrip().split(None, 1)
265 if len(items) == 1:
266 items.append('??')
267 self._symbol_caches[address_type][int(items[0], 16)] = items[1]
268 LOGGER.info('Loaded %d entries from symbol cache.' %
269 len(self._symbol_caches[address_type]))
270 except IOError:
271 LOGGER.info('No valid symbol cache file is found.')
272
273
274 class Rule(object):
275 """Represents one matching rule in a policy file."""
276
277 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None):
278 self._name = name
279 self._mmap = mmap
280 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z')
281 if typeinfo_pattern:
282 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')
283 else:
284 self._typeinfo_pattern = None
285
286 @property
287 def name(self):
288 return self._name
289
290 @property
291 def mmap(self):
292 return self._mmap
293
294 @property
295 def stacktrace_pattern(self):
296 return self._stacktrace_pattern
297
298 @property
299 def typeinfo_pattern(self):
300 return self._typeinfo_pattern
301
302
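A small sketch of how Rule anchors its patterns: appending r'\Z' in the constructor means a pattern must match the whole joined stacktrace, not just a prefix. The rule and stacktraces here are invented:

  rule = Rule('tc-webkit', False, '.*WTF::fastMalloc')
  rule.stacktrace_pattern.match('MemoryCache::add WTF::fastMalloc')  # matches
  rule.stacktrace_pattern.match('WTF::fastMalloc BucketSet::load')   # None; r'\Z' rejects trailing frames
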
303 class Policy(object):
304 """Represents a policy, a content of a policy file."""
305
306 def __init__(self, rules, version, components):
307 self._rules = rules
308 self._version = version
309 self._components = components
310
311 @property
312 def rules(self):
313 return self._rules
314
315 @property
316 def version(self):
317 return self._version
318
319 @property
320 def components(self):
321 return self._components
322
323 def find(self, bucket):
324 """Finds a matching component name which a given |bucket| belongs to.
325
326 Args:
327 bucket: A Bucket object to be searched for.
328
329 Returns:
330 A string representing a component name.
331 """
332 if not bucket:
333 return 'no-bucket'
334 if bucket.component_cache:
335 return bucket.component_cache
336
337 stacktrace = bucket.symbolized_joined_stacktrace
338 typeinfo = bucket.symbolized_typeinfo
339 if typeinfo.startswith('0x'):
340 typeinfo = bucket.typeinfo_name
341
342 for rule in self._rules:
343 if (bucket.mmap == rule.mmap and
344 rule.stacktrace_pattern.match(stacktrace) and
345 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
346 bucket.component_cache = rule.name
347 return rule.name
348
349 assert False, 'No rule matched; a policy must end with catch-all rules.'
350
351 @staticmethod
352 def load(filename, format):
353 """Loads a policy file of |filename| in a |format|.
354
355 Args:
356 filename: A filename to be loaded.
357 format: A string to specify a format of the file. Only 'json' is
358 supported for now.
359
360 Returns:
361 A loaded Policy object.
362 """
363 with open(os.path.join(BASE_PATH, filename)) as policy_f:
364 return Policy.parse(policy_f, format)
365
366 @staticmethod
367 def parse(policy_f, format):
368 """Parses a policy file content in a |format|.
369
370 Args:
371 policy_f: An IO object to be loaded.
372 format: A string to specify a format of the file. Only 'json' is
373 supported for now.
374
375 Returns:
376 A loaded Policy object.
377 """
378 if format == 'json':
379 return Policy._parse_json(policy_f)
380 else:
381 return None
382
383 @staticmethod
384 def _parse_json(policy_f):
385 """Parses policy file in json format.
386
387 A policy file contains component names and their stacktrace patterns
388 written as regular expressions. The patterns are matched against the
389 symbols of each stacktrace, in the order they appear in the policy file.
390
391 Args:
392 policy_f: A File/IO object to read.
393
394 Returns:
395 A loaded policy object.
396 """
397 policy = json.load(policy_f)
398
399 rules = []
400 for rule in policy['rules']:
401 rules.append(Rule(
402 rule['name'],
403 rule['allocator'] == 'mmap',
404 rule['stacktrace'],
405 rule['typeinfo'] if 'typeinfo' in rule else None))
406 return Policy(rules, policy['version'], policy['components'])
407
408
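A minimal sketch of a policy file in the JSON format that _parse_json expects; the component names and patterns are invented, but the keys follow the parsing code above ('typeinfo' is optional per rule):

  {
    "version": "POLICY_DEEP_3",
    "components": ["mmap-v8", "mmap-other", "tc-webkit", "tc-other"],
    "rules": [
      {"name": "mmap-v8", "allocator": "mmap", "stacktrace": ".*v8::.*"},
      {"name": "mmap-other", "allocator": "mmap", "stacktrace": ".*"},
      {"name": "tc-webkit", "allocator": "malloc", "stacktrace": ".*WTF::.*"},
      {"name": "tc-other", "allocator": "malloc", "stacktrace": ".*"}
    ]
  }

Since Policy.find() asserts that some rule matches every bucket, a policy is expected to end with catch-all rules like the ".*" ones above; a real policy also carries the bookkeeping components that _apply_policy expects, such as 'mmap-total-log' and 'tc-total-log'.
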
409 class PolicySet(object):
410 """Represents a set of policies."""
411
412 def __init__(self, policy_directory):
413 self._policy_directory = policy_directory
414
415 @staticmethod
416 def load(labels=None):
417 """Loads a set of policies via the "default policy directory".
418
419 The "default policy directory" contains pairs of policies and their labels.
420 For example, a policy "policy.l0.json" is labeled "l0" in the default
421 policy directory "policies.json".
422
423 All policies in the directory are loaded by default. Policies can be
424 limited by |labels|.
425
426 Args:
427 labels: An array that contains policy labels to be loaded.
428
429 Returns:
430 A PolicySet object.
431 """
432 default_policy_directory = PolicySet._load_default_policy_directory()
433 if labels:
434 specified_policy_directory = {}
435 for label in labels:
436 if label in default_policy_directory:
437 specified_policy_directory[label] = default_policy_directory[label]
438 # TODO(dmikurube): Load an un-labeled policy file.
439 return PolicySet._load_policies(specified_policy_directory)
440 else:
441 return PolicySet._load_policies(default_policy_directory)
442
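A sketch of the "default policy directory" file (policies.json) that load() reads; the labels and file names are invented, but the per-label 'file' and 'format' keys match _load_policies below:

  {
    "l0": {"file": "policy.l0.json", "format": "json"},
    "l1": {"file": "policy.l1.json", "format": "json"}
  }
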
443 def __len__(self):
444 return len(self._policy_directory)
445
446 def __iter__(self):
447 for label in self._policy_directory:
448 yield label
449
450 def __getitem__(self, label):
451 return self._policy_directory[label]
452
453 @staticmethod
454 def _load_default_policy_directory():
455 with open(POLICIES_JSON_PATH, mode='r') as policies_f:
456 default_policy_directory = json.load(policies_f)
457 return default_policy_directory
458
459 @staticmethod
460 def _load_policies(directory):
461 LOGGER.info('Loading policy files.')
462 policies = {}
463 for label in directory:
464 LOGGER.info(' %s: %s' % (label, directory[label]['file']))
465 loaded = Policy.load(directory[label]['file'], directory[label]['format'])
466 if loaded:
467 policies[label] = loaded
468 return PolicySet(policies)
469
470
471 class Bucket(object):
472 """Represents a bucket, which is a unit of memory block classification."""
473
474 def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name):
475 self._stacktrace = stacktrace
476 self._mmap = mmap
477 self._typeinfo = typeinfo
478 self._typeinfo_name = typeinfo_name
479
480 self._symbolized_stacktrace = stacktrace
481 self._symbolized_joined_stacktrace = ''
482 self._symbolized_typeinfo = typeinfo_name
483
484 self.component_cache = ''
485
486 def symbolize(self, symbol_cache):
487 """Makes a symbolized stacktrace and typeinfo with |symbol_cache|.
488
489 Args:
490 symbol_cache: A SymbolCache object.
491 """
492 # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
493 self._symbolized_stacktrace = [
494 symbol_cache.lookup(FUNCTION_ADDRESS, address)
495 for address in self._stacktrace]
496 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace)
497 if not self._typeinfo:
498 self._symbolized_typeinfo = 'no typeinfo'
499 else:
500 self._symbolized_typeinfo = symbol_cache.lookup(
501 TYPEINFO_ADDRESS, self._typeinfo)
502 if not self._symbolized_typeinfo:
503 self._symbolized_typeinfo = 'no typeinfo'
504
505 def clear_component_cache(self):
506 self.component_cache = ''
507
508 @property
509 def stacktrace(self):
510 return self._stacktrace
511
512 @property
513 def mmap(self):
514 return self._mmap
515
516 @property
517 def typeinfo(self):
518 return self._typeinfo
519
520 @property
521 def typeinfo_name(self):
522 return self._typeinfo_name
523
524 @property
525 def symbolized_stacktrace(self):
526 return self._symbolized_stacktrace
527
528 @property
529 def symbolized_joined_stacktrace(self):
530 return self._symbolized_joined_stacktrace
531
532 @property
533 def symbolized_typeinfo(self):
534 return self._symbolized_typeinfo
535
536
537 class BucketSet(object):
538 """Represents a set of bucket."""
539 def __init__(self):
540 self._buckets = {}
541 self._addresses = {
542 FUNCTION_ADDRESS: set(),
543 TYPEINFO_ADDRESS: set(),
544 }
545
546 @staticmethod
547 def load(prefix):
548 """Loads all related bucket files.
549
550 Args:
551 prefix: A prefix string for bucket file names.
552
553 Returns:
554 A loaded BucketSet object.
555 """
556 LOGGER.info('Loading bucket files.')
557 bucket_set = BucketSet()
558
559 n = 0
560 while True:
561 path = '%s.%04d.buckets' % (prefix, n)
562 if not os.path.exists(path):
563 if n > 10:
564 break
565 n += 1
566 continue
567 LOGGER.info(' %s' % path)
568 with open(path, 'r') as f:
569 bucket_set._load_file(f)
570 n += 1
571
572 return bucket_set
573
574 def _load_file(self, bucket_f):
575 for line in bucket_f:
576 words = line.split()
577 typeinfo = None
578 typeinfo_name = ''
579 stacktrace_begin = 2
580 for index, word in enumerate(words):
581 if index < 2:
582 continue
583 if word[0] == 't':
584 typeinfo = int(word[1:], 16)
585 self._addresses[TYPEINFO_ADDRESS].add(typeinfo)
586 elif word[0] == 'n':
587 typeinfo_name = word[1:]
588 else:
589 stacktrace_begin = index
590 break
591 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]]
592 for frame in stacktrace:
593 self._addresses[FUNCTION_ADDRESS].add(frame)
594 self._buckets[int(words[0])] = Bucket(
595 stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name)
596
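A sketch of bucket file lines in the format _load_file parses; the IDs and addresses are invented. Word 0 is the bucket ID, word 1 is the allocator ('mmap' or 'malloc'), optional words prefixed 't' (typeinfo address) and 'n' (type name) follow, and the remaining words are the stacktrace addresses:

  123 malloc t0x7f6b00001234 0x7f6b1c2d3e40 0x7f6b1c2d4f80
  124 mmap 0x7f6b1c2d5aa0 0x7f6b1c2d3e40
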
597 def __iter__(self):
598 for bucket_id, bucket_content in self._buckets.iteritems():
599 yield bucket_id, bucket_content
600
601 def __getitem__(self, bucket_id):
602 return self._buckets[bucket_id]
603
604 def get(self, bucket_id):
605 return self._buckets.get(bucket_id)
606
607 def symbolize(self, symbol_cache):
608 for bucket_content in self._buckets.itervalues():
609 bucket_content.symbolize(symbol_cache)
610
611 def clear_component_cache(self):
612 for bucket_content in self._buckets.itervalues():
613 bucket_content.clear_component_cache()
614
615 def iter_addresses(self, address_type):
616 for function in self._addresses[address_type]:
617 yield function
618
619
620 class Dump(object):
621 """Represents a heap profile dump."""
622
623 def __init__(self):
624 self._path = ''
625 self._time = None
626 self._stacktrace_lines = []
627 self._global_stats = {} # used only in apply_policy
628
629 self._version = ''
630 self._lines = []
631
632 @property
633 def path(self):
634 return self._path
635
636 @property
637 def time(self):
638 return self._time
639
640 @property
641 def iter_stacktrace(self):
642 for line in self._stacktrace_lines:
643 yield line
644
645 def global_stat(self, name):
646 return self._global_stats[name]
647
648 @staticmethod
649 def load(path, log_header='Loading a heap profile dump: '):
650 """Loads a heap profile dump.
651
652 Args:
653 path: A file path string to load.
654 log_header: A preceding string for log messages.
655
656 Returns:
657 A loaded Dump object.
658
659 Raises:
660 ParsingException for invalid heap profile dumps.
661 """
662 dump = Dump()
663 dump._path = path
664 dump._time = os.stat(dump._path).st_mtime
665 dump._version = ''
666
667 dump._lines = [line for line in open(dump._path, 'r')
668 if line and not line.startswith('#')]
669
670 try:
671 dump._version, ln = dump._parse_version()
672 dump._parse_global_stats()
673 dump._extract_stacktrace_lines(ln)
674 except EmptyDumpException:
675 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, path))
676 except ParsingException, e:
677 LOGGER.error('%s%s ...error %s' % (log_header, path, e))
678 raise
679 else:
680 LOGGER.info('%s%s (version: %s)' % (log_header, path, dump._version))
681
682 return dump
683
684 def _parse_version(self):
685 """Parses a version string in self._lines.
686
687 Returns:
688 A pair of (a string representing a version of the stacktrace dump,
689 and an integer indicating a line number next to the version string).
690
691 Raises:
692 ParsingException for invalid dump versions.
693 """
694 version = ''
695
696 # Skip until an identifiable line.
697 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
698 if not self._lines:
699 raise EmptyDumpException('Empty heap dump file.')
700 (ln, found) = skip_while(
701 0, len(self._lines),
702 lambda n: not self._lines[n].startswith(headers))
703 if not found:
704 raise InvalidDumpException('No version header.')
705
706 # Identify a version.
707 if self._lines[ln].startswith('heap profile: '):
708 version = self._lines[ln][len('heap profile:'):].strip()
709 if version == DUMP_DEEP_5:
710 (ln, _) = skip_while(
711 ln, len(self._lines),
712 lambda n: self._lines[n] != 'STACKTRACES:\n')
713 elif version in DUMP_DEEP_OBSOLETE:
714 raise ObsoleteDumpVersionException(version)
715 else:
716 raise InvalidDumpException('Invalid version: %s' % version)
717 elif self._lines[ln] == 'STACKTRACES:\n':
718 raise ObsoleteDumpVersionException(DUMP_DEEP_1)
719 elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
720 raise ObsoleteDumpVersionException(DUMP_DEEP_2)
721
722 return (version, ln)
723
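A sketch of the head of a DUMP_DEEP_5 dump as the parsers here expect it; all numbers are invented and most GLOBAL_STATS rows are elided:

  heap profile: DUMP_DEEP_5
  GLOBAL_STATS:
       total      423690240  229445632
       file-exec   93458432   58720256
       ...
  STACKTRACES:
     virtual  committed   alloc    free @ bucket
     1048576     524288      10       2 @ 123

Each GLOBAL_STATS row ends with the virtual and committed sizes, and each stacktrace row has '@' at word BUCKET_ID - 1 followed by the bucket ID.
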
724 def _parse_global_stats(self):
725 """Parses lines in self._lines as global stats."""
726 (ln, _) = skip_while(
727 0, len(self._lines),
728 lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
729
730 global_stat_names = [
731 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
732 'nonprofiled-absent', 'nonprofiled-anonymous',
733 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
734 'nonprofiled-stack', 'nonprofiled-other',
735 'profiled-mmap', 'profiled-malloc']
736
737 for prefix in global_stat_names:
738 (ln, _) = skip_while(
739 ln, len(self._lines),
740 lambda n: self._lines[n].split()[0] != prefix)
741 words = self._lines[ln].split()
742 self._global_stats[prefix + '_virtual'] = int(words[-2])
743 self._global_stats[prefix + '_committed'] = int(words[-1])
744
745 def _extract_stacktrace_lines(self, line_number):
746 """Extracts the position of stacktrace lines.
747
748 Valid stacktrace lines are stored into self._stacktrace_lines.
749
750 Args:
751 line_number: A line number to start parsing in lines.
752
753 Raises:
754 ParsingException for invalid dump versions.
755 """
756 if self._version == DUMP_DEEP_5:
757 (line_number, _) = skip_while(
758 line_number, len(self._lines),
759 lambda n: not self._lines[n].split()[0].isdigit())
760 stacktrace_start = line_number
761 (line_number, _) = skip_while(
762 line_number, len(self._lines),
763 lambda n: self._check_stacktrace_line(self._lines[n]))
764 self._stacktrace_lines = self._lines[stacktrace_start:line_number]
765
766 elif self._version in DUMP_DEEP_OBSOLETE:
767 raise ObsoleteDumpVersionException(self._version)
768
769 else:
770 raise InvalidDumpException('Invalid version: %s' % self._version)
771
772 @staticmethod
773 def _check_stacktrace_line(stacktrace_line):
774 """Checks if a given stacktrace_line is valid as stacktrace.
775
776 Args:
777 stacktrace_line: A string to be checked.
778
779 Returns:
780 True if the given stacktrace_line is valid.
781 """
782 words = stacktrace_line.split()
783 if len(words) < BUCKET_ID + 1:
784 return False
785 if words[BUCKET_ID - 1] != '@':
786 return False
787 return True
788
789
790 class DumpList(object):
791 """Represents a sequence of heap profile dumps."""
792
793 def __init__(self, dump_list):
794 self._dump_list = dump_list
795
796 @staticmethod
797 def load(path_list):
798 LOGGER.info('Loading heap dump profiles.')
799 dump_list = []
800 for path in path_list:
801 dump_list.append(Dump.load(path, ' '))
802 return DumpList(dump_list)
803
804 def __len__(self):
805 return len(self._dump_list)
806
807 def __iter__(self):
808 for dump in self._dump_list:
809 yield dump
810
811 def __getitem__(self, index):
812 return self._dump_list[index]
813
814
815 class Command(object):
816 """Subclasses are a subcommand for this executable.
817
818 See COMMANDS in main().
819 """
820 def __init__(self, usage):
821 self._parser = optparse.OptionParser(usage)
822
823 @staticmethod
824 def load_basic_files(dump_path, multiple):
825 prefix = Command._find_prefix(dump_path)
826 symbol_mapping = SymbolMapping(prefix)
827 symbol_mapping.prepare()
828 bucket_set = BucketSet.load(prefix)
829 if multiple:
830 dump_list = DumpList.load(Command._find_all_dumps(dump_path))
831 else:
832 dump = Dump.load(dump_path)
833 symbol_cache = SymbolCache(prefix)
834 symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping)
835 symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping)
836 bucket_set.symbolize(symbol_cache)
837 if multiple:
838 return (bucket_set, dump_list)
839 else:
840 return (bucket_set, dump)
841
842 @staticmethod
843 def _find_prefix(path):
844 return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path)
845
846 @staticmethod
847 def _find_all_dumps(dump_path):
848 prefix = Command._find_prefix(dump_path)
849 dump_path_list = [dump_path]
850
851 n = int(dump_path[-9:-5])  # the NNNN in <prefix>.NNNN.heap
852 n += 1
853 while True:
854 p = '%s.%04d.heap' % (prefix, n)
855 if os.path.exists(p):
856 dump_path_list.append(p)
857 else:
858 break
859 n += 1
860
861 return dump_path_list
862
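A sketch of the dump naming convention these helpers assume; given a hypothetical dump_path 'chrome.1234.0002.heap', _find_prefix returns 'chrome.1234' and _find_all_dumps collects consecutive dumps until a number is missing:

  chrome.1234.0002.heap   <- dump_path; n starts from 0002
  chrome.1234.0003.heap
  chrome.1234.0004.heap   <- collection stops here if 0005 does not exist
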
863 def _parse_args(self, sys_argv, required):
864 options, args = self._parser.parse_args(sys_argv)
865 if len(args) != required + 1:
866 self._parser.error('needs %d argument(s).\n' % required)
867 return None
868 return (options, args)
869
870 def _parse_policy_list(self, options_policy):
871 if options_policy:
872 return options_policy.split(',')
873 else:
874 return None
875
876
877 class StacktraceCommand(Command):
878 def __init__(self):
879 super(StacktraceCommand, self).__init__(
880 'Usage: %prog stacktrace <dump>')
881
882 def do(self, sys_argv):
883 options, args = self._parse_args(sys_argv, 1)
884 dump_path = args[1]
885 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
886
887 StacktraceCommand._output(dump, bucket_set, sys.stdout)
888 return 0
889
890 @staticmethod
891 def _output(dump, bucket_set, out):
892 """Outputs a given stacktrace.
893
894 Args:
895 bucket_set: A BucketSet object.
896 out: A file object to output.
897 """
898 for line in dump.iter_stacktrace:
899 words = line.split()
900 bucket = bucket_set.get(int(words[BUCKET_ID]))
901 if not bucket:
902 continue
903 for i in range(0, BUCKET_ID - 1):
904 out.write(words[i] + ' ')
905 for frame in bucket.symbolized_stacktrace:
906 out.write(frame + ' ')
907 out.write('\n')
908
909
910 class PolicyCommands(Command):
911 def __init__(self, command):
912 super(PolicyCommands, self).__init__(
913 'Usage: %%prog %s [-p POLICY] <first-dump>' % command)
914 self._parser.add_option('-p', '--policy', type='string', dest='policy',
915 help='profile with POLICY', metavar='POLICY')
916
917 def _set_up(self, sys_argv):
918 options, args = self._parse_args(sys_argv, 1)
919 dump_path = args[1]
920 (bucket_set, dumps) = Command.load_basic_files(dump_path, True)
921
922 policy_set = PolicySet.load(self._parse_policy_list(options.policy))
923 return policy_set, dumps, bucket_set
924
925 def _apply_policy(self, dump, policy, bucket_set, first_dump_time):
926 """Aggregates the total memory size of each component.
927
928 Iterates through all stacktraces and attributes each to a component based
929 on the policy. Rules are matched in order, so the rule order matters.
930
931 Args:
932 dump: A Dump object.
933 policy: A Policy object.
934 bucket_set: A BucketSet object.
935 first_dump_time: An integer representing the time when the first dump
936 was taken.
937
938 Returns:
939 A dict mapping component names to their sizes.
940 """
941 LOGGER.info(' %s' % dump.path)
942 sizes = dict((c, 0) for c in policy.components)
943
944 PolicyCommands._accumulate(dump, policy, bucket_set, sizes)
945
946 sizes['mmap-no-log'] = (
947 dump.global_stat('profiled-mmap_committed') -
948 sizes['mmap-total-log'])
949 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
950 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')
951
952 sizes['tc-no-log'] = (
953 dump.global_stat('profiled-malloc_committed') -
954 sizes['tc-total-log'])
955 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
956 sizes['tc-unused'] = (
957 sizes['mmap-tcmalloc'] -
958 dump.global_stat('profiled-malloc_committed'))
959 sizes['tc-total'] = sizes['mmap-tcmalloc']
960
961 for key, value in {
962 'total': 'total_committed',
963 'filemapped': 'file_committed',
964 'file-exec': 'file-exec_committed',
965 'file-nonexec': 'file-nonexec_committed',
966 'anonymous': 'anonymous_committed',
967 'stack': 'stack_committed',
968 'other': 'other_committed',
969 'unhooked-absent': 'nonprofiled-absent_committed',
970 'unhooked-anonymous': 'nonprofiled-anonymous_committed',
971 'unhooked-file-exec': 'nonprofiled-file-exec_committed',
972 'unhooked-file-nonexec': 'nonprofiled-file-nonexec_committed',
973 'unhooked-stack': 'nonprofiled-stack_committed',
974 'unhooked-other': 'nonprofiled-other_committed',
975 'total-vm': 'total_virtual',
976 'filemapped-vm': 'file_virtual',
977 'anonymous-vm': 'anonymous_virtual',
978 'other-vm': 'other_virtual' }.iteritems():
979 if key in sizes:
980 sizes[key] = dump.global_stat(value)
981
982 if 'mustbezero' in sizes:
983 removed_list = (
984 'profiled-mmap_committed',
985 'nonprofiled-absent_committed',
986 'nonprofiled-anonymous_committed',
987 'nonprofiled-file-exec_committed',
988 'nonprofiled-file-nonexec_committed',
989 'nonprofiled-stack_committed',
990 'nonprofiled-other_committed')
991 sizes['mustbezero'] = (
992 dump.global_stat('total_committed') -
993 sum(dump.global_stat(removed) for removed in removed_list))
994 if 'total-exclude-profiler' in sizes:
995 sizes['total-exclude-profiler'] = (
996 dump.global_stat('total_committed') -
997 (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
998 if 'hour' in sizes:
999 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
1000 if 'minute' in sizes:
1001 sizes['minute'] = (dump.time - first_dump_time) / 60.0
1002 if 'second' in sizes:
1003 sizes['second'] = dump.time - first_dump_time
1004
1005 return sizes
1006
1007 @staticmethod
1008 def _accumulate(dump, policy, bucket_set, sizes):
1009 for line in dump.iter_stacktrace:
1010 words = line.split()
1011 bucket = bucket_set.get(int(words[BUCKET_ID]))
1012 component_match = policy.find(bucket)
1013 sizes[component_match] += int(words[COMMITTED])
1014
1015 if component_match.startswith('tc-'):
1016 sizes['tc-total-log'] += int(words[COMMITTED])
1017 elif component_match.startswith('mmap-'):
1018 sizes['mmap-total-log'] += int(words[COMMITTED])
1019 else:
1020 sizes['other-total-log'] += int(words[COMMITTED])
1021
1022
1023 class CSVCommand(PolicyCommands):
1024 def __init__(self):
1025 super(CSVCommand, self).__init__('csv')
1026
1027 def do(self, sys_argv):
1028 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1029 return self._output(policy_set, dumps, bucket_set, sys.stdout)
1030
1031 def _output(self, policy_set, dumps, bucket_set, out):
1032 max_components = 0
1033 for label in policy_set:
1034 max_components = max(max_components, len(policy_set[label].components))
1035
1036 for label in sorted(policy_set):
1037 components = policy_set[label].components
1038 if len(policy_set) > 1:
1039 out.write('%s%s\n' % (label, ',' * (max_components - 1)))
1040 out.write('%s%s\n' % (
1041 ','.join(components), ',' * (max_components - len(components))))
1042
1043 LOGGER.info('Applying a policy %s to...' % label)
1044 for dump in dumps:
1045 component_sizes = self._apply_policy(
1046 dump, policy_set[label], bucket_set, dumps[0].time)
1047 s = []
1048 for c in components:
1049 if c in ('hour', 'minute', 'second'):
1050 s.append('%05.5f' % (component_sizes[c]))
1051 else:
1052 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
1053 out.write('%s%s\n' % (
1054 ','.join(s), ',' * (max_components - len(components))))
1055
1056 bucket_set.clear_component_cache()
1057
1058 return 0
1059
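A sketch of the CSV that _output produces, assuming a single loaded policy (so no label row) with three components and two dumps; the sizes are invented, printed in MB except for hour/minute/second columns:

  mmap-v8,tc-webkit,tc-other
  12.50000,3.25000,0.75000
  13.00000,3.50000,0.80000
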
1060
1061 class JSONCommand(PolicyCommands):
1062 def __init__(self):
1063 super(JSONCommand, self).__init__('json')
1064
1065 def do(self, sys_argv):
1066 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1067 return self._output(policy_set, dumps, bucket_set, sys.stdout)
1068
1069 def _output(self, policy_set, dumps, bucket_set, out):
1070 json_base = {
1071 'version': 'JSON_DEEP_2',
1072 'policies': {},
1073 }
1074
1075 for label in sorted(policy_set):
1076 json_base['policies'][label] = {
1077 'legends': policy_set[label].components,
1078 'snapshots': [],
1079 }
1080
1081 LOGGER.info('Applying a policy %s to...' % label)
1082 for dump in dumps:
1083 component_sizes = self._apply_policy(
1084 dump, policy_set[label], bucket_set, dumps[0].time)
1085 component_sizes['dump_path'] = dump.path
1086 component_sizes['dump_time'] = datetime.fromtimestamp(
1087 dump.time).strftime('%Y-%m-%d %H:%M:%S')
1088 json_base['policies'][label]['snapshots'].append(component_sizes)
1089
1090 bucket_set.clear_component_cache()
1091
1092 json.dump(json_base, out, indent=2, sort_keys=True)
1093
1094 return 0
1095
1096
1097 class ListCommand(PolicyCommands):
1098 def __init__(self):
1099 super(ListCommand, self).__init__('list')
1100
1101 def do(self, sys_argv):
1102 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1103 return self._output(policy_set, dumps, bucket_set, sys.stdout)
1104
1105 def _output(self, policy_set, dumps, bucket_set, out):
1106 for label in sorted(policy_set):
1107 LOGGER.info('Applying a policy %s to...' % label)
1108 for dump in dumps:
1109 component_sizes = self._apply_policy(
1110 dump, policy_set[label], bucket_set, dump.time)
1111 out.write('%s for %s:\n' % (label, dump.path))
1112 for c in policy_set[label].components:
1113 if c in ['hour', 'minute', 'second']:
1114 out.write('%40s %12.3f\n' % (c, component_sizes[c]))
1115 else:
1116 out.write('%40s %12d\n' % (c, component_sizes[c]))
1117
1118 bucket_set.clear_component_cache()
1119
1120 return 0
1121
1122
1123 class ExpandCommand(Command):
1124 def __init__(self):
1125 super(ExpandCommand, self).__init__(
1126 'Usage: %prog expand <dump> <policy> <component> <depth>')
1127
1128 def do(self, sys_argv):
1129 options, args = self._parse_args(sys_argv, 4)
1130 dump_path = args[1]
1131 target_policy = args[2]
1132 component_name = args[3]
1133 depth = args[4]
1134 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
1135 policy_set = PolicySet.load(self._parse_policy_list(target_policy))
1136
1137 self._output(dump, policy_set[target_policy], bucket_set,
1138 component_name, int(depth), sys.stdout)
1139 return 0
1140
1141 def _output(self, dump, policy, bucket_set, component_name, depth, out):
1142 """Prints all stacktraces in a given component of given depth.
1143
1144 Args:
1145 dump: A Dump object.
1146 policy: A Policy object.
1147 bucket_set: A BucketSet object.
1148 component_name: A name of component for filtering.
1149 depth: An integer representing depth to be printed.
1150 out: An IO object to output.
1151 """
1152 sizes = {}
1153
1154 ExpandCommand._accumulate(
1155 dump, policy, bucket_set, component_name, depth, sizes)
1156
1157 sorted_sizes_list = sorted(
1158 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
1159 total = 0
1160 for size_pair in sorted_sizes_list:
1161 out.write('%10d %s\n' % (size_pair[1], size_pair[0]))
1162 total += size_pair[1]
1163 LOGGER.info('total: %d\n' % total)
1164
1165 @staticmethod
1166 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes):
1167 for line in dump.iter_stacktrace:
1168 words = line.split()
1169 bucket = bucket_set.get(int(words[BUCKET_ID]))
1170 component_match = policy.find(bucket)
1171 if component_match == component_name:
1172 stacktrace_sequence = ''
1173 if bucket.typeinfo:
1174 stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo
1175 stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name
1176 for stack in bucket.symbolized_stacktrace[
1177 0 : min(len(bucket.symbolized_stacktrace), 1 + depth)]:
1178 stacktrace_sequence += stack + ' '
1179 if stacktrace_sequence not in sizes:
1180 sizes[stacktrace_sequence] = 0
1181 sizes[stacktrace_sequence] += int(words[COMMITTED])
1182
1183
1184 class PProfCommand(Command):
1185 def __init__(self):
1186 super(PProfCommand, self).__init__(
1187 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
1188 self._parser.add_option('-c', '--component', type='string',
1189 dest='component',
1190 help='restrict to COMPONENT', metavar='COMPONENT')
1191
1192 def do(self, sys_argv):
1193 options, args = self._parse_args(sys_argv, 2)
1194
1195 dump_path = args[1]
1196 target_policy = args[2]
1197 component = options.component
1198
1199 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
1200 policy_set = PolicySet.load(self._parse_policy_list(target_policy))
1201
1202 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f:
1203 maps_lines = maps_f.readlines()
1204 PProfCommand._output(
1205 dump, policy_set[target_policy], bucket_set, maps_lines, component,
1206 sys.stdout)
1207
1208 return 0
1209
1210 @staticmethod
1211 def _output(dump, policy, bucket_set, maps_lines, component_name, out):
1212 """Converts the heap profile dump so it can be processed by pprof.
1213
1214 Args:
1215 dump: A Dump object.
1216 policy: A Policy object.
1217 bucket_set: A BucketSet object.
1218 maps_lines: A list of strings containing /proc/.../maps.
1219 component_name: A name of component for filtering.
1220 out: An IO object to output.
1221 """
1222 out.write('heap profile: ')
1223 com_committed, com_allocs = PProfCommand._accumulate(
1224 dump, policy, bucket_set, component_name)
1225
1226 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
1227 com_allocs, com_committed, com_allocs, com_committed))
1228
1229 PProfCommand._output_stacktrace_lines(
1230 dump, policy, bucket_set, component_name, out)
1231
1232 out.write('MAPPED_LIBRARIES:\n')
1233 for line in maps_lines:
1234 out.write(line)
1235
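A sketch of the pprof-compatible text that _output produces; the counts, sizes, addresses and mapped library are invented:

  heap profile:      8:  5242880 [     8:  5242880] @ heapprofile
       3:  1048576 [     3:  1048576] @ 0x00007f6b1c2d3e40 0x00007f6b1c2d4f80
  MAPPED_LIBRARIES:
  7f6b1c000000-7f6b1d000000 r-xp 00000000 08:01 1234  /opt/chrome/chrome
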
1236 @staticmethod
1237 def _accumulate(dump, policy, bucket_set, component_name):
1238 """Accumulates size of committed chunks and the number of allocated chunks.
1239
1240 Args:
1241 dump: A Dump object.
1242 policy: A Policy object.
1243 bucket_set: A BucketSet object.
1244 component_name: A name of component for filtering.
1245
1246 Returns:
1247 Two integers which are the accumulated size of committed regions and the
1248 number of allocated chunks, respectively.
1249 """
1250 com_committed = 0
1251 com_allocs = 0
1252 for line in dump.iter_stacktrace:
1253 words = line.split()
1254 bucket = bucket_set.get(int(words[BUCKET_ID]))
1255 if (not bucket or
1256 (component_name and component_name != policy.find(bucket))):
1257 continue
1258
1259 com_committed += int(words[COMMITTED])
1260 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
1261
1262 return com_committed, com_allocs
1263
1264 @staticmethod
1265 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out):
1266 """Prints information of stacktrace lines for pprof.
1267
1268 Args:
1269 dump: A Dump object.
1270 policy: A Policy object.
1271 bucket_set: A BucketSet object.
1272 component_name: A name of component for filtering.
1273 out: An IO object to output.
1274 """
1275 for line in dump.iter_stacktrace:
1276 words = line.split()
1277 bucket = bucket_set.get(int(words[BUCKET_ID]))
1278 if (not bucket or
1279 (component_name and component_name != policy.find(bucket))):
1280 continue
1281
1282 out.write('%6d: %8s [%6d: %8s] @' % (
1283 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
1284 words[COMMITTED],
1285 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
1286 words[COMMITTED]))
1287 for address in bucket.stacktrace:
1288 out.write(' 0x%016x' % address)
1289 out.write('\n')
1290
1291
1292 def main():
1293 COMMANDS = {
1294 'csv': CSVCommand,
1295 'expand': ExpandCommand,
1296 'json': JSONCommand,
1297 'list': ListCommand,
1298 'pprof': PProfCommand,
1299 'stacktrace': StacktraceCommand,
1300 }
1301
1302 if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS:
1303 sys.stderr.write("""Usage: %s <command> [options] [<args>]
1304
1305 Commands:
1306 csv Classify memory usage in CSV
1307 expand Show all stacktraces contained in the specified component
1308 json Classify memory usage in JSON
1309 list Classify memory usage in simple listing format
1310 pprof Format the profile dump so that it can be processed by pprof
1311 stacktrace Convert runtime addresses to symbol names
1312
1313 Quick Reference:
1314 dmprof csv [-p POLICY] <first-dump>
1315 dmprof expand <dump> <policy> <component> <depth>
1316 dmprof json [-p POLICY] <first-dump>
1317 dmprof list [-p POLICY] <first-dump>
1318 dmprof pprof [-c COMPONENT] <dump> <policy>
1319 dmprof stacktrace <dump>
1320 """ % (sys.argv[0]))
1321 sys.exit(1)
1322 action = sys.argv.pop(1)
1323
1324 LOGGER.setLevel(logging.DEBUG)
1325 handler = logging.StreamHandler()
1326 handler.setLevel(logging.INFO)
1327 formatter = logging.Formatter('%(message)s')
1328 handler.setFormatter(formatter)
1329 LOGGER.addHandler(handler)
1330
1331 try:
1332 errorcode = COMMANDS[action]().do(sys.argv)
1333 except ParsingException, e:
1334 errorcode = 1
1335 sys.stderr.write('Exit by parsing error: %s\n' % e)
1336
1337 return errorcode
1338
1339
1340 if __name__ == '__main__':
1341 sys.exit(main())
