Chromium Code Reviews

Side by Side Diff: tools/deep_memory_profiler/dmprof.py

Issue 19346002: Refactor dmprof: Split dmprof.py into modules. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 5 months ago
NEW FILE (right column, after this patch; 17 lines in total):
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """The Deep Memory Profiler analyzer script.
6
7 See http://dev.chromium.org/developers/deep-memory-profiler for details.
8 """
9
10 import logging
11 import sys
12
13 from lib.exceptions import ParsingException
14 import subcommands
15
16
17 LOGGER = logging.getLogger('dmprof')

OLD FILE (left column, before this patch; continues below from line 54):
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """The deep heap profiler script for Chrome."""
6
7 import copy
8 import cStringIO
9 import datetime
10 import json
11 import logging
12 import optparse
13 import os
14 import re
15 import struct
16 import subprocess
17 import sys
18 import tempfile
19 import time
20 import zipfile
21
22 try:
23   from collections import OrderedDict  # pylint: disable=E0611
24 except ImportError:
25   # TODO(dmikurube): Remove this once Python 2.7 is required.
26   BASE_PATH = os.path.dirname(os.path.abspath(__file__))
27   SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party')
28   sys.path.insert(0, SIMPLEJSON_PATH)
29   from simplejson import OrderedDict
30
31 from range_dict import ExclusiveRangeDict
32
33 BASE_PATH = os.path.dirname(os.path.abspath(__file__))
34 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
35     BASE_PATH, os.pardir, 'find_runtime_symbols')
36 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
37
38 import find_runtime_symbols
39 import prepare_symbol_info
40 import proc_maps
41
42 from find_runtime_symbols import FUNCTION_SYMBOLS
43 from find_runtime_symbols import SOURCEFILE_SYMBOLS
44 from find_runtime_symbols import TYPEINFO_SYMBOLS
45
46 BUCKET_ID = 5
47 VIRTUAL = 0
48 COMMITTED = 1
49 ALLOC_COUNT = 2
50 FREE_COUNT = 3
51 NULL_REGEX = re.compile('')
52
53 LOGGER = logging.getLogger('dmprof')
54 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')
55 CHROME_SRC_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir)
56
57 DEFAULT_SORTERS = [
58 os.path.join(BASE_PATH, 'sorter.malloc-component.json'),
59 os.path.join(BASE_PATH, 'sorter.malloc-type.json'),
60 os.path.join(BASE_PATH, 'sorter.vm-map.json'),
61 os.path.join(BASE_PATH, 'sorter.vm-sharing.json'),
62 ]
63
64
65 # Heap Profile Dump versions
66
67 # DUMP_DEEP_[1-4] are obsolete.
68 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks.
69 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
70 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
71 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
72 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
73 DUMP_DEEP_1 = 'DUMP_DEEP_1'
74 DUMP_DEEP_2 = 'DUMP_DEEP_2'
75 DUMP_DEEP_3 = 'DUMP_DEEP_3'
76 DUMP_DEEP_4 = 'DUMP_DEEP_4'
77
78 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
79
80 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
81 # malloc and mmap are identified in bucket files.
82 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
83 DUMP_DEEP_5 = 'DUMP_DEEP_5'
84
85 # DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
86 DUMP_DEEP_6 = 'DUMP_DEEP_6'
87
88 # Heap Profile Policy versions
89
90 # POLICY_DEEP_1 DOES NOT include allocation_type columns.
91 # mmap regions are distinguished with mmap frames in the pattern column.
92 POLICY_DEEP_1 = 'POLICY_DEEP_1'
93
94 # POLICY_DEEP_2 DOES include allocation_type columns.
95 # mmap regions are distinguished with the allocation_type column.
96 POLICY_DEEP_2 = 'POLICY_DEEP_2'
97
98 # POLICY_DEEP_3 is in JSON format.
99 POLICY_DEEP_3 = 'POLICY_DEEP_3'
100
101 # POLICY_DEEP_4 contains typeinfo.
102 POLICY_DEEP_4 = 'POLICY_DEEP_4'
103
104
105 class EmptyDumpException(Exception):
106 def __init__(self, value=''):
107 super(EmptyDumpException, self).__init__()
108 self.value = value
109 def __str__(self):
110 return repr(self.value)
111
112
113 class ParsingException(Exception):
114 def __init__(self, value=''):
115 super(ParsingException, self).__init__()
116 self.value = value
117 def __str__(self):
118 return repr(self.value)
119
120
121 class InvalidDumpException(ParsingException):
122 def __init__(self, value):
123 super(InvalidDumpException, self).__init__()
124 self.value = value
125 def __str__(self):
126 return "invalid heap profile dump: %s" % repr(self.value)
127
128
129 class ObsoleteDumpVersionException(ParsingException):
130 def __init__(self, value):
131 super(ObsoleteDumpVersionException, self).__init__()
132 self.value = value
133 def __str__(self):
134 return "obsolete heap profile dump version: %s" % repr(self.value)
135
136
137 class ListAttribute(ExclusiveRangeDict.RangeAttribute):
138 """Represents a list for an attribute in range_dict.ExclusiveRangeDict."""
139 def __init__(self):
140 super(ListAttribute, self).__init__()
141 self._list = []
142
143 def __str__(self):
144 return str(self._list)
145
146 def __repr__(self):
147 return 'ListAttribute' + str(self._list)
148
149 def __len__(self):
150 return len(self._list)
151
152 def __iter__(self):
153 for x in self._list:
154 yield x
155
156 def __getitem__(self, index):
157 return self._list[index]
158
159 def __setitem__(self, index, value):
160 if index >= len(self._list):
161 self._list.extend([None] * (index + 1 - len(self._list)))
162 self._list[index] = value
163
164 def copy(self):
165 new_list = ListAttribute()
166 for index, item in enumerate(self._list):
167 new_list[index] = copy.deepcopy(item)
168 return new_list
169
170
171 class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
172 """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
173 _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
174 0, # begin
175 0, # end
176 '-', # readable
177 '-', # writable
178 '-', # executable
179 '-', # private
180 0, # offset
181 '00', # major
182 '00', # minor
183 0, # inode
184 '' # name
185 )
186
187 def __init__(self):
188 super(ProcMapsEntryAttribute, self).__init__()
189 self._entry = self._DUMMY_ENTRY.as_dict()
190
191 def __str__(self):
192 return str(self._entry)
193
194 def __repr__(self):
195 return 'ProcMapsEntryAttribute' + str(self._entry)
196
197 def __getitem__(self, key):
198 return self._entry[key]
199
200 def __setitem__(self, key, value):
201 if key not in self._entry:
202 raise KeyError(key)
203 self._entry[key] = value
204
205 def copy(self):
206 new_entry = ProcMapsEntryAttribute()
207 for key, value in self._entry.iteritems():
208 new_entry[key] = copy.deepcopy(value)
209 return new_entry
210
211
212 def skip_while(index, max_index, skipping_condition):
213 """Increments |index| until |skipping_condition|(|index|) is False.
214
215 Returns:
216 A pair of an integer indicating the line number after skipping, and a
217 boolean which is True if a line was found for which skipping_condition
218 is False.
219 """
220 while skipping_condition(index):
221 index += 1
222 if index >= max_index:
223 return index, False
224 return index, True
225
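# Editor's sketch (not in the reviewed file): a minimal skip_while() usage
# with an invented list, to show how the returned pair is read.
_example_lines = ['# header', '# comment', 'data 1', 'data 2']
_example_index, _example_found = skip_while(
    0, len(_example_lines), lambda n: _example_lines[n].startswith('#'))
# Here _example_index == 2 ('data 1' is the first line failing the condition)
# and _example_found is True.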
226
227 class SymbolDataSources(object):
228 """Manages symbol data sources in a process.
229
230 The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and
231 so on. They are collected into a directory '|prefix|.symmap' from the binary
232 files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py.
233
234 The binaries themselves are not required for profiling. The prepared data
235 sources work in place of a binary even if the binary has been overwritten
236 with another binary.
237
238 Note that loading the symbol data sources takes a long time since they are
239 often very big. Therefore, 'dmprof' uses 'SymbolMappingCache', which caches
240 only the symbols that are actually used.
241 """
242 def __init__(self, prefix, alternative_dirs=None):
243 self._prefix = prefix
244 self._prepared_symbol_data_sources_path = None
245 self._loaded_symbol_data_sources = None
246 self._alternative_dirs = alternative_dirs or {}
247
248 def prepare(self):
249 """Prepares symbol data sources by extracting mapping from a binary.
250
251 The prepared symbol data sources are stored in a directory. The directory
252 name is stored in |self._prepared_symbol_data_sources_path|.
253
254 Returns:
255 True if succeeded.
256 """
257 LOGGER.info('Preparing symbol mapping...')
258 self._prepared_symbol_data_sources_path, used_tempdir = (
259 prepare_symbol_info.prepare_symbol_info(
260 self._prefix + '.maps',
261 output_dir_path=self._prefix + '.symmap',
262 alternative_dirs=self._alternative_dirs,
263 use_tempdir=True,
264 use_source_file_name=True))
265 if self._prepared_symbol_data_sources_path:
266 LOGGER.info(' Prepared symbol mapping.')
267 if used_tempdir:
268 LOGGER.warn(' Using a temporary directory for symbol mapping.')
269 LOGGER.warn(' Delete it by yourself.')
270 LOGGER.warn(' Or, move the directory by yourself to use it later.')
271 return True
272 else:
273 LOGGER.warn(' Failed to prepare symbol mapping.')
274 return False
275
276 def get(self):
277 """Returns the prepared symbol data sources.
278
279 Returns:
280 The prepared symbol data sources. None if failed.
281 """
282 if not self._prepared_symbol_data_sources_path and not self.prepare():
283 return None
284 if not self._loaded_symbol_data_sources:
285 LOGGER.info('Loading symbol mapping...')
286 self._loaded_symbol_data_sources = (
287 find_runtime_symbols.RuntimeSymbolsInProcess.load(
288 self._prepared_symbol_data_sources_path))
289 return self._loaded_symbol_data_sources
290
291 def path(self):
292 """Returns the path of the prepared symbol data sources if possible."""
293 if not self._prepared_symbol_data_sources_path and not self.prepare():
294 return None
295 return self._prepared_symbol_data_sources_path
296
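# Editor's sketch (not in the reviewed file): typical SymbolDataSources usage.
# The dump prefix below is hypothetical; prepare() derives '<prefix>.maps',
# writes '<prefix>.symmap', and get() loads the result via
# find_runtime_symbols.RuntimeSymbolsInProcess.
def _example_symbol_data_sources_usage():
  sources = SymbolDataSources('/tmp/chrome.12345')
  if not sources.prepare():
    return None
  return sources.get()  # RuntimeSymbolsInProcess, or None on failure.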
297
298 class SymbolFinder(object):
299 """Finds corresponding symbols from addresses.
300
301 This class only 'find()'s symbols for a specified |address_list|.
302 It is introduced to make the finder mockable.
303 """
304 def __init__(self, symbol_type, symbol_data_sources):
305 self._symbol_type = symbol_type
306 self._symbol_data_sources = symbol_data_sources
307
308 def find(self, address_list):
309 return find_runtime_symbols.find_runtime_symbols(
310 self._symbol_type, self._symbol_data_sources.get(), address_list)
311
312
313 class SymbolMappingCache(object):
314 """Caches mapping from actually used addresses to symbols.
315
316 'update()' updates the cache from the original symbol data sources via
317 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'.
318 """
319 def __init__(self):
320 self._symbol_mapping_caches = {
321 FUNCTION_SYMBOLS: {},
322 SOURCEFILE_SYMBOLS: {},
323 TYPEINFO_SYMBOLS: {},
324 }
325
326 def update(self, symbol_type, bucket_set, symbol_finder, cache_f):
327 """Updates symbol mapping cache on memory and in a symbol cache file.
328
329 It reads a cached symbol mapping from the symbol cache file |cache_f| if it
330 exists. Unresolved addresses are then resolved and added to the cache,
331 both in memory and in the symbol cache file, using 'SymbolFinder'.
332
333 A cache file is formatted as follows:
334 <Address> <Symbol>
335 <Address> <Symbol>
336 <Address> <Symbol>
337 ...
338
339 Args:
340 symbol_type: A type of symbols to update. It should be one of
341 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
342 bucket_set: A BucketSet object.
343 symbol_finder: A SymbolFinder object to find symbols.
344 cache_f: A readable and writable IO object of the symbol cache file.
345 """
346 cache_f.seek(0, os.SEEK_SET)
347 self._load(cache_f, symbol_type)
348
349 unresolved_addresses = sorted(
350 address for address in bucket_set.iter_addresses(symbol_type)
351 if address not in self._symbol_mapping_caches[symbol_type])
352
353 if not unresolved_addresses:
354 LOGGER.info('No need to resolve any more addresses.')
355 return
356
357 cache_f.seek(0, os.SEEK_END)
358 LOGGER.info('Loading %d unresolved addresses.' %
359 len(unresolved_addresses))
360 symbol_dict = symbol_finder.find(unresolved_addresses)
361
362 for address, symbol in symbol_dict.iteritems():
363 stripped_symbol = symbol.strip() or '?'
364 self._symbol_mapping_caches[symbol_type][address] = stripped_symbol
365 cache_f.write('%x %s\n' % (address, stripped_symbol))
366
367 def lookup(self, symbol_type, address):
368 """Looks up a symbol for a given |address|.
369
370 Args:
371 symbol_type: A type of symbols to look up. It should be one of
372 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
373 address: An integer that represents an address.
374
375 Returns:
376 A string that represents a symbol.
377 """
378 return self._symbol_mapping_caches[symbol_type].get(address)
379
380 def _load(self, cache_f, symbol_type):
381 try:
382 for line in cache_f:
383 items = line.rstrip().split(None, 1)
384 if len(items) == 1:
385 items.append('??')
386 self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1]
387 LOGGER.info('Loaded %d entries from symbol cache.' %
388 len(self._symbol_mapping_caches[symbol_type]))
389 except IOError as e:
390 LOGGER.info('The symbol cache file is invalid: %s' % e)
391
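# Editor's sketch (not in the reviewed file): the cache file format that
# _load() reads is one "<hex address> <symbol>" pair per line.  The addresses
# and symbol names below are invented.
_example_cache_text = ('7f3a2b4c1d20 WebCore::Document::createElement\n'
                       '7f3a2b4c2e40 ??\n')
_example_mapping = {}
for _cache_line in _example_cache_text.splitlines():
  _items = _cache_line.rstrip().split(None, 1)
  if len(_items) == 1:
    _items.append('??')
  _example_mapping[int(_items[0], 16)] = _items[1]
# _example_mapping maps 0x7f3a2b4c1d20 to 'WebCore::Document::createElement'.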
392
393 class Rule(object):
394 """Represents one matching rule in a policy file."""
395
396 def __init__(self,
397 name,
398 allocator_type,
399 stackfunction_pattern=None,
400 stacksourcefile_pattern=None,
401 typeinfo_pattern=None,
402 mappedpathname_pattern=None,
403 mappedpermission_pattern=None,
404 sharedwith=None):
405 self._name = name
406 self._allocator_type = allocator_type
407
408 self._stackfunction_pattern = None
409 if stackfunction_pattern:
410 self._stackfunction_pattern = re.compile(
411 stackfunction_pattern + r'\Z')
412
413 self._stacksourcefile_pattern = None
414 if stacksourcefile_pattern:
415 self._stacksourcefile_pattern = re.compile(
416 stacksourcefile_pattern + r'\Z')
417
418 self._typeinfo_pattern = None
419 if typeinfo_pattern:
420 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')
421
422 self._mappedpathname_pattern = None
423 if mappedpathname_pattern:
424 self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z')
425
426 self._mappedpermission_pattern = None
427 if mappedpermission_pattern:
428 self._mappedpermission_pattern = re.compile(
429 mappedpermission_pattern + r'\Z')
430
431 self._sharedwith = []
432 if sharedwith:
433 self._sharedwith = sharedwith
434
435 @property
436 def name(self):
437 return self._name
438
439 @property
440 def allocator_type(self):
441 return self._allocator_type
442
443 @property
444 def stackfunction_pattern(self):
445 return self._stackfunction_pattern
446
447 @property
448 def stacksourcefile_pattern(self):
449 return self._stacksourcefile_pattern
450
451 @property
452 def typeinfo_pattern(self):
453 return self._typeinfo_pattern
454
455 @property
456 def mappedpathname_pattern(self):
457 return self._mappedpathname_pattern
458
459 @property
460 def mappedpermission_pattern(self):
461 return self._mappedpermission_pattern
462
463 @property
464 def sharedwith(self):
465 return self._sharedwith
466
467
468 class Policy(object):
469 """Represents a policy, a content of a policy file."""
470
471 def __init__(self, rules, version, components):
472 self._rules = rules
473 self._version = version
474 self._components = components
475
476 @property
477 def rules(self):
478 return self._rules
479
480 @property
481 def version(self):
482 return self._version
483
484 @property
485 def components(self):
486 return self._components
487
488 def find_rule(self, component_name):
489 """Finds a rule whose name is |component_name|. """
490 for rule in self._rules:
491 if rule.name == component_name:
492 return rule
493 return None
494
495 def find_malloc(self, bucket):
496 """Finds a matching component name which a given |bucket| belongs to.
497
498 Args:
499 bucket: A Bucket object to be matched against the rules.
500
501 Returns:
502 A string representing a component name.
503 """
504 assert not bucket or bucket.allocator_type == 'malloc'
505
506 if not bucket:
507 return 'no-bucket'
508 if bucket.component_cache:
509 return bucket.component_cache
510
511 stackfunction = bucket.symbolized_joined_stackfunction
512 stacksourcefile = bucket.symbolized_joined_stacksourcefile
513 typeinfo = bucket.symbolized_typeinfo
514 if typeinfo.startswith('0x'):
515 typeinfo = bucket.typeinfo_name
516
517 for rule in self._rules:
518 if (rule.allocator_type == 'malloc' and
519 (not rule.stackfunction_pattern or
520 rule.stackfunction_pattern.match(stackfunction)) and
521 (not rule.stacksourcefile_pattern or
522 rule.stacksourcefile_pattern.match(stacksourcefile)) and
523 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
524 bucket.component_cache = rule.name
525 return rule.name
526
527 assert False
528
529 def find_mmap(self, region, bucket_set,
530 pageframe=None, group_pfn_counts=None):
531 """Finds a matching component which a given mmap |region| belongs to.
532
533 It uses |bucket_set| to match with backtraces. If |pageframe| is given,
534 it considers memory sharing among processes.
535
536 NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
537 classified not only with bucket information (mappedpathname for example).
538
539 Args:
540 region: A tuple representing a memory region.
541 bucket_set: A BucketSet object to look up backtraces.
542 pageframe: A PageFrame object representing a pageframe, possibly
543 including a pagecount.
544 group_pfn_counts: A dict mapping a PFN to the number of times the
545 pageframe is mapped by the known "group (Chrome)" processes.
546
547 Returns:
548 A string representing a component name.
549 """
550 assert region[0] == 'hooked'
551 bucket = bucket_set.get(region[1]['bucket_id'])
552 assert not bucket or bucket.allocator_type == 'mmap'
553
554 if not bucket:
555 return 'no-bucket', None
556
557 stackfunction = bucket.symbolized_joined_stackfunction
558 stacksourcefile = bucket.symbolized_joined_stacksourcefile
559 sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)
560
561 for rule in self._rules:
562 if (rule.allocator_type == 'mmap' and
563 (not rule.stackfunction_pattern or
564 rule.stackfunction_pattern.match(stackfunction)) and
565 (not rule.stacksourcefile_pattern or
566 rule.stacksourcefile_pattern.match(stacksourcefile)) and
567 (not rule.mappedpathname_pattern or
568 rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and
569 (not rule.mappedpermission_pattern or
570 rule.mappedpermission_pattern.match(
571 region[1]['vma']['readable'] +
572 region[1]['vma']['writable'] +
573 region[1]['vma']['executable'] +
574 region[1]['vma']['private'])) and
575 (not rule.sharedwith or
576 not pageframe or sharedwith in rule.sharedwith)):
577 return rule.name, bucket
578
579 assert False
580
581 def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
582 """Finds a matching component which a given unhooked |region| belongs to.
583
584 If |pageframe| is given, it considers memory sharing among processes.
585
586 Args:
587 region: A tuple representing a memory region.
588 pageframe: A PageFrame object representing a pageframe, possibly
589 including a pagecount.
590 group_pfn_counts: A dict mapping a PFN to the number of times the
591 pageframe is mapped by the known "group (Chrome)" processes.
592
593 Returns:
594 A string representing a component name.
595 """
596 assert region[0] == 'unhooked'
597 sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)
598
599 for rule in self._rules:
600 if (rule.allocator_type == 'unhooked' and
601 (not rule.mappedpathname_pattern or
602 rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and
603 (not rule.mappedpermission_pattern or
604 rule.mappedpermission_pattern.match(
605 region[1]['vma']['readable'] +
606 region[1]['vma']['writable'] +
607 region[1]['vma']['executable'] +
608 region[1]['vma']['private'])) and
609 (not rule.sharedwith or
610 not pageframe or sharedwith in rule.sharedwith)):
611 return rule.name
612
613 assert False
614
615 @staticmethod
616 def load(filename, filetype):
617 """Loads a policy file of |filename| in a |format|.
618
619 Args:
620 filename: A filename to be loaded.
621 filetype: A string to specify a type of the file. Only 'json' is
622 supported for now.
623
624 Returns:
625 A loaded Policy object.
626 """
627 with open(os.path.join(BASE_PATH, filename)) as policy_f:
628 return Policy.parse(policy_f, filetype)
629
630 @staticmethod
631 def parse(policy_f, filetype):
632 """Parses a policy file content in a |format|.
633
634 Args:
635 policy_f: An IO object to be loaded.
636 filetype: A string to specify a type of the file. Only 'json' is
637 supported for now.
638
639 Returns:
640 A loaded Policy object.
641 """
642 if filetype == 'json':
643 return Policy._parse_json(policy_f)
644 else:
645 return None
646
647 @staticmethod
648 def _parse_json(policy_f):
649 """Parses policy file in json format.
650
651 A policy file contains component names and their stacktrace patterns
652 written as regular expressions. The patterns are matched against the
653 symbols of each stacktrace in the order written in the policy file.
654
655 Args:
656 policy_f: A File/IO object to read.
657
658 Returns:
659 A loaded policy object.
660 """
661 policy = json.load(policy_f)
662
663 rules = []
664 for rule in policy['rules']:
665 stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
666 stacksourcefile = rule.get('stacksourcefile')
667 rules.append(Rule(
668 rule['name'],
669 rule['allocator'], # allocator_type
670 stackfunction,
671 stacksourcefile,
672 rule['typeinfo'] if 'typeinfo' in rule else None,
673 rule.get('mappedpathname'),
674 rule.get('mappedpermission'),
675 rule.get('sharedwith')))
676
677 return Policy(rules, policy['version'], policy['components'])
678
679 @staticmethod
680 def _categorize_pageframe(pageframe, group_pfn_counts):
681 """Categorizes a pageframe based on its sharing status.
682
683 Returns:
684 'private' if |pageframe| is not shared with other processes. 'group'
685 if |pageframe| is shared only with group (Chrome-related) processes.
686 'others' if |pageframe| is shared with non-group processes.
687 """
688 if not pageframe:
689 return 'private'
690
691 if pageframe.pagecount:
692 if pageframe.pagecount == 1:
693 return 'private'
694 elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
695 return 'group'
696 else:
697 return 'others'
698 else:
699 if pageframe.pfn in group_pfn_counts:
700 return 'group'
701 else:
702 return 'private'
703
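# Editor's sketch (not in the reviewed file): feeding Policy.parse() a
# hand-written policy dict.  The component names and patterns are invented;
# only the key names come from _parse_json() above.
_example_policy_json = cStringIO.StringIO(json.dumps({
    'version': 'POLICY_DEEP_4',
    'components': ['second', 'tc-webcore'],
    'rules': [
        {'name': 'tc-webcore', 'allocator': 'malloc',
         'stackfunction': '.*WebCore::.*'},
        {'name': 'second', 'allocator': 'mmap',
         'mappedpathname': '.*'},
    ],
}))
_example_policy = Policy.parse(_example_policy_json, 'json')
# _example_policy.components == ['second', 'tc-webcore']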
704
705 class PolicySet(object):
706 """Represents a set of policies."""
707
708 def __init__(self, policy_directory):
709 self._policy_directory = policy_directory
710
711 @staticmethod
712 def load(labels=None):
713 """Loads a set of policies via the "default policy directory".
714
715 The "default policy directory" contains pairs of policies and their labels.
716 For example, a policy "policy.l0.json" is labeled "l0" in the default
717 policy directory "policies.json".
718
719 All policies in the directory are loaded by default. Policies can be
720 limited by |labels|.
721
722 Args:
723 labels: An array that contains policy labels to be loaded.
724
725 Returns:
726 A PolicySet object.
727 """
728 default_policy_directory = PolicySet._load_default_policy_directory()
729 if labels:
730 specified_policy_directory = {}
731 for label in labels:
732 if label in default_policy_directory:
733 specified_policy_directory[label] = default_policy_directory[label]
734 # TODO(dmikurube): Load an un-labeled policy file.
735 return PolicySet._load_policies(specified_policy_directory)
736 else:
737 return PolicySet._load_policies(default_policy_directory)
738
739 def __len__(self):
740 return len(self._policy_directory)
741
742 def __iter__(self):
743 for label in self._policy_directory:
744 yield label
745
746 def __getitem__(self, label):
747 return self._policy_directory[label]
748
749 @staticmethod
750 def _load_default_policy_directory():
751 with open(POLICIES_JSON_PATH, mode='r') as policies_f:
752 default_policy_directory = json.load(policies_f)
753 return default_policy_directory
754
755 @staticmethod
756 def _load_policies(directory):
757 LOGGER.info('Loading policy files.')
758 policies = {}
759 for label in directory:
760 LOGGER.info(' %s: %s' % (label, directory[label]['file']))
761 loaded = Policy.load(directory[label]['file'], directory[label]['format'])
762 if loaded:
763 policies[label] = loaded
764 return PolicySet(policies)
765
766
767 class Bucket(object):
768 """Represents a bucket, which is a unit of memory block classification."""
769
770 def __init__(self, stacktrace, allocator_type, typeinfo, typeinfo_name):
771 self._stacktrace = stacktrace
772 self._allocator_type = allocator_type
773 self._typeinfo = typeinfo
774 self._typeinfo_name = typeinfo_name
775
776 self._symbolized_stackfunction = stacktrace
777 self._symbolized_joined_stackfunction = ''
778 self._symbolized_stacksourcefile = stacktrace
779 self._symbolized_joined_stacksourcefile = ''
780 self._symbolized_typeinfo = typeinfo_name
781
782 self.component_cache = ''
783
784 def __str__(self):
785 result = []
786 result.append(self._allocator_type)
787 if self._symbolized_typeinfo == 'no typeinfo':
788 result.append('tno_typeinfo')
789 else:
790 result.append('t' + self._symbolized_typeinfo)
791 result.append('n' + self._typeinfo_name)
792 result.extend(['%s(@%s)' % (function, sourcefile)
793 for function, sourcefile
794 in zip(self._symbolized_stackfunction,
795 self._symbolized_stacksourcefile)])
796 return ' '.join(result)
797
798 def symbolize(self, symbol_mapping_cache):
799 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|.
800
801 Args:
802 symbol_mapping_cache: A SymbolMappingCache object.
803 """
804 # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
805 self._symbolized_stackfunction = [
806 symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address)
807 for address in self._stacktrace]
808 self._symbolized_joined_stackfunction = ' '.join(
809 self._symbolized_stackfunction)
810 self._symbolized_stacksourcefile = [
811 symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address)
812 for address in self._stacktrace]
813 self._symbolized_joined_stacksourcefile = ' '.join(
814 self._symbolized_stacksourcefile)
815 if not self._typeinfo:
816 self._symbolized_typeinfo = 'no typeinfo'
817 else:
818 self._symbolized_typeinfo = symbol_mapping_cache.lookup(
819 TYPEINFO_SYMBOLS, self._typeinfo)
820 if not self._symbolized_typeinfo:
821 self._symbolized_typeinfo = 'no typeinfo'
822
823 def clear_component_cache(self):
824 self.component_cache = ''
825
826 @property
827 def stacktrace(self):
828 return self._stacktrace
829
830 @property
831 def allocator_type(self):
832 return self._allocator_type
833
834 @property
835 def typeinfo(self):
836 return self._typeinfo
837
838 @property
839 def typeinfo_name(self):
840 return self._typeinfo_name
841
842 @property
843 def symbolized_stackfunction(self):
844 return self._symbolized_stackfunction
845
846 @property
847 def symbolized_joined_stackfunction(self):
848 return self._symbolized_joined_stackfunction
849
850 @property
851 def symbolized_stacksourcefile(self):
852 return self._symbolized_stacksourcefile
853
854 @property
855 def symbolized_joined_stacksourcefile(self):
856 return self._symbolized_joined_stacksourcefile
857
858 @property
859 def symbolized_typeinfo(self):
860 return self._symbolized_typeinfo
861
862
863 class BucketSet(object):
864 """Represents a set of bucket."""
865 def __init__(self):
866 self._buckets = {}
867 self._code_addresses = set()
868 self._typeinfo_addresses = set()
869
870 def load(self, prefix):
871 """Loads all related bucket files.
872
873 Args:
874 prefix: A prefix string for bucket file names.
875 """
876 LOGGER.info('Loading bucket files.')
877
878 n = 0
879 skipped = 0
880 while True:
881 path = '%s.%04d.buckets' % (prefix, n)
882 if not os.path.exists(path) or not os.stat(path).st_size:
883 if skipped > 10:
884 break
885 n += 1
886 skipped += 1
887 continue
888 LOGGER.info(' %s' % path)
889 with open(path, 'r') as f:
890 self._load_file(f)
891 n += 1
892 skipped = 0
893
894 def _load_file(self, bucket_f):
895 for line in bucket_f:
896 words = line.split()
897 typeinfo = None
898 typeinfo_name = ''
899 stacktrace_begin = 2
900 for index, word in enumerate(words):
901 if index < 2:
902 continue
903 if word[0] == 't':
904 typeinfo = int(word[1:], 16)
905 self._typeinfo_addresses.add(typeinfo)
906 elif word[0] == 'n':
907 typeinfo_name = word[1:]
908 else:
909 stacktrace_begin = index
910 break
911 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]]
912 for frame in stacktrace:
913 self._code_addresses.add(frame)
914 self._buckets[int(words[0])] = Bucket(
915 stacktrace, words[1], typeinfo, typeinfo_name)
916
917 def __iter__(self):
918 for bucket_id, bucket_content in self._buckets.iteritems():
919 yield bucket_id, bucket_content
920
921 def __getitem__(self, bucket_id):
922 return self._buckets[bucket_id]
923
924 def get(self, bucket_id):
925 return self._buckets.get(bucket_id)
926
927 def symbolize(self, symbol_mapping_cache):
928 for bucket_content in self._buckets.itervalues():
929 bucket_content.symbolize(symbol_mapping_cache)
930
931 def clear_component_cache(self):
932 for bucket_content in self._buckets.itervalues():
933 bucket_content.clear_component_cache()
934
935 def iter_addresses(self, symbol_type):
936 if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]:
937 for function in self._code_addresses:
938 yield function
939 else:
940 for function in self._typeinfo_addresses:
941 yield function
942
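# Editor's sketch (not in the reviewed file): one line of a '.buckets' file as
# _load_file() reads it.  All values below are invented.
_example_bucket_words = (
    '123 malloc t7f1200aa10 nstd::basic_string 7f3a2b4c1d20 7f3a2b4c2e40'.split())
# _example_bucket_words[0] is the bucket id, _example_bucket_words[1] the
# allocator type ('malloc' or 'mmap'); the optional 't'/'n' entries carry the
# typeinfo address and name, and the remaining words are hexadecimal stack
# frame addresses.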
943
944 class PageFrame(object):
945 """Represents a pageframe and maybe its shared count."""
946 def __init__(self, pfn, size, pagecount, start_truncated, end_truncated):
947 self._pfn = pfn
948 self._size = size
949 self._pagecount = pagecount
950 self._start_truncated = start_truncated
951 self._end_truncated = end_truncated
952
953 def __str__(self):
954 result = str()
955 if self._start_truncated:
956 result += '<'
957 result += '%06x#%d' % (self._pfn, self._pagecount)
958 if self._end_truncated:
959 result += '>'
960 return result
961
962 def __repr__(self):
963 return str(self)
964
965 @staticmethod
966 def parse(encoded_pfn, size):
967 start = 0
968 end = len(encoded_pfn)
969 end_truncated = False
970 if encoded_pfn.endswith('>'):
971 end = len(encoded_pfn) - 1
972 end_truncated = True
973 pagecount_found = encoded_pfn.find('#')
974 pagecount = None
975 if pagecount_found >= 0:
976 encoded_pagecount = 'AAA' + encoded_pfn[pagecount_found+1 : end]
977 pagecount = struct.unpack(
978 '>I', '\x00' + encoded_pagecount.decode('base64'))[0]
979 end = pagecount_found
980 start_truncated = False
981 if encoded_pfn.startswith('<'):
982 start = 1
983 start_truncated = True
984
985 pfn = struct.unpack(
986 '>I', '\x00' + (encoded_pfn[start:end]).decode('base64'))[0]
987
988 return PageFrame(pfn, size, pagecount, start_truncated, end_truncated)
989
990 @property
991 def pfn(self):
992 return self._pfn
993
994 @property
995 def size(self):
996 return self._size
997
998 def set_size(self, size):
999 self._size = size
1000
1001 @property
1002 def pagecount(self):
1003 return self._pagecount
1004
1005 @property
1006 def start_truncated(self):
1007 return self._start_truncated
1008
1009 @property
1010 def end_truncated(self):
1011 return self._end_truncated
1012
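# Editor's sketch (not in the reviewed file): decoding the compact pageframe
# notation handled by PageFrame.parse().  The encoded value is hand-made:
# 'AAAB' is base64 for the bytes 00 00 01 (PFN 1), and the optional '#E'
# suffix encodes a page-mapping count of 4.
_example_frame = PageFrame.parse('AAAB#E', 4096)
# _example_frame.pfn == 1, _example_frame.pagecount == 4,
# str(_example_frame) == '000001#4'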
1013
1014 class PFNCounts(object):
1015 """Represents counts of PFNs in a process."""
1016
1017 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
1018
1019 def __init__(self, path, modified_time):
1020 matched = self._PATH_PATTERN.match(path)
1021 if matched:
1022 self._pid = int(matched.group(2))
1023 else:
1024 self._pid = 0
1025 self._command_line = ''
1026 self._pagesize = 4096
1027 self._path = path
1028 self._pfn_meta = ''
1029 self._pfnset = {}
1030 self._reason = ''
1031 self._time = modified_time
1032
1033 @staticmethod
1034 def load(path, log_header='Loading PFNs from a heap profile dump: '):
1035 pfnset = PFNCounts(path, float(os.stat(path).st_mtime))
1036 LOGGER.info('%s%s' % (log_header, path))
1037
1038 with open(path, 'r') as pfnset_f:
1039 pfnset.load_file(pfnset_f)
1040
1041 return pfnset
1042
1043 @property
1044 def path(self):
1045 return self._path
1046
1047 @property
1048 def pid(self):
1049 return self._pid
1050
1051 @property
1052 def time(self):
1053 return self._time
1054
1055 @property
1056 def reason(self):
1057 return self._reason
1058
1059 @property
1060 def iter_pfn(self):
1061 for pfn, count in self._pfnset.iteritems():
1062 yield pfn, count
1063
1064 def load_file(self, pfnset_f):
1065 prev_pfn_end_truncated = None
1066 for line in pfnset_f:
1067 line = line.strip()
1068 if line.startswith('GLOBAL_STATS:') or line.startswith('STACKTRACES:'):
1069 break
1070 elif line.startswith('PF: '):
1071 for encoded_pfn in line[3:].split():
1072 page_frame = PageFrame.parse(encoded_pfn, self._pagesize)
1073 if page_frame.start_truncated and (
1074 not prev_pfn_end_truncated or
1075 prev_pfn_end_truncated != page_frame.pfn):
1076 LOGGER.error('Broken page frame number: %s.' % encoded_pfn)
1077 self._pfnset[page_frame.pfn] = self._pfnset.get(page_frame.pfn, 0) + 1
1078 if page_frame.end_truncated:
1079 prev_pfn_end_truncated = page_frame.pfn
1080 else:
1081 prev_pfn_end_truncated = None
1082 elif line.startswith('PageSize: '):
1083 self._pagesize = int(line[10:])
1084 elif line.startswith('PFN: '):
1085 self._pfn_meta = line[5:]
1086 elif line.startswith('PageFrame: '):
1087 self._pfn_meta = line[11:]
1088 elif line.startswith('Time: '):
1089 self._time = float(line[6:])
1090 elif line.startswith('CommandLine: '):
1091 self._command_line = line[13:]
1092 elif line.startswith('Reason: '):
1093 self._reason = line[8:]
1094
1095
1096 class Dump(object):
1097 """Represents a heap profile dump."""
1098
1099 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
1100
1101 _HOOK_PATTERN = re.compile(
1102 r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
1103 r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
1104
1105 _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
1106 '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
1107 _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
1108 '(?P<RESERVED>[0-9]+)')
1109
1110 _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
1111 _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
1112
1113 _TIME_PATTERN_FORMAT = re.compile(
1114 r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
1115 _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
1116
1117 def __init__(self, path, modified_time):
1118 self._path = path
1119 matched = self._PATH_PATTERN.match(path)
1120 self._pid = int(matched.group(2))
1121 self._count = int(matched.group(3))
1122 self._time = modified_time
1123 self._map = {}
1124 self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
1125 self._stacktrace_lines = []
1126 self._global_stats = {} # used only in apply_policy
1127
1128 self._run_id = ''
1129 self._pagesize = 4096
1130 self._pageframe_length = 0
1131 self._pageframe_encoding = ''
1132 self._has_pagecount = False
1133
1134 self._version = ''
1135 self._lines = []
1136
1137 @property
1138 def path(self):
1139 return self._path
1140
1141 @property
1142 def count(self):
1143 return self._count
1144
1145 @property
1146 def time(self):
1147 return self._time
1148
1149 @property
1150 def iter_map(self):
1151 for region in sorted(self._map.iteritems()):
1152 yield region[0], region[1]
1153
1154 def iter_procmaps(self):
1155 for begin, end, attr in self._map.iter_range():
1156 yield begin, end, attr
1157
1158 @property
1159 def iter_stacktrace(self):
1160 for line in self._stacktrace_lines:
1161 yield line
1162
1163 def global_stat(self, name):
1164 return self._global_stats[name]
1165
1166 @property
1167 def run_id(self):
1168 return self._run_id
1169
1170 @property
1171 def pagesize(self):
1172 return self._pagesize
1173
1174 @property
1175 def pageframe_length(self):
1176 return self._pageframe_length
1177
1178 @property
1179 def pageframe_encoding(self):
1180 return self._pageframe_encoding
1181
1182 @property
1183 def has_pagecount(self):
1184 return self._has_pagecount
1185
1186 @staticmethod
1187 def load(path, log_header='Loading a heap profile dump: '):
1188 """Loads a heap profile dump.
1189
1190 Args:
1191 path: A file path string to load.
1192 log_header: A preceding string for log messages.
1193
1194 Returns:
1195 A loaded Dump object.
1196
1197 Raises:
1198 ParsingException for invalid heap profile dumps.
1199 """
1200 dump = Dump(path, os.stat(path).st_mtime)
1201 with open(path, 'r') as f:
1202 dump.load_file(f, log_header)
1203 return dump
1204
1205 def load_file(self, f, log_header):
1206 self._lines = [line for line in f
1207 if line and not line.startswith('#')]
1208
1209 try:
1210 self._version, ln = self._parse_version()
1211 self._parse_meta_information()
1212 if self._version == DUMP_DEEP_6:
1213 self._parse_mmap_list()
1214 self._parse_global_stats()
1215 self._extract_stacktrace_lines(ln)
1216 except EmptyDumpException:
1217 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
1218 except ParsingException, e:
1219 LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
1220 raise
1221 else:
1222 LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
1223
1224 def _parse_version(self):
1225 """Parses a version string in self._lines.
1226
1227 Returns:
1228 A pair of (a string representing a version of the stacktrace dump,
1229 and an integer indicating a line number next to the version string).
1230
1231 Raises:
1232 ParsingException for invalid dump versions.
1233 """
1234 version = ''
1235
1236 # Skip until an identifiable line.
1237 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
1238 if not self._lines:
1239 raise EmptyDumpException('Empty heap dump file.')
1240 (ln, found) = skip_while(
1241 0, len(self._lines),
1242 lambda n: not self._lines[n].startswith(headers))
1243 if not found:
1244 raise InvalidDumpException('No version header.')
1245
1246 # Identify a version.
1247 if self._lines[ln].startswith('heap profile: '):
1248 version = self._lines[ln][13:].strip()
1249 if version in (DUMP_DEEP_5, DUMP_DEEP_6):
1250 (ln, _) = skip_while(
1251 ln, len(self._lines),
1252 lambda n: self._lines[n] != 'STACKTRACES:\n')
1253 elif version in DUMP_DEEP_OBSOLETE:
1254 raise ObsoleteDumpVersionException(version)
1255 else:
1256 raise InvalidDumpException('Invalid version: %s' % version)
1257 elif self._lines[ln] == 'STACKTRACES:\n':
1258 raise ObsoleteDumpVersionException(DUMP_DEEP_1)
1259 elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
1260 raise ObsoleteDumpVersionException(DUMP_DEEP_2)
1261
1262 return (version, ln)
1263
1264 def _parse_global_stats(self):
1265 """Parses lines in self._lines as global stats."""
1266 (ln, _) = skip_while(
1267 0, len(self._lines),
1268 lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
1269
1270 global_stat_names = [
1271 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
1272 'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
1273 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
1274 'nonprofiled-stack', 'nonprofiled-other',
1275 'profiled-mmap', 'profiled-malloc']
1276
1277 for prefix in global_stat_names:
1278 (ln, _) = skip_while(
1279 ln, len(self._lines),
1280 lambda n: self._lines[n].split()[0] != prefix)
1281 words = self._lines[ln].split()
1282 self._global_stats[prefix + '_virtual'] = int(words[-2])
1283 self._global_stats[prefix + '_committed'] = int(words[-1])
1284
1285 def _parse_meta_information(self):
1286 """Parses lines in self._lines for meta information."""
1287 (ln, found) = skip_while(
1288 0, len(self._lines),
1289 lambda n: self._lines[n] != 'META:\n')
1290 if not found:
1291 return
1292 ln += 1
1293
1294 while True:
1295 if self._lines[ln].startswith('Time:'):
1296 matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
1297 matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
1298 if matched_format:
1299 self._time = time.mktime(datetime.datetime.strptime(
1300 matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
1301 if matched_format.group(2):
1302 self._time += float(matched_format.group(2)[1:]) / 1000.0
1303 elif matched_seconds:
1304 self._time = float(matched_seconds.group(1))
1305 elif self._lines[ln].startswith('Reason:'):
1306 pass # Nothing to do for 'Reason:'
1307 elif self._lines[ln].startswith('PageSize: '):
1308 self._pagesize = int(self._lines[ln][10:])
1309 elif self._lines[ln].startswith('CommandLine:'):
1310 pass
1311 elif (self._lines[ln].startswith('PageFrame: ') or
1312 self._lines[ln].startswith('PFN: ')):
1313 if self._lines[ln].startswith('PageFrame: '):
1314 words = self._lines[ln][11:].split(',')
1315 else:
1316 words = self._lines[ln][5:].split(',')
1317 for word in words:
1318 if word == '24':
1319 self._pageframe_length = 24
1320 elif word == 'Base64':
1321 self._pageframe_encoding = 'base64'
1322 elif word == 'PageCount':
1323 self._has_pagecount = True
1324 elif self._lines[ln].startswith('RunID: '):
1325 self._run_id = self._lines[ln][7:].strip()
1326 elif (self._lines[ln].startswith('MMAP_LIST:') or
1327 self._lines[ln].startswith('GLOBAL_STATS:')):
1328 # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
1329 break
1330 else:
1331 pass
1332 ln += 1
1333
1334 def _parse_mmap_list(self):
1335 """Parses lines in self._lines as a mmap list."""
1336 (ln, found) = skip_while(
1337 0, len(self._lines),
1338 lambda n: self._lines[n] != 'MMAP_LIST:\n')
1339 if not found:
1340 return {}
1341
1342 ln += 1
1343 self._map = {}
1344 current_vma = {}
1345 pageframe_list = []
1346 while True:
1347 entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
1348 if entry:
1349 current_vma = {}
1350 for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
1351 for key, value in entry.as_dict().iteritems():
1352 attr[key] = value
1353 current_vma[key] = value
1354 ln += 1
1355 continue
1356
1357 if self._lines[ln].startswith(' PF: '):
1358 for pageframe in self._lines[ln][5:].split():
1359 pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
1360 ln += 1
1361 continue
1362
1363 matched = self._HOOK_PATTERN.match(self._lines[ln])
1364 if not matched:
1365 break
1366 # 2: starting address
1367 # 5: end address
1368 # 7: hooked or unhooked
1369 # 8: additional information
1370 if matched.group(7) == 'hooked':
1371 submatched = self._HOOKED_PATTERN.match(matched.group(8))
1372 if not submatched:
1373 submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
1374 elif matched.group(7) == 'unhooked':
1375 submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
1376 if not submatched:
1377 submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
1378 else:
1379 assert matched.group(7) in ['hooked', 'unhooked']
1380
1381 submatched_dict = submatched.groupdict()
1382 region_info = { 'vma': current_vma }
1383 if submatched_dict.get('TYPE'):
1384 region_info['type'] = submatched_dict['TYPE'].strip()
1385 if submatched_dict.get('COMMITTED'):
1386 region_info['committed'] = int(submatched_dict['COMMITTED'])
1387 if submatched_dict.get('RESERVED'):
1388 region_info['reserved'] = int(submatched_dict['RESERVED'])
1389 if submatched_dict.get('BUCKETID'):
1390 region_info['bucket_id'] = int(submatched_dict['BUCKETID'])
1391
1392 if matched.group(1) == '(':
1393 start = current_vma['begin']
1394 else:
1395 start = int(matched.group(2), 16)
1396 if matched.group(4) == '(':
1397 end = current_vma['end']
1398 else:
1399 end = int(matched.group(5), 16)
1400
1401 if pageframe_list and pageframe_list[0].start_truncated:
1402 pageframe_list[0].set_size(
1403 pageframe_list[0].size - start % self._pagesize)
1404 if pageframe_list and pageframe_list[-1].end_truncated:
1405 pageframe_list[-1].set_size(
1406 pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
1407 region_info['pageframe'] = pageframe_list
1408 pageframe_list = []
1409
1410 self._map[(start, end)] = (matched.group(7), region_info)
1411 ln += 1
1412
1413 def _extract_stacktrace_lines(self, line_number):
1414 """Extracts the position of stacktrace lines.
1415
1416 Valid stacktrace lines are stored into self._stacktrace_lines.
1417
1418 Args:
1419 line_number: A line number to start parsing in lines.
1420
1421 Raises:
1422 ParsingException for invalid dump versions.
1423 """
1424 if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
1425 (line_number, _) = skip_while(
1426 line_number, len(self._lines),
1427 lambda n: not self._lines[n].split()[0].isdigit())
1428 stacktrace_start = line_number
1429 (line_number, _) = skip_while(
1430 line_number, len(self._lines),
1431 lambda n: self._check_stacktrace_line(self._lines[n]))
1432 self._stacktrace_lines = self._lines[stacktrace_start:line_number]
1433
1434 elif self._version in DUMP_DEEP_OBSOLETE:
1435 raise ObsoleteDumpVersionException(self._version)
1436
1437 else:
1438 raise InvalidDumpException('Invalid version: %s' % self._version)
1439
1440 @staticmethod
1441 def _check_stacktrace_line(stacktrace_line):
1442 """Checks if a given stacktrace_line is valid as stacktrace.
1443
1444 Args:
1445 stacktrace_line: A string to be checked.
1446
1447 Returns:
1448 True if the given stacktrace_line is valid.
1449 """
1450 words = stacktrace_line.split()
1451 if len(words) < BUCKET_ID + 1:
1452 return False
1453 if words[BUCKET_ID - 1] != '@':
1454 return False
1455 return True
1456
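# Editor's sketch (not in the reviewed file): a stacktrace line with invented
# numbers in the form that _check_stacktrace_line() accepts.  The columns
# appear to follow the VIRTUAL / COMMITTED / ALLOC_COUNT / FREE_COUNT /
# BUCKET_ID indices defined near the top of this file.
_example_stacktrace_words = '1310720 1310720 33 0 @ 123'.split()
# _example_stacktrace_words[BUCKET_ID - 1] == '@' and
# _example_stacktrace_words[BUCKET_ID] == '123' (the bucket id).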
1457
1458 class DumpList(object):
1459 """Represents a sequence of heap profile dumps."""
1460
1461 def __init__(self, dump_list):
1462 self._dump_list = dump_list
1463
1464 @staticmethod
1465 def load(path_list):
1466 LOGGER.info('Loading heap dump profiles.')
1467 dump_list = []
1468 for path in path_list:
1469 dump_list.append(Dump.load(path, ' '))
1470 return DumpList(dump_list)
1471
1472 def __len__(self):
1473 return len(self._dump_list)
1474
1475 def __iter__(self):
1476 for dump in self._dump_list:
1477 yield dump
1478
1479 def __getitem__(self, index):
1480 return self._dump_list[index]
1481
1482
1483 class Unit(object):
1484 """Represents a minimum unit of memory usage categorization.
1485
1486 It is meant to be subclassed for different spaces such as the entire
1487 virtual memory or a malloc arena. Such spaces are called "worlds"
1488 in dmprof. (For example, the "vm" world and the "malloc" world.)
1489 """
1490 def __init__(self, unit_id, size):
1491 self._unit_id = unit_id
1492 self._size = size
1493
1494 @property
1495 def unit_id(self):
1496 return self._unit_id
1497
1498 @property
1499 def size(self):
1500 return self._size
1501
1502
1503 class VMUnit(Unit):
1504 """Represents a Unit for a memory region on virtual memory."""
1505 def __init__(self, unit_id, committed, reserved, mmap, region,
1506 pageframe=None, group_pfn_counts=None):
1507 super(VMUnit, self).__init__(unit_id, committed)
1508 self._reserved = reserved
1509 self._mmap = mmap
1510 self._region = region
1511 self._pageframe = pageframe
1512 self._group_pfn_counts = group_pfn_counts
1513
1514 @property
1515 def committed(self):
1516 return self._size
1517
1518 @property
1519 def reserved(self):
1520 return self._reserved
1521
1522 @property
1523 def mmap(self):
1524 return self._mmap
1525
1526 @property
1527 def region(self):
1528 return self._region
1529
1530 @property
1531 def pageframe(self):
1532 return self._pageframe
1533
1534 @property
1535 def group_pfn_counts(self):
1536 return self._group_pfn_counts
1537
1538
1539 class MMapUnit(VMUnit):
1540 """Represents a Unit for a mmap'ed region."""
1541 def __init__(self, unit_id, committed, reserved, region, bucket_set,
1542 pageframe=None, group_pfn_counts=None):
1543 super(MMapUnit, self).__init__(unit_id, committed, reserved, True,
1544 region, pageframe, group_pfn_counts)
1545 self._bucket_set = bucket_set
1546
1547 def __repr__(self):
1548 return str(self.region)
1549
1550 @property
1551 def bucket_set(self):
1552 return self._bucket_set
1553
1554
1555 class UnhookedUnit(VMUnit):
1556 """Represents a Unit for a non-mmap'ed memory region on virtual memory."""
1557 def __init__(self, unit_id, committed, reserved, region,
1558 pageframe=None, group_pfn_counts=None):
1559 super(UnhookedUnit, self).__init__(unit_id, committed, reserved, False,
1560 region, pageframe, group_pfn_counts)
1561
1562 def __repr__(self):
1563 return str(self.region)
1564
1565
1566 class MallocUnit(Unit):
1567 """Represents a Unit for a malloc'ed memory block."""
1568 def __init__(self, unit_id, size, alloc_count, free_count, bucket):
1569 super(MallocUnit, self).__init__(unit_id, size)
1570 self._bucket = bucket
1571 self._alloc_count = alloc_count
1572 self._free_count = free_count
1573
1574 def __repr__(self):
1575 return str(self.bucket)
1576
1577 @property
1578 def bucket(self):
1579 return self._bucket
1580
1581 @property
1582 def alloc_count(self):
1583 return self._alloc_count
1584
1585 @property
1586 def free_count(self):
1587 return self._free_count
1588
1589
1590 class UnitSet(object):
1591 """Represents an iterable set of Units."""
1592 def __init__(self, world):
1593 self._units = {}
1594 self._world = world
1595
1596 def __repr__(self):
1597 return str(self._units)
1598
1599 def __iter__(self):
1600 for unit_id in sorted(self._units):
1601 yield self._units[unit_id]
1602
1603 def append(self, unit, overwrite=False):
1604 if not overwrite and unit.unit_id in self._units:
1605 LOGGER.error('The unit id=%s already exists.' % str(unit.unit_id))
1606 self._units[unit.unit_id] = unit
1607
1608
1609 class AbstractRule(object):
1610 """An abstract class for rules to be matched with units."""
1611 def __init__(self, dct):
1612 self._name = dct['name']
1613 self._hidden = dct.get('hidden', False)
1614 self._subworlds = dct.get('subworlds', [])
1615
1616 def match(self, unit):
1617 raise NotImplementedError()
1618
1619 @property
1620 def name(self):
1621 return self._name
1622
1623 @property
1624 def hidden(self):
1625 return self._hidden
1626
1627 def iter_subworld(self):
1628 for subworld in self._subworlds:
1629 yield subworld
1630
1631
1632 class VMRule(AbstractRule):
1633 """Represents a Rule to match with virtual memory regions."""
1634 def __init__(self, dct):
1635 super(VMRule, self).__init__(dct)
1636 self._backtrace_function = dct.get('backtrace_function', None)
1637 if self._backtrace_function:
1638 self._backtrace_function = re.compile(self._backtrace_function)
1639 self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None)
1640 if self._backtrace_sourcefile:
1641 self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile)
1642 self._mmap = dct.get('mmap', None)
1643 self._sharedwith = dct.get('sharedwith', [])
1644 self._mapped_pathname = dct.get('mapped_pathname', None)
1645 if self._mapped_pathname:
1646 self._mapped_pathname = re.compile(self._mapped_pathname)
1647 self._mapped_permission = dct.get('mapped_permission', None)
1648 if self._mapped_permission:
1649 self._mapped_permission = re.compile(self._mapped_permission)
1650
1651 def __repr__(self):
1652 result = cStringIO.StringIO()
1653 result.write('{"%s"=>' % self._name)
1654 attributes = []
1655 attributes.append('mmap: %s' % self._mmap)
1656 if self._backtrace_function:
1657 attributes.append('backtrace_function: "%s"' %
1658 self._backtrace_function.pattern)
1659 if self._sharedwith:
1660 attributes.append('sharedwith: "%s"' % self._sharedwith)
1661 if self._mapped_pathname:
1662 attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern)
1663 if self._mapped_permission:
1664 attributes.append('mapped_permission: "%s"' %
1665 self._mapped_permission.pattern)
1666 result.write('%s}' % ', '.join(attributes))
1667 return result.getvalue()
1668
1669 def match(self, unit):
1670 if unit.mmap:
1671 assert unit.region[0] == 'hooked'
1672 bucket = unit.bucket_set.get(unit.region[1]['bucket_id'])
1673 assert bucket
1674 assert bucket.allocator_type == 'mmap'
1675
1676 stackfunction = bucket.symbolized_joined_stackfunction
1677 stacksourcefile = bucket.symbolized_joined_stacksourcefile
1678
1679 # TODO(dmikurube): Support shared memory.
1680 sharedwith = None
1681
1682 if self._mmap == False: # (self._mmap == None) should go through.
1683 return False
1684 if (self._backtrace_function and
1685 not self._backtrace_function.match(stackfunction)):
1686 return False
1687 if (self._backtrace_sourcefile and
1688 not self._backtrace_sourcefile.match(stacksourcefile)):
1689 return False
1690 if (self._mapped_pathname and
1691 not self._mapped_pathname.match(unit.region[1]['vma']['name'])):
1692 return False
1693 if (self._mapped_permission and
1694 not self._mapped_permission.match(
1695 unit.region[1]['vma']['readable'] +
1696 unit.region[1]['vma']['writable'] +
1697 unit.region[1]['vma']['executable'] +
1698 unit.region[1]['vma']['private'])):
1699 return False
1700 if (self._sharedwith and
1701 unit.pageframe and sharedwith not in self._sharedwith):
1702 return False
1703
1704 return True
1705
1706 else:
1707 assert unit.region[0] == 'unhooked'
1708
1709 # TODO(dmikurube): Support shared memory.
1710 sharedwith = None
1711
1712 if self._mmap == True: # (self._mmap == None) should go through.
1713 return False
1714 if (self._mapped_pathname and
1715 not self._mapped_pathname.match(unit.region[1]['vma']['name'])):
1716 return False
1717 if (self._mapped_permission and
1718 not self._mapped_permission.match(
1719 unit.region[1]['vma']['readable'] +
1720 unit.region[1]['vma']['writable'] +
1721 unit.region[1]['vma']['executable'] +
1722 unit.region[1]['vma']['private'])):
1723 return False
1724 if (self._sharedwith and
1725 unit.pageframe and sharedwith not in self._sharedwith):
1726 return False
1727
1728 return True
1729
1730
1731 class MallocRule(AbstractRule):
1732 """Represents a Rule to match with malloc'ed blocks."""
1733 def __init__(self, dct):
1734 super(MallocRule, self).__init__(dct)
1735 self._backtrace_function = dct.get('backtrace_function', None)
1736 if self._backtrace_function:
1737 self._backtrace_function = re.compile(self._backtrace_function)
1738 self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None)
1739 if self._backtrace_sourcefile:
1740 self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile)
1741 self._typeinfo = dct.get('typeinfo', None)
1742 if self._typeinfo:
1743 self._typeinfo = re.compile(self._typeinfo)
1744
1745 def __repr__(self):
1746 result = cStringIO.StringIO()
1747 result.write('{"%s"=>' % self._name)
1748 attributes = []
1749 if self._backtrace_function:
1750 attributes.append('backtrace_function: "%s"' % self._backtrace_function)
1751 if self._typeinfo:
1752 attributes.append('typeinfo: "%s"' % self._typeinfo)
1753 result.write('%s}' % ', '.join(attributes))
1754 return result.getvalue()
1755
1756 def match(self, unit):
1757 assert unit.bucket.allocator_type == 'malloc'
1758
1759 stackfunction = unit.bucket.symbolized_joined_stackfunction
1760 stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile
1761 typeinfo = unit.bucket.symbolized_typeinfo
1762 if typeinfo.startswith('0x'):
1763 typeinfo = unit.bucket.typeinfo_name
1764
1765 return ((not self._backtrace_function or
1766 self._backtrace_function.match(stackfunction)) and
1767 (not self._backtrace_sourcefile or
1768 self._backtrace_sourcefile.match(stacksourcefile)) and
1769 (not self._typeinfo or self._typeinfo.match(typeinfo)))
1770
1771
1772 class NoBucketMallocRule(MallocRule):
1773 """Represents a Rule that small ignorable units match with."""
1774 def __init__(self):
1775 super(NoBucketMallocRule, self).__init__({'name': 'tc-no-bucket'})
1776 self._no_bucket = True
1777
1778 @property
1779 def no_bucket(self):
1780 return self._no_bucket
1781
1782
1783 class AbstractSorter(object):
1784 """An abstract class for classifying Units with a set of Rules."""
1785 def __init__(self, dct):
1786 self._type = 'sorter'
1787 self._version = dct['version']
1788 self._world = dct['world']
1789 self._name = dct['name']
1790 self._order = dct['order']
1791
1792 self._rules = []
1793 for rule in dct['rules']:
1794 if dct['world'] == 'vm':
1795 self._rules.append(VMRule(rule))
1796 elif dct['world'] == 'malloc':
1797 self._rules.append(MallocRule(rule))
1798 else:
1799 LOGGER.error('Unknown sorter world type')
1800
1801 def __repr__(self):
1802 result = cStringIO.StringIO()
1803 result.write('world=%s' % self._world)
1804 result.write('order=%s' % self._order)
1805 result.write('rules:')
1806 for rule in self._rules:
1807 result.write(' %s' % rule)
1808 return result.getvalue()
1809
1810 @staticmethod
1811 def load(filename):
1812 with open(filename) as sorter_f:
1813 sorter_dict = json.load(sorter_f)
1814 if sorter_dict['world'] == 'vm':
1815 return VMSorter(sorter_dict)
1816 elif sorter_dict['world'] == 'malloc':
1817 return MallocSorter(sorter_dict)
1818 else:
1819 LOGGER.error('Unknown sorter world type')
1820 return None
1821
1822 @property
1823 def world(self):
1824 return self._world
1825
1826 @property
1827 def name(self):
1828 return self._name
1829
1830 def find(self, unit):
1831 raise NotImplementedError()
1832
1833 def find_rule(self, name):
1834 """Finds a rule whose name is |name|. """
1835 for rule in self._rules:
1836 if rule.name == name:
1837 return rule
1838 return None
1839
1840
1841 class VMSorter(AbstractSorter):
1842 """Represents a Sorter for memory regions on virtual memory."""
1843 def __init__(self, dct):
1844 assert dct['world'] == 'vm'
1845 super(VMSorter, self).__init__(dct)
1846
1847 def find(self, unit):
1848 for rule in self._rules:
1849 if rule.match(unit):
1850 return rule
1851 assert False
1852
1853
1854 class MallocSorter(AbstractSorter):
1855 """Represents a Sorter for malloc'ed blocks."""
1856 def __init__(self, dct):
1857 assert dct['world'] == 'malloc'
1858 super(MallocSorter, self).__init__(dct)
1859 self._no_bucket_rule = NoBucketMallocRule()
1860
1861 def find(self, unit):
1862 if not unit.bucket:
1863 return self._no_bucket_rule
1864 assert unit.bucket.allocator_type == 'malloc'
1865
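# The matched rule is memoized on the bucket (component_cache): all units in
# the same bucket match the same rule, so the rules are scanned only once.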
1866 if unit.bucket.component_cache:
1867 return unit.bucket.component_cache
1868
1869 for rule in self._rules:
1870 if rule.match(unit):
1871 unit.bucket.component_cache = rule
1872 return rule
1873 assert False
1874
1875
1876 class SorterSet(object):
1877 """Represents an iterable set of Sorters."""
1878 def __init__(self, additional=None, default=None):
1879 if not additional:
1880 additional = []
1881 if not default:
1882 default = DEFAULT_SORTERS
1883 self._sorters = {}
1884 for filename in default + additional:
1885 sorter = AbstractSorter.load(filename)
1886 if sorter.world not in self._sorters:
1887 self._sorters[sorter.world] = []
1888 self._sorters[sorter.world].append(sorter)
1889
1890 def __repr__(self):
1891 result = cStringIO.StringIO()
1892 result.write(self._sorters)
1893 return result.getvalue()
1894
1895 def __iter__(self):
1896 for sorters in self._sorters.itervalues():
1897 for sorter in sorters:
1898 yield sorter
1899
1900 def iter_world(self, world):
1901 for sorter in self._sorters.get(world, []):
1902 yield sorter
1903
1904
1905 class Command(object):
1906 """Subclasses are a subcommand for this executable.
1907
1908 See COMMANDS in main().
1909 """
1910 _DEVICE_LIB_BASEDIRS = ['/data/data/', '/data/app-lib/', '/data/local/tmp']
1911
1912 def __init__(self, usage):
1913 self._parser = optparse.OptionParser(usage)
1914
1915 @staticmethod
1916 def load_basic_files(
1917 dump_path, multiple, no_dump=False, alternative_dirs=None):
1918 prefix = Command._find_prefix(dump_path)
1919 # If the target process appears to have been running on Android, convert
1920 # paths on the Android device to the paths estimated to correspond to them
1921 # on the host. Use --alternative-dirs to specify the conversion manually.
1922 if not alternative_dirs:
1923 alternative_dirs = Command._estimate_alternative_dirs(prefix)
1924 if alternative_dirs:
1925 for device, host in alternative_dirs.iteritems():
1926 LOGGER.info('Assuming %s on device as %s on host' % (device, host))
1927 symbol_data_sources = SymbolDataSources(prefix, alternative_dirs)
1928 symbol_data_sources.prepare()
1929 bucket_set = BucketSet()
1930 bucket_set.load(prefix)
1931 if not no_dump:
1932 if multiple:
1933 dump_list = DumpList.load(Command._find_all_dumps(dump_path))
1934 else:
1935 dump = Dump.load(dump_path)
1936 symbol_mapping_cache = SymbolMappingCache()
1937 with open(prefix + '.cache.function', 'a+') as cache_f:
1938 symbol_mapping_cache.update(
1939 FUNCTION_SYMBOLS, bucket_set,
1940 SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f)
1941 with open(prefix + '.cache.typeinfo', 'a+') as cache_f:
1942 symbol_mapping_cache.update(
1943 TYPEINFO_SYMBOLS, bucket_set,
1944 SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f)
1945 with open(prefix + '.cache.sourcefile', 'a+') as cache_f:
1946 symbol_mapping_cache.update(
1947 SOURCEFILE_SYMBOLS, bucket_set,
1948 SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f)
1949 bucket_set.symbolize(symbol_mapping_cache)
1950 if no_dump:
1951 return bucket_set
1952 elif multiple:
1953 return (bucket_set, dump_list)
1954 else:
1955 return (bucket_set, dump)
1956
1957 @staticmethod
1958 def _find_prefix(path):
1959 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
1960
1961 @staticmethod
1962 def _estimate_alternative_dirs(prefix):
1963 """Estimates a path in host from a corresponding path in target device.
1964
1965 For Android, dmprof.py should find symbol information from binaries in
1966 the host instead of the Android device because dmprof.py doesn't run on
1967 the Android device. This method estimates a path in the host
1968 corresponding to a path in the Android device.
1969
1970 Returns:
1971 A dict that maps a path on the Android device to a path on the host.
1972 If a file under Command._DEVICE_LIB_BASEDIRS is found in /proc/maps, the
1973 process is assumed to have been running on Android and the path is mapped
1974 to "out/Debug/lib" in the Chromium directory. An empty dict is returned
1975 if the process does not appear to have been running on Android.
1976 """
1977 device_lib_path_candidates = set()
1978
1979 with open(prefix + '.maps') as maps_f:
1980 maps = proc_maps.ProcMaps.load(maps_f)
1981 for entry in maps:
1982 name = entry.as_dict()['name']
1983 if any([base_dir in name for base_dir in Command._DEVICE_LIB_BASEDIRS]):
1984 device_lib_path_candidates.add(os.path.dirname(name))
1985
1986 if len(device_lib_path_candidates) == 1:
1987 return {device_lib_path_candidates.pop(): os.path.join(
1988 CHROME_SRC_PATH, 'out', 'Debug', 'lib')}
1989 else:
1990 return {}
1991
1992 @staticmethod
1993 def _find_all_dumps(dump_path):
1994 prefix = Command._find_prefix(dump_path)
1995 dump_path_list = [dump_path]
1996
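# Dump files are named '<prefix>.NNNN.heap' with a 4-digit sequence number,
# so path[-9:-5] extracts the number of the given first dump.  Later dumps
# are found by incrementing the number, tolerating roughly ten missing or
# empty files in total before giving up.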
1997 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
1998 n += 1
1999 skipped = 0
2000 while True:
2001 p = '%s.%04d.heap' % (prefix, n)
2002 if os.path.exists(p) and os.stat(p).st_size:
2003 dump_path_list.append(p)
2004 else:
2005 if skipped > 10:
2006 break
2007 skipped += 1
2008 n += 1
2009
2010 return dump_path_list
2011
2012 @staticmethod
2013 def _find_all_buckets(dump_path):
2014 prefix = Command._find_prefix(dump_path)
2015 bucket_path_list = []
2016
2017 n = 0
2018 while True:
2019 path = '%s.%04d.buckets' % (prefix, n)
2020 if not os.path.exists(path):
2021 if n > 10:
2022 break
2023 n += 1
2024 continue
2025 bucket_path_list.append(path)
2026 n += 1
2027
2028 return bucket_path_list
2029
2030 def _parse_args(self, sys_argv, required):
2031 options, args = self._parser.parse_args(sys_argv)
2032 if len(args) < required + 1:
2033 self._parser.error('needs %d argument(s).\n' % required)
2034 return None
2035 return (options, args)
2036
2037 @staticmethod
2038 def _parse_policy_list(options_policy):
2039 if options_policy:
2040 return options_policy.split(',')
2041 else:
2042 return None
2043
2044
2045 class BucketsCommand(Command):
2046 def __init__(self):
2047 super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>')
2048
2049 def do(self, sys_argv, out=sys.stdout):
2050 _, args = self._parse_args(sys_argv, 1)
2051 dump_path = args[1]
2052 bucket_set = Command.load_basic_files(dump_path, True, True)
2053
2054 BucketsCommand._output(bucket_set, out)
2055 return 0
2056
2057 @staticmethod
2058 def _output(bucket_set, out):
2059 """Prints all buckets with resolving symbols.
2060
2061 Args:
2062 bucket_set: A BucketSet object.
2063 out: An IO object to output.
2064 """
2065 for bucket_id, bucket in sorted(bucket_set):
2066 out.write('%d: %s\n' % (bucket_id, bucket))
2067
2068
2069 class StacktraceCommand(Command):
2070 def __init__(self):
2071 super(StacktraceCommand, self).__init__(
2072 'Usage: %prog stacktrace <dump>')
2073
2074 def do(self, sys_argv):
2075 _, args = self._parse_args(sys_argv, 1)
2076 dump_path = args[1]
2077 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
2078
2079 StacktraceCommand._output(dump, bucket_set, sys.stdout)
2080 return 0
2081
2082 @staticmethod
2083 def _output(dump, bucket_set, out):
2084 """Outputs a given stacktrace.
2085
2086 Args:
2087 bucket_set: A BucketSet object.
2088 out: A file object to output.
2089 """
2090 for line in dump.iter_stacktrace:
2091 words = line.split()
2092 bucket = bucket_set.get(int(words[BUCKET_ID]))
2093 if not bucket:
2094 continue
2095 for i in range(0, BUCKET_ID - 1):
2096 out.write(words[i] + ' ')
2097 for frame in bucket.symbolized_stackfunction:
2098 out.write(frame + ' ')
2099 out.write('\n')
2100
2101
2102 class PolicyCommands(Command):
2103 def __init__(self, command):
2104 super(PolicyCommands, self).__init__(
2105 'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' %
2106 command)
2107 self._parser.add_option('-p', '--policy', type='string', dest='policy',
2108 help='profile with POLICY', metavar='POLICY')
2109 self._parser.add_option('--alternative-dirs', dest='alternative_dirs',
2110 metavar='/path/on/target@/path/on/host[:...]',
2111 help='Read files in /path/on/host/ instead of '
2112 'files in /path/on/target/.')
2113
2114 def _set_up(self, sys_argv):
2115 options, args = self._parse_args(sys_argv, 1)
2116 dump_path = args[1]
2117 shared_first_dump_paths = args[2:]
2118 alternative_dirs_dict = {}
2119 if options.alternative_dirs:
2120 for alternative_dir_pair in options.alternative_dirs.split(':'):
2121 target_path, host_path = alternative_dir_pair.split('@', 1)
2122 alternative_dirs_dict[target_path] = host_path
2123 (bucket_set, dumps) = Command.load_basic_files(
2124 dump_path, True, alternative_dirs=alternative_dirs_dict)
2125
2126 pfn_counts_dict = {}
2127 for shared_first_dump_path in shared_first_dump_paths:
2128 shared_dumps = Command._find_all_dumps(shared_first_dump_path)
2129 for shared_dump in shared_dumps:
2130 pfn_counts = PFNCounts.load(shared_dump)
2131 if pfn_counts.pid not in pfn_counts_dict:
2132 pfn_counts_dict[pfn_counts.pid] = []
2133 pfn_counts_dict[pfn_counts.pid].append(pfn_counts)
2134
2135 policy_set = PolicySet.load(Command._parse_policy_list(options.policy))
2136 return policy_set, dumps, pfn_counts_dict, bucket_set
2137
2138 @staticmethod
2139 def _apply_policy(dump, pfn_counts_dict, policy, bucket_set, first_dump_time):
2140 """Aggregates the total memory size of each component.
2141
2142 Iterates through all stacktraces and attributes them to components based
2143 on the policy. It is important to apply the policy rules in the right order.
2144
2145 Args:
2146 dump: A Dump object.
2147 pfn_counts_dict: A dict mapping a pid to a list of PFNCounts.
2148 policy: A Policy object.
2149 bucket_set: A BucketSet object.
2150 first_dump_time: An integer representing the time when the first dump
2151 was taken.
2152
2153 Returns:
2154 A dict mapping components to their corresponding sizes.
2155 """
2156 LOGGER.info(' %s' % dump.path)
2157 all_pfn_dict = {}
2158 if pfn_counts_dict:
2159 LOGGER.info(' shared with...')
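# For each pid sharing pages with this process, pick the page-frame-number
# (PFN) snapshot closest in time to this dump: snapshots 3 or more seconds
# later are ignored, and an 'Exiting' snapshot taken before the dump means
# there is no usable match for that pid.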
2160 for pid, pfnset_list in pfn_counts_dict.iteritems():
2161 closest_pfnset_index = None
2162 closest_pfnset_difference = 1024.0
2163 for index, pfnset in enumerate(pfnset_list):
2164 time_difference = pfnset.time - dump.time
2165 if time_difference >= 3.0:
2166 break
2167 elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or
2168 (0.0 <= time_difference and time_difference < 3.0)):
2169 closest_pfnset_index = index
2170 closest_pfnset_difference = time_difference
2171 elif time_difference < 0.0 and pfnset.reason == 'Exiting':
2172 closest_pfnset_index = None
2173 break
2174 if closest_pfnset_index is not None:
2175 for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn:
2176 all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count
2177 LOGGER.info(' %s (time difference = %f)' %
2178 (pfnset_list[closest_pfnset_index].path,
2179 closest_pfnset_difference))
2180 else:
2181 LOGGER.info(' (no match with pid:%d)' % pid)
2182
2183 sizes = dict((c, 0) for c in policy.components)
2184
2185 PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes)
2186 verify_global_stats = PolicyCommands._accumulate_maps(
2187 dump, all_pfn_dict, policy, bucket_set, sizes)
2188
2189 # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed.
2190 # http://crbug.com/245603.
2191 for verify_key, verify_value in verify_global_stats.iteritems():
2192 dump_value = dump.global_stat('%s_committed' % verify_key)
2193 if dump_value != verify_value:
2194 LOGGER.warn('%25s: %12d != %d (%d)' % (
2195 verify_key, dump_value, verify_value, dump_value - verify_value))
2196
2197 sizes['mmap-no-log'] = (
2198 dump.global_stat('profiled-mmap_committed') -
2199 sizes['mmap-total-log'])
2200 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
2201 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')
2202
2203 sizes['tc-no-log'] = (
2204 dump.global_stat('profiled-malloc_committed') -
2205 sizes['tc-total-log'])
2206 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
2207 sizes['tc-unused'] = (
2208 sizes['mmap-tcmalloc'] -
2209 dump.global_stat('profiled-malloc_committed'))
2210 if sizes['tc-unused'] < 0:
2211 LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' %
2212 sizes['tc-unused'])
2213 sizes['tc-unused'] = 0
2214 sizes['tc-total'] = sizes['mmap-tcmalloc']
2215
2216 # TODO(dmikurube): global_stat will be deprecated.
2217 # See http://crbug.com/245603.
2218 for key, value in {
2219 'total': 'total_committed',
2220 'filemapped': 'file_committed',
2221 'absent': 'absent_committed',
2222 'file-exec': 'file-exec_committed',
2223 'file-nonexec': 'file-nonexec_committed',
2224 'anonymous': 'anonymous_committed',
2225 'stack': 'stack_committed',
2226 'other': 'other_committed',
2227 'unhooked-absent': 'nonprofiled-absent_committed',
2228 'total-vm': 'total_virtual',
2229 'filemapped-vm': 'file_virtual',
2230 'anonymous-vm': 'anonymous_virtual',
2231 'other-vm': 'other_virtual' }.iteritems():
2232 if key in sizes:
2233 sizes[key] = dump.global_stat(value)
2234
2235 if 'mustbezero' in sizes:
2236 removed_list = (
2237 'profiled-mmap_committed',
2238 'nonprofiled-absent_committed',
2239 'nonprofiled-anonymous_committed',
2240 'nonprofiled-file-exec_committed',
2241 'nonprofiled-file-nonexec_committed',
2242 'nonprofiled-stack_committed',
2243 'nonprofiled-other_committed')
2244 sizes['mustbezero'] = (
2245 dump.global_stat('total_committed') -
2246 sum(dump.global_stat(removed) for removed in removed_list))
2247 if 'total-exclude-profiler' in sizes:
2248 sizes['total-exclude-profiler'] = (
2249 dump.global_stat('total_committed') -
2250 (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
2251 if 'hour' in sizes:
2252 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
2253 if 'minute' in sizes:
2254 sizes['minute'] = (dump.time - first_dump_time) / 60.0
2255 if 'second' in sizes:
2256 sizes['second'] = dump.time - first_dump_time
2257
2258 return sizes
2259
2260 @staticmethod
2261 def _accumulate_malloc(dump, policy, bucket_set, sizes):
2262 for line in dump.iter_stacktrace:
2263 words = line.split()
2264 bucket = bucket_set.get(int(words[BUCKET_ID]))
2265 if not bucket or bucket.allocator_type == 'malloc':
2266 component_match = policy.find_malloc(bucket)
2267 elif bucket.allocator_type == 'mmap':
2268 continue
2269 else:
2270 assert False
2271 sizes[component_match] += int(words[COMMITTED])
2272
2273 assert not component_match.startswith('mmap-')
2274 if component_match.startswith('tc-'):
2275 sizes['tc-total-log'] += int(words[COMMITTED])
2276 else:
2277 sizes['other-total-log'] += int(words[COMMITTED])
2278
2279 @staticmethod
2280 def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes):
2281 # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed.
2282 # http://crbug.com/245603.
2283 global_stats = {
2284 'total': 0,
2285 'file-exec': 0,
2286 'file-nonexec': 0,
2287 'anonymous': 0,
2288 'stack': 0,
2289 'other': 0,
2290 'nonprofiled-file-exec': 0,
2291 'nonprofiled-file-nonexec': 0,
2292 'nonprofiled-anonymous': 0,
2293 'nonprofiled-stack': 0,
2294 'nonprofiled-other': 0,
2295 'profiled-mmap': 0,
2296 }
2297
2298 for key, value in dump.iter_map:
2299 # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed.
2300 # It's temporary verification code for transition described in
2301 # http://crbug.com/245603.
2302 committed = 0
2303 if 'committed' in value[1]:
2304 committed = value[1]['committed']
2305 global_stats['total'] += committed
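# Classify the region by its VMA name: file-backed mappings (split by the
# executable bit), the process stack, anonymous mappings, or 'other'.
# 'unhooked' regions are also counted as 'nonprofiled-*' and 'hooked'
# regions as 'profiled-mmap'.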
2306 key = 'other'
2307 name = value[1]['vma']['name']
2308 if name.startswith('/'):
2309 if value[1]['vma']['executable'] == 'x':
2310 key = 'file-exec'
2311 else:
2312 key = 'file-nonexec'
2313 elif name == '[stack]':
2314 key = 'stack'
2315 elif name == '':
2316 key = 'anonymous'
2317 global_stats[key] += committed
2318 if value[0] == 'unhooked':
2319 global_stats['nonprofiled-' + key] += committed
2320 if value[0] == 'hooked':
2321 global_stats['profiled-mmap'] += committed
2322
2323 if value[0] == 'unhooked':
2324 if pfn_dict and dump.pageframe_length:
2325 for pageframe in value[1]['pageframe']:
2326 component_match = policy.find_unhooked(value, pageframe, pfn_dict)
2327 sizes[component_match] += pageframe.size
2328 else:
2329 component_match = policy.find_unhooked(value)
2330 sizes[component_match] += int(value[1]['committed'])
2331 elif value[0] == 'hooked':
2332 if pfn_dict and dump.pageframe_length:
2333 for pageframe in value[1]['pageframe']:
2334 component_match, _ = policy.find_mmap(
2335 value, bucket_set, pageframe, pfn_dict)
2336 sizes[component_match] += pageframe.size
2337 assert not component_match.startswith('tc-')
2338 if component_match.startswith('mmap-'):
2339 sizes['mmap-total-log'] += pageframe.size
2340 else:
2341 sizes['other-total-log'] += pageframe.size
2342 else:
2343 component_match, _ = policy.find_mmap(value, bucket_set)
2344 sizes[component_match] += int(value[1]['committed'])
2345 if component_match.startswith('mmap-'):
2346 sizes['mmap-total-log'] += int(value[1]['committed'])
2347 else:
2348 sizes['other-total-log'] += int(value[1]['committed'])
2349 else:
2350 LOGGER.error('Unrecognized mapping status: %s' % value[0])
2351
2352 return global_stats
2353
2354
2355 class CSVCommand(PolicyCommands):
2356 def __init__(self):
2357 super(CSVCommand, self).__init__('csv')
2358
2359 def do(self, sys_argv):
2360 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
2361 return CSVCommand._output(
2362 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
2363
2364 @staticmethod
2365 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out):
2366 max_components = 0
2367 for label in policy_set:
2368 max_components = max(max_components, len(policy_set[label].components))
2369
2370 for label in sorted(policy_set):
2371 components = policy_set[label].components
2372 if len(policy_set) > 1:
2373 out.write('%s%s\n' % (label, ',' * (max_components - 1)))
2374 out.write('%s%s\n' % (
2375 ','.join(components), ',' * (max_components - len(components))))
2376
2377 LOGGER.info('Applying a policy %s to...' % label)
2378 for dump in dumps:
2379 component_sizes = PolicyCommands._apply_policy(
2380 dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time)
2381 s = []
2382 for c in components:
2383 if c in ('hour', 'minute', 'second'):
2384 s.append('%05.5f' % (component_sizes[c]))
2385 else:
2386 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
2387 out.write('%s%s\n' % (
2388 ','.join(s), ',' * (max_components - len(components))))
2389
2390 bucket_set.clear_component_cache()
2391
2392 return 0
2393
2394
2395 class JSONCommand(PolicyCommands):
2396 def __init__(self):
2397 super(JSONCommand, self).__init__('json')
2398
2399 def do(self, sys_argv):
2400 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
2401 return JSONCommand._output(
2402 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
2403
2404 @staticmethod
2405 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out):
2406 json_base = {
2407 'version': 'JSON_DEEP_2',
2408 'policies': {},
2409 }
2410
2411 for label in sorted(policy_set):
2412 json_base['policies'][label] = {
2413 'legends': policy_set[label].components,
2414 'snapshots': [],
2415 }
2416
2417 LOGGER.info('Applying a policy %s to...' % label)
2418 for dump in dumps:
2419 component_sizes = PolicyCommands._apply_policy(
2420 dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time)
2421 component_sizes['dump_path'] = dump.path
2422 component_sizes['dump_time'] = datetime.datetime.fromtimestamp(
2423 dump.time).strftime('%Y-%m-%d %H:%M:%S')
2424 json_base['policies'][label]['snapshots'].append(component_sizes)
2425
2426 bucket_set.clear_component_cache()
2427
2428 json.dump(json_base, out, indent=2, sort_keys=True)
2429
2430 return 0
2431
2432
2433 class ListCommand(PolicyCommands):
2434 def __init__(self):
2435 super(ListCommand, self).__init__('list')
2436
2437 def do(self, sys_argv):
2438 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
2439 return ListCommand._output(
2440 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
2441
2442 @staticmethod
2443 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out):
2444 for label in sorted(policy_set):
2445 LOGGER.info('Applying a policy %s to...' % label)
2446 for dump in dumps:
2447 component_sizes = PolicyCommands._apply_policy(
2448 dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time)
2449 out.write('%s for %s:\n' % (label, dump.path))
2450 for c in policy_set[label].components:
2451 if c in ['hour', 'minute', 'second']:
2452 out.write('%40s %12.3f\n' % (c, component_sizes[c]))
2453 else:
2454 out.write('%40s %12d\n' % (c, component_sizes[c]))
2455
2456 bucket_set.clear_component_cache()
2457
2458 return 0
2459
2460
2461 class MapCommand(Command):
2462 def __init__(self):
2463 super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>')
2464
2465 def do(self, sys_argv, out=sys.stdout):
2466 _, args = self._parse_args(sys_argv, 2)
2467 dump_path = args[1]
2468 target_policy = args[2]
2469 (bucket_set, dumps) = Command.load_basic_files(dump_path, True)
2470 policy_set = PolicySet.load(Command._parse_policy_list(target_policy))
2471
2472 MapCommand._output(dumps, bucket_set, policy_set[target_policy], out)
2473 return 0
2474
2475 @staticmethod
2476 def _output(dumps, bucket_set, policy, out):
2477 """Prints all stacktraces in a given component of given depth.
2478
2479 Args:
2480 dumps: A list of Dump objects.
2481 bucket_set: A BucketSet object.
2482 policy: A Policy object.
2483 out: An IO object to output.
2484 """
2485 max_dump_count = 0
2486 range_dict = ExclusiveRangeDict(ListAttribute)
2487 for dump in dumps:
2488 max_dump_count = max(max_dump_count, dump.count)
2489 for key, value in dump.iter_map:
2490 for begin, end, attr in range_dict.iter_range(key[0], key[1]):
2491 attr[dump.count] = value
2492
2493 max_dump_count_digit = len(str(max_dump_count))
2494 for begin, end, attr in range_dict.iter_range():
2495 out.write('%x-%x\n' % (begin, end))
2496 if len(attr) < max_dump_count:
2497 attr[max_dump_count] = None
2498 for index, value in enumerate(attr[1:]):
2499 out.write(' #%0*d: ' % (max_dump_count_digit, index + 1))
2500 if not value:
2501 out.write('None\n')
2502 elif value[0] == 'hooked':
2503 component_match, _ = policy.find_mmap(value, bucket_set)
2504 out.write('%s @ %d\n' % (component_match, value[1]['bucket_id']))
2505 else:
2506 component_match = policy.find_unhooked(value)
2507 region_info = value[1]
2508 size = region_info['committed']
2509 out.write('%s [%d bytes] %s%s%s%s %s\n' % (
2510 component_match, size, value[1]['vma']['readable'],
2511 value[1]['vma']['writable'], value[1]['vma']['executable'],
2512 value[1]['vma']['private'], value[1]['vma']['name']))
2513
2514
2515 class ExpandCommand(Command):
2516 def __init__(self):
2517 super(ExpandCommand, self).__init__(
2518 'Usage: %prog expand <dump> <policy> <component> <depth>')
2519
2520 def do(self, sys_argv):
2521 _, args = self._parse_args(sys_argv, 4)
2522 dump_path = args[1]
2523 target_policy = args[2]
2524 component_name = args[3]
2525 depth = args[4]
2526 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
2527 policy_set = PolicySet.load(Command._parse_policy_list(target_policy))
2528
2529 ExpandCommand._output(dump, policy_set[target_policy], bucket_set,
2530 component_name, int(depth), sys.stdout)
2531 return 0
2532
2533 @staticmethod
2534 def _output(dump, policy, bucket_set, component_name, depth, out):
2535 """Prints all stacktraces in a given component of given depth.
2536
2537 Args:
2538 dump: A Dump object.
2539 policy: A Policy object.
2540 bucket_set: A BucketSet object.
2541 component_name: A name of component for filtering.
2542 depth: An integer representing depth to be printed.
2543 out: An IO object to output.
2544 """
2545 sizes = {}
2546
2547 ExpandCommand._accumulate(
2548 dump, policy, bucket_set, component_name, depth, sizes)
2549
2550 sorted_sizes_list = sorted(
2551 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
2552 total = 0
2553 # TODO(dmikurube): Better formatting.
2554 for size_pair in sorted_sizes_list:
2555 out.write('%10d %s\n' % (size_pair[1], size_pair[0]))
2556 total += size_pair[1]
2557 LOGGER.info('total: %d\n' % total)
2558
2559 @staticmethod
2560 def _add_size(precedence, bucket, depth, committed, sizes):
2561 stacktrace_sequence = precedence
2562 for function, sourcefile in zip(
2563 bucket.symbolized_stackfunction[
2564 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)],
2565 bucket.symbolized_stacksourcefile[
2566 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]):
2567 stacktrace_sequence += '%s(@%s) ' % (function, sourcefile)
2568 if stacktrace_sequence not in sizes:
2569 sizes[stacktrace_sequence] = 0
2570 sizes[stacktrace_sequence] += committed
2571
2572 @staticmethod
2573 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes):
2574 rule = policy.find_rule(component_name)
2575 if not rule:
2576 pass
2577 elif rule.allocator_type == 'malloc':
2578 for line in dump.iter_stacktrace:
2579 words = line.split()
2580 bucket = bucket_set.get(int(words[BUCKET_ID]))
2581 if not bucket or bucket.allocator_type == 'malloc':
2582 component_match = policy.find_malloc(bucket)
2583 elif bucket.allocator_type == 'mmap':
2584 continue
2585 else:
2586 assert False
2587 if component_match == component_name:
2588 precedence = ''
2589 precedence += '(alloc=%d) ' % int(words[ALLOC_COUNT])
2590 precedence += '(free=%d) ' % int(words[FREE_COUNT])
2591 if bucket.typeinfo:
2592 precedence += '(type=%s) ' % bucket.symbolized_typeinfo
2593 precedence += '(type.name=%s) ' % bucket.typeinfo_name
2594 ExpandCommand._add_size(precedence, bucket, depth,
2595 int(words[COMMITTED]), sizes)
2596 elif rule.allocator_type == 'mmap':
2597 for _, region in dump.iter_map:
2598 if region[0] != 'hooked':
2599 continue
2600 component_match, bucket = policy.find_mmap(region, bucket_set)
2601 if component_match == component_name:
2602 ExpandCommand._add_size('', bucket, depth,
2603 region[1]['committed'], sizes)
2604
2605
2606 class PProfCommand(Command):
2607 def __init__(self):
2608 super(PProfCommand, self).__init__(
2609 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
2610 self._parser.add_option('-c', '--component', type='string',
2611 dest='component',
2612 help='restrict to COMPONENT', metavar='COMPONENT')
2613
2614 def do(self, sys_argv):
2615 options, args = self._parse_args(sys_argv, 2)
2616
2617 dump_path = args[1]
2618 target_policy = args[2]
2619 component = options.component
2620
2621 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
2622 policy_set = PolicySet.load(Command._parse_policy_list(target_policy))
2623
2624 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f:
2625 maps_lines = maps_f.readlines()
2626 PProfCommand._output(
2627 dump, policy_set[target_policy], bucket_set, maps_lines, component,
2628 sys.stdout)
2629
2630 return 0
2631
2632 @staticmethod
2633 def _output(dump, policy, bucket_set, maps_lines, component_name, out):
2634 """Converts the heap profile dump so it can be processed by pprof.
2635
2636 Args:
2637 dump: A Dump object.
2638 policy: A Policy object.
2639 bucket_set: A BucketSet object.
2640 maps_lines: A list of strings containing /proc/.../maps.
2641 component_name: A name of component for filtering.
2642 out: An IO object to output.
2643 """
2644 out.write('heap profile: ')
2645 com_committed, com_allocs = PProfCommand._accumulate(
2646 dump, policy, bucket_set, component_name)
2647
2648 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
2649 com_allocs, com_committed, com_allocs, com_committed))
2650
2651 PProfCommand._output_stacktrace_lines(
2652 dump, policy, bucket_set, component_name, out)
2653
2654 out.write('MAPPED_LIBRARIES:\n')
2655 for line in maps_lines:
2656 out.write(line)
2657
2658 @staticmethod
2659 def _accumulate(dump, policy, bucket_set, component_name):
2660 """Accumulates size of committed chunks and the number of allocated chunks.
2661
2662 Args:
2663 dump: A Dump object.
2664 policy: A Policy object.
2665 bucket_set: A BucketSet object.
2666 component_name: A name of component for filtering.
2667
2668 Returns:
2669 Two integers which are the accumulated size of committed regions and the
2670 number of allocated chunks, respectively.
2671 """
2672 com_committed = 0
2673 com_allocs = 0
2674
2675 for _, region in dump.iter_map:
2676 if region[0] != 'hooked':
2677 continue
2678 component_match, bucket = policy.find_mmap(region, bucket_set)
2679
2680 if (component_name and component_name != component_match) or (
2681 region[1]['committed'] == 0):
2682 continue
2683
2684 com_committed += region[1]['committed']
2685 com_allocs += 1
2686
2687 for line in dump.iter_stacktrace:
2688 words = line.split()
2689 bucket = bucket_set.get(int(words[BUCKET_ID]))
2690 if not bucket or bucket.allocator_type == 'malloc':
2691 component_match = policy.find_malloc(bucket)
2692 elif bucket.allocator_type == 'mmap':
2693 continue
2694 else:
2695 assert False
2696 if (not bucket or
2697 (component_name and component_name != component_match)):
2698 continue
2699
2700 com_committed += int(words[COMMITTED])
2701 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
2702
2703 return com_committed, com_allocs
2704
2705 @staticmethod
2706 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out):
2707 """Prints information of stacktrace lines for pprof.
2708
2709 Args:
2710 dump: A Dump object.
2711 policy: A Policy object.
2712 bucket_set: A BucketSet object.
2713 component_name: A name of component for filtering.
2714 out: An IO object to output.
2715 """
2716 for _, region in dump.iter_map:
2717 if region[0] != 'hooked':
2718 continue
2719 component_match, bucket = policy.find_mmap(region, bucket_set)
2720
2721 if (component_name and component_name != component_match) or (
2722 region[1]['committed'] == 0):
2723 continue
2724
2725 out.write(' 1: %8s [ 1: %8s] @' % (
2726 region[1]['committed'], region[1]['committed']))
2727 for address in bucket.stacktrace:
2728 out.write(' 0x%016x' % address)
2729 out.write('\n')
2730
2731 for line in dump.iter_stacktrace:
2732 words = line.split()
2733 bucket = bucket_set.get(int(words[BUCKET_ID]))
2734 if not bucket or bucket.allocator_type == 'malloc':
2735 component_match = policy.find_malloc(bucket)
2736 elif bucket.allocator_type == 'mmap':
2737 continue
2738 else:
2739 assert False
2740 if (not bucket or
2741 (component_name and component_name != component_match)):
2742 continue
2743
2744 out.write('%6d: %8s [%6d: %8s] @' % (
2745 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
2746 words[COMMITTED],
2747 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
2748 words[COMMITTED]))
2749 for address in bucket.stacktrace:
2750 out.write(' 0x%016x' % address)
2751 out.write('\n')
2752
2753
2754 class UploadCommand(Command):
2755 def __init__(self):
2756 super(UploadCommand, self).__init__(
2757 'Usage: %prog upload [--gsutil path/to/gsutil] '
2758 '<first-dump> <destination-gs-path>')
2759 self._parser.add_option('--gsutil', default='gsutil',
2760 help='path to GSUTIL', metavar='GSUTIL')
2761
2762 def do(self, sys_argv):
2763 options, args = self._parse_args(sys_argv, 2)
2764 dump_path = args[1]
2765 gs_path = args[2]
2766
2767 dump_files = Command._find_all_dumps(dump_path)
2768 bucket_files = Command._find_all_buckets(dump_path)
2769 prefix = Command._find_prefix(dump_path)
2770 symbol_data_sources = SymbolDataSources(prefix)
2771 symbol_data_sources.prepare()
2772 symbol_path = symbol_data_sources.path()
2773
2774 handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof')
2775 os.close(handle_zip)
2776
2777 try:
2778 file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED)
2779 for filename in dump_files:
2780 file_zip.write(filename, os.path.basename(os.path.abspath(filename)))
2781 for filename in bucket_files:
2782 file_zip.write(filename, os.path.basename(os.path.abspath(filename)))
2783
2784 symbol_basename = os.path.basename(os.path.abspath(symbol_path))
2785 for filename in os.listdir(symbol_path):
2786 if not filename.startswith('.'):
2787 file_zip.write(os.path.join(symbol_path, filename),
2788 os.path.join(symbol_basename, os.path.basename(
2789 os.path.abspath(filename))))
2790 file_zip.close()
2791
2792 returncode = UploadCommand._run_gsutil(
2793 options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path)
2794 finally:
2795 os.remove(filename_zip)
2796
2797 return returncode
2798
2799 @staticmethod
2800 def _run_gsutil(gsutil, *args):
2801 """Run gsutil as a subprocess.
2802
2803 Args:
2804 *args: Arguments to pass to gsutil. The first argument should be an
2805 operation such as ls, cp or cat.
2806 Returns:
2807 The return code from the process.
2808 """
2809 command = [gsutil] + list(args)
2810 LOGGER.info("Running: %s", command)
2811
2812 try:
2813 return subprocess.call(command)
2814 except OSError, e:
2815 LOGGER.error('Failed to run gsutil: %s', e)
2816
2817
2818 class CatCommand(Command):
2819 def __init__(self):
2820 super(CatCommand, self).__init__('Usage: %prog cat <first-dump>')
2821 self._parser.add_option('--alternative-dirs', dest='alternative_dirs',
2822 metavar='/path/on/target@/path/on/host[:...]',
2823 help='Read files in /path/on/host/ instead of '
2824 'files in /path/on/target/.')
2825 self._parser.add_option('--indent', dest='indent', action='store_true',
2826 help='Indent the output.')
2827
2828 def do(self, sys_argv):
2829 options, args = self._parse_args(sys_argv, 1)
2830 dump_path = args[1]
2831 # TODO(dmikurube): Support shared memory.
2832 alternative_dirs_dict = {}
2833 if options.alternative_dirs:
2834 for alternative_dir_pair in options.alternative_dirs.split(':'):
2835 target_path, host_path = alternative_dir_pair.split('@', 1)
2836 alternative_dirs_dict[target_path] = host_path
2837 (bucket_set, dumps) = Command.load_basic_files(
2838 dump_path, True, alternative_dirs=alternative_dirs_dict)
2839
2840 json_root = OrderedDict()
2841 json_root['version'] = 1
2842 json_root['run_id'] = None
2843 for dump in dumps:
2844 if json_root['run_id'] and json_root['run_id'] != dump.run_id:
2845 LOGGER.error('Inconsistent heap profile dumps.')
2846 json_root['run_id'] = ''
2847 break
2848 json_root['run_id'] = dump.run_id
2849 json_root['snapshots'] = []
2850
2851 # Load all sorters.
2852 sorters = SorterSet()
2853
2854 for dump in dumps:
2855 json_root['snapshots'].append(
2856 self._fill_snapshot(dump, bucket_set, sorters))
2857
2858 if options.indent:
2859 json.dump(json_root, sys.stdout, indent=2)
2860 else:
2861 json.dump(json_root, sys.stdout)
2862 print ''
2863
2864 @staticmethod
2865 def _fill_snapshot(dump, bucket_set, sorters):
2866 root = OrderedDict()
2867 root['time'] = dump.time
2868 root['worlds'] = OrderedDict()
2869 root['worlds']['vm'] = CatCommand._fill_world(
2870 dump, bucket_set, sorters, 'vm')
2871 root['worlds']['malloc'] = CatCommand._fill_world(
2872 dump, bucket_set, sorters, 'malloc')
2873 return root
2874
2875 @staticmethod
2876 def _fill_world(dump, bucket_set, sorters, world):
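# Builds one "world" entry of a snapshot: 'unit_fields' names the per-unit
# metrics ('committed'/'reserved' for vm; 'size'/'alloc_count'/'free_count'
# for malloc), 'units' maps unit ids to those metrics, and 'breakdown' maps
# each sorter name to categories listing the unit ids they matched.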
2877 root = OrderedDict()
2878
2879 root['name'] = 'world'
2880 if world == 'vm':
2881 root['unit_fields'] = ['committed', 'reserved']
2882 elif world == 'malloc':
2883 root['unit_fields'] = ['size', 'alloc_count', 'free_count']
2884
2885 # Make { vm | malloc } units with their sizes.
2886 root['units'] = OrderedDict()
2887 unit_set = UnitSet(world)
2888 if world == 'vm':
2889 for unit in CatCommand._iterate_vm_unit(dump, None, bucket_set):
2890 unit_set.append(unit)
2891 for unit in unit_set:
2892 root['units'][unit.unit_id] = [unit.committed, unit.reserved]
2893 elif world == 'malloc':
2894 for unit in CatCommand._iterate_malloc_unit(dump, bucket_set):
2895 unit_set.append(unit)
2896 for unit in unit_set:
2897 root['units'][unit.unit_id] = [
2898 unit.size, unit.alloc_count, unit.free_count]
2899
2900 # Iterate for { vm | malloc } sorters.
2901 root['breakdown'] = OrderedDict()
2902 for sorter in sorters.iter_world(world):
2903 breakdown = OrderedDict()
2904 for unit in unit_set:
2905 found = sorter.find(unit)
2906 if found.name not in breakdown:
2907 category = OrderedDict()
2908 category['name'] = found.name
2909 category['color'] = 'random'
2910 subworlds = {}
2911 for subworld in found.iter_subworld():
2912 subworlds[subworld] = False
2913 if subworlds:
2914 category['subworlds'] = subworlds
2915 if found.hidden:
2916 category['hidden'] = True
2917 category['units'] = []
2918 breakdown[found.name] = category
2919 breakdown[found.name]['units'].append(unit.unit_id)
2920 root['breakdown'][sorter.name] = breakdown
2921
2922 return root
2923
2924 @staticmethod
2925 def _iterate_vm_unit(dump, pfn_dict, bucket_set):
2926 unit_id = 0
2927 for _, region in dump.iter_map:
2928 unit_id += 1
2929 if region[0] == 'unhooked':
2930 if pfn_dict and dump.pageframe_length:
2931 for pageframe in region[1]['pageframe']:
2932 yield UnhookedUnit(unit_id, pageframe.size, pageframe.size,
2933 region, pageframe, pfn_dict)
2934 else:
2935 yield UnhookedUnit(unit_id,
2936 int(region[1]['committed']),
2937 int(region[1]['reserved']),
2938 region)
2939 elif region[0] == 'hooked':
2940 if pfn_dict and dump.pageframe_length:
2941 for pageframe in region[1]['pageframe']:
2942 yield MMapUnit(unit_id,
2943 pageframe.size,
2944 pageframe.size,
2945 region, bucket_set, pageframe, pfn_dict)
2946 else:
2947 yield MMapUnit(unit_id,
2948 int(region[1]['committed']),
2949 int(region[1]['reserved']),
2950 region,
2951 bucket_set)
2952 else:
2953 LOGGER.error('Unrecognized mapping status: %s' % region[0])
2954
2955 @staticmethod
2956 def _iterate_malloc_unit(dump, bucket_set):
2957 for line in dump.iter_stacktrace:
2958 words = line.split()
2959 bucket = bucket_set.get(int(words[BUCKET_ID]))
2960 if bucket and bucket.allocator_type == 'malloc':
2961 yield MallocUnit(int(words[BUCKET_ID]),
2962 int(words[COMMITTED]),
2963 int(words[ALLOC_COUNT]),
2964 int(words[FREE_COUNT]),
2965 bucket)
2966 elif not bucket:
2967 # 'Not-found' buckets are all assumed as malloc buckets.
2968 yield MallocUnit(int(words[BUCKET_ID]),
2969 int(words[COMMITTED]),
2970 int(words[ALLOC_COUNT]),
2971 int(words[FREE_COUNT]),
2972 None)
2973 18
2974 19
2975 def main(): 20 def main():
2976 COMMANDS = { 21 COMMANDS = {
2977 'buckets': BucketsCommand, 22 'buckets': subcommands.BucketsCommand,
2978 'cat': CatCommand, 23 'cat': subcommands.CatCommand,
2979 'csv': CSVCommand, 24 'csv': subcommands.CSVCommand,
2980 'expand': ExpandCommand, 25 'expand': subcommands.ExpandCommand,
2981 'json': JSONCommand, 26 'json': subcommands.JSONCommand,
2982 'list': ListCommand, 27 'list': subcommands.ListCommand,
2983 'map': MapCommand, 28 'map': subcommands.MapCommand,
2984 'pprof': PProfCommand, 29 'pprof': subcommands.PProfCommand,
2985 'stacktrace': StacktraceCommand, 30 'stacktrace': subcommands.StacktraceCommand,
2986 'upload': UploadCommand, 31 'upload': subcommands.UploadCommand,
2987 } 32 }
2988 33
2989 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): 34 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
2990 sys.stderr.write("""Usage: dmprof <command> [options] [<args>] 35 sys.stderr.write("""Usage: dmprof <command> [options] [<args>]
2991 36
2992 Commands: 37 Commands:
2993 buckets Dump a bucket list with symbols resolved 38 buckets Dump a bucket list with symbols resolved
2994 cat Categorize memory usage (under development) 39 cat Categorize memory usage (under development)
2995 csv Classify memory usage in CSV 40 csv Classify memory usage in CSV
2996 expand Show all stacktraces contained in the specified component 41 expand Show all stacktraces contained in the specified component
(...skipping 30 matching lines...)
3027 errorcode = COMMANDS[action]().do(sys.argv) 72 errorcode = COMMANDS[action]().do(sys.argv)
3028 except ParsingException, e: 73 except ParsingException, e:
3029 errorcode = 1 74 errorcode = 1
3030 sys.stderr.write('Exit by parsing error: %s\n' % e) 75 sys.stderr.write('Exit by parsing error: %s\n' % e)
3031 76
3032 return errorcode 77 return errorcode
3033 78
3034 79
3035 if __name__ == '__main__': 80 if __name__ == '__main__':
3036 sys.exit(main()) 81 sys.exit(main())