OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """The deep heap profiler script for Chrome.""" | 5 """The Deep Memory Profiler analyzer script. |
6 | 6 |
7 import copy | 7 See http://dev.chromium.org/developers/deep-memory-profiler for details. |
8 import cStringIO | 8 """ |
9 import datetime | 9 |
10 import json | |
11 import logging | 10 import logging |
12 import optparse | |
13 import os | |
14 import re | |
15 import struct | |
16 import subprocess | |
17 import sys | 11 import sys |
18 import tempfile | |
19 import time | |
20 import zipfile | |
21 | 12 |
22 try: | 13 from lib.exceptions import ParsingException |
23 from collections import OrderedDict # pylint: disable=E0611 | 14 import subcommands |
24 except ImportError: | |
25 # TODO(dmikurube): Remove this once Python 2.7 is required. | |
26 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | |
27 SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party') | |
28 sys.path.insert(0, SIMPLEJSON_PATH) | |
29 from simplejson import OrderedDict | |
30 | 15 |
31 from range_dict import ExclusiveRangeDict | |
32 | |
33 BASE_PATH = os.path.dirname(os.path.abspath(__file__)) | |
34 FIND_RUNTIME_SYMBOLS_PATH = os.path.join( | |
35 BASE_PATH, os.pardir, 'find_runtime_symbols') | |
36 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) | |
37 | |
38 import find_runtime_symbols | |
39 import prepare_symbol_info | |
40 import proc_maps | |
41 | |
42 from find_runtime_symbols import FUNCTION_SYMBOLS | |
43 from find_runtime_symbols import SOURCEFILE_SYMBOLS | |
44 from find_runtime_symbols import TYPEINFO_SYMBOLS | |
45 | |
46 BUCKET_ID = 5 | |
47 VIRTUAL = 0 | |
48 COMMITTED = 1 | |
49 ALLOC_COUNT = 2 | |
50 FREE_COUNT = 3 | |
51 NULL_REGEX = re.compile('') | |
52 | 16 |
53 LOGGER = logging.getLogger('dmprof') | 17 LOGGER = logging.getLogger('dmprof') |
54 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') | |
55 CHROME_SRC_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir) | |
56 | |
57 DEFAULT_SORTERS = [ | |
58 os.path.join(BASE_PATH, 'sorter.malloc-component.json'), | |
59 os.path.join(BASE_PATH, 'sorter.malloc-type.json'), | |
60 os.path.join(BASE_PATH, 'sorter.vm-map.json'), | |
61 os.path.join(BASE_PATH, 'sorter.vm-sharing.json'), | |
62 ] | |
63 | |
64 | |
65 # Heap Profile Dump versions | |
66 | |
67 # DUMP_DEEP_[1-4] are obsolete. | |
68 # DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. |
69 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps. | |
70 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". | |
71 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. | |
72 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. | |
73 DUMP_DEEP_1 = 'DUMP_DEEP_1' | |
74 DUMP_DEEP_2 = 'DUMP_DEEP_2' | |
75 DUMP_DEEP_3 = 'DUMP_DEEP_3' | |
76 DUMP_DEEP_4 = 'DUMP_DEEP_4' | |
77 | |
78 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) | |
79 | |
80 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap. | |
81 # malloc and mmap are identified in bucket files. | |
82 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4. | |
83 DUMP_DEEP_5 = 'DUMP_DEEP_5' | |
84 | |
85 # DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5. | |
86 DUMP_DEEP_6 = 'DUMP_DEEP_6' | |
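# A DUMP_DEEP_5 / DUMP_DEEP_6 dump carries a version header line such as |
# "heap profile: DUMP_DEEP_6", which Dump._parse_version below recognizes. |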
87 | |
88 # Heap Profile Policy versions | |
89 | |
90 # POLICY_DEEP_1 DOES NOT include allocation_type columns. | |
91 # mmap regions are distinguished by mmap frames in the pattern column. |
92 POLICY_DEEP_1 = 'POLICY_DEEP_1' | |
93 | |
94 # POLICY_DEEP_2 DOES include allocation_type columns. | |
95 # mmap regions are distinguished by the allocation_type column. |
96 POLICY_DEEP_2 = 'POLICY_DEEP_2' | |
97 | |
98 # POLICY_DEEP_3 is in JSON format. | |
99 POLICY_DEEP_3 = 'POLICY_DEEP_3' | |
100 | |
101 # POLICY_DEEP_4 contains typeinfo. |
102 POLICY_DEEP_4 = 'POLICY_DEEP_4' | |
103 | |
104 | |
105 class EmptyDumpException(Exception): | |
106 def __init__(self, value=''): | |
107 super(EmptyDumpException, self).__init__() | |
108 self.value = value | |
109 def __str__(self): | |
110 return repr(self.value) | |
111 | |
112 | |
113 class ParsingException(Exception): | |
114 def __init__(self, value=''): | |
115 super(ParsingException, self).__init__() | |
116 self.value = value | |
117 def __str__(self): | |
118 return repr(self.value) | |
119 | |
120 | |
121 class InvalidDumpException(ParsingException): | |
122 def __init__(self, value): | |
123 super(InvalidDumpException, self).__init__() | |
124 self.value = value | |
125 def __str__(self): | |
126 return "invalid heap profile dump: %s" % repr(self.value) | |
127 | |
128 | |
129 class ObsoleteDumpVersionException(ParsingException): | |
130 def __init__(self, value): | |
131 super(ObsoleteDumpVersionException, self).__init__() | |
132 self.value = value | |
133 def __str__(self): | |
134 return "obsolete heap profile dump version: %s" % repr(self.value) | |
135 | |
136 | |
137 class ListAttribute(ExclusiveRangeDict.RangeAttribute): | |
138 """Represents a list for an attribute in range_dict.ExclusiveRangeDict.""" | |
139 def __init__(self): | |
140 super(ListAttribute, self).__init__() | |
141 self._list = [] | |
142 | |
143 def __str__(self): | |
144 return str(self._list) | |
145 | |
146 def __repr__(self): | |
147 return 'ListAttribute' + str(self._list) | |
148 | |
149 def __len__(self): | |
150 return len(self._list) | |
151 | |
152 def __iter__(self): | |
153 for x in self._list: | |
154 yield x | |
155 | |
156 def __getitem__(self, index): | |
157 return self._list[index] | |
158 | |
159 def __setitem__(self, index, value): | |
160 if index >= len(self._list): | |
161 self._list.extend([None] * (index + 1 - len(self._list))) | |
162 self._list[index] = value | |
163 | |
164 def copy(self): | |
165 new_list = ListAttribute() | |
166 for index, item in enumerate(self._list): | |
167 new_list[index] = copy.deepcopy(item) | |
168 return new_list | |
169 | |
170 | |
171 class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): | |
172 """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" | |
173 _DUMMY_ENTRY = proc_maps.ProcMapsEntry( | |
174 0, # begin | |
175 0, # end | |
176 '-', # readable | |
177 '-', # writable | |
178 '-', # executable | |
179 '-', # private | |
180 0, # offset | |
181 '00', # major | |
182 '00', # minor | |
183 0, # inode | |
184 '' # name | |
185 ) | |
186 | |
187 def __init__(self): | |
188 super(ProcMapsEntryAttribute, self).__init__() | |
189 self._entry = self._DUMMY_ENTRY.as_dict() | |
190 | |
191 def __str__(self): | |
192 return str(self._entry) | |
193 | |
194 def __repr__(self): | |
195 return 'ProcMapsEntryAttribute' + str(self._entry) | |
196 | |
197 def __getitem__(self, key): | |
198 return self._entry[key] | |
199 | |
200 def __setitem__(self, key, value): | |
201 if key not in self._entry: | |
202 raise KeyError(key) | |
203 self._entry[key] = value | |
204 | |
205 def copy(self): | |
206 new_entry = ProcMapsEntryAttribute() | |
207 for key, value in self._entry.iteritems(): | |
208 new_entry[key] = copy.deepcopy(value) | |
209 return new_entry | |
210 | |
211 | |
212 def skip_while(index, max_index, skipping_condition): | |
213 """Increments |index| until |skipping_condition|(|index|) is False. | |
214 | |
215 Returns: | |
216     A pair of the index at which skipping stopped, and a boolean which is |
217     True if a line for which |skipping_condition| is False was found before |
218     reaching |max_index|. |
219 """ | |
220 while skipping_condition(index): | |
221 index += 1 | |
222 if index >= max_index: | |
223 return index, False | |
224 return index, True | |
225 | |
226 | |
227 class SymbolDataSources(object): | |
228 """Manages symbol data sources in a process. | |
229 | |
230 The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and | |
231 so on. They are collected into a directory '|prefix|.symmap' from the binary | |
232 files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. | |
233 | |
234   The binaries themselves are not required for profiling. The prepared data |
235   sources work in place of the binary even if the binary has been overwritten |
236   by another binary. |
237 | |
238   Note that loading the symbol data sources takes a long time since they are |
239   often very large. The 'dmprof' profiler is therefore designed to use |
240   'SymbolMappingCache', which caches only the symbols that are actually used. |
241 """ | |
242 def __init__(self, prefix, alternative_dirs=None): | |
243 self._prefix = prefix | |
244 self._prepared_symbol_data_sources_path = None | |
245 self._loaded_symbol_data_sources = None | |
246 self._alternative_dirs = alternative_dirs or {} | |
247 | |
248 def prepare(self): | |
249 """Prepares symbol data sources by extracting mapping from a binary. | |
250 | |
251 The prepared symbol data sources are stored in a directory. The directory | |
252 name is stored in |self._prepared_symbol_data_sources_path|. | |
253 | |
254 Returns: | |
255 True if succeeded. | |
256 """ | |
257 LOGGER.info('Preparing symbol mapping...') | |
258 self._prepared_symbol_data_sources_path, used_tempdir = ( | |
259 prepare_symbol_info.prepare_symbol_info( | |
260 self._prefix + '.maps', | |
261 output_dir_path=self._prefix + '.symmap', | |
262 alternative_dirs=self._alternative_dirs, | |
263 use_tempdir=True, | |
264 use_source_file_name=True)) | |
265 if self._prepared_symbol_data_sources_path: | |
266 LOGGER.info(' Prepared symbol mapping.') | |
267 if used_tempdir: | |
268 LOGGER.warn(' Using a temporary directory for symbol mapping.') | |
269 LOGGER.warn(' Delete it by yourself.') | |
270 LOGGER.warn(' Or, move the directory by yourself to use it later.') | |
271 return True | |
272 else: | |
273 LOGGER.warn(' Failed to prepare symbol mapping.') | |
274 return False | |
275 | |
276 def get(self): | |
277 """Returns the prepared symbol data sources. | |
278 | |
279 Returns: | |
280 The prepared symbol data sources. None if failed. | |
281 """ | |
282 if not self._prepared_symbol_data_sources_path and not self.prepare(): | |
283 return None | |
284 if not self._loaded_symbol_data_sources: | |
285 LOGGER.info('Loading symbol mapping...') | |
286 self._loaded_symbol_data_sources = ( | |
287 find_runtime_symbols.RuntimeSymbolsInProcess.load( | |
288 self._prepared_symbol_data_sources_path)) | |
289 return self._loaded_symbol_data_sources | |
290 | |
291 def path(self): | |
292 """Returns the path of the prepared symbol data sources if possible.""" | |
293 if not self._prepared_symbol_data_sources_path and not self.prepare(): | |
294 return None | |
295 return self._prepared_symbol_data_sources_path | |
296 | |
297 | |
298 class SymbolFinder(object): | |
299 """Finds corresponding symbols from addresses. | |
300 | |
301   This class only 'find()'s symbols for a specified |address_list|. |
302   It is introduced to make the finder mockable. |
303 """ | |
304 def __init__(self, symbol_type, symbol_data_sources): | |
305 self._symbol_type = symbol_type | |
306 self._symbol_data_sources = symbol_data_sources | |
307 | |
308 def find(self, address_list): | |
309 return find_runtime_symbols.find_runtime_symbols( | |
310 self._symbol_type, self._symbol_data_sources.get(), address_list) | |
311 | |
312 | |
313 class SymbolMappingCache(object): | |
314 """Caches mapping from actually used addresses to symbols. | |
315 | |
316 'update()' updates the cache from the original symbol data sources via | |
317 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. | |
318 """ | |
319 def __init__(self): | |
320 self._symbol_mapping_caches = { | |
321 FUNCTION_SYMBOLS: {}, | |
322 SOURCEFILE_SYMBOLS: {}, | |
323 TYPEINFO_SYMBOLS: {}, | |
324 } | |
325 | |
326 def update(self, symbol_type, bucket_set, symbol_finder, cache_f): | |
327 """Updates symbol mapping cache on memory and in a symbol cache file. | |
328 | |
329 It reads cached symbol mapping from a symbol cache file |cache_f| if it | |
330 exists. Unresolved addresses are then resolved and added to the cache | |
331     both in memory and in the symbol cache file using 'SymbolFinder'. |
332 | |
333 A cache file is formatted as follows: | |
334 <Address> <Symbol> | |
335 <Address> <Symbol> | |
336 <Address> <Symbol> | |
337 ... | |
338 | |
339 Args: | |
340 symbol_type: A type of symbols to update. It should be one of | |
341 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. | |
342 bucket_set: A BucketSet object. | |
343 symbol_finder: A SymbolFinder object to find symbols. | |
344 cache_f: A readable and writable IO object of the symbol cache file. | |
345 """ | |
346 cache_f.seek(0, os.SEEK_SET) | |
347 self._load(cache_f, symbol_type) | |
348 | |
349 unresolved_addresses = sorted( | |
350 address for address in bucket_set.iter_addresses(symbol_type) | |
351 if address not in self._symbol_mapping_caches[symbol_type]) | |
352 | |
353 if not unresolved_addresses: | |
354 LOGGER.info('No need to resolve any more addresses.') | |
355 return | |
356 | |
357 cache_f.seek(0, os.SEEK_END) | |
358     LOGGER.info('Resolving %d unresolved addresses.' % |
359 len(unresolved_addresses)) | |
360 symbol_dict = symbol_finder.find(unresolved_addresses) | |
361 | |
362 for address, symbol in symbol_dict.iteritems(): | |
363 stripped_symbol = symbol.strip() or '?' | |
364 self._symbol_mapping_caches[symbol_type][address] = stripped_symbol | |
365 cache_f.write('%x %s\n' % (address, stripped_symbol)) | |
366 | |
367 def lookup(self, symbol_type, address): | |
368 """Looks up a symbol for a given |address|. | |
369 | |
370 Args: | |
371       symbol_type: A type of symbols to look up. It should be one of |
372 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. | |
373 address: An integer that represents an address. | |
374 | |
375 Returns: | |
376 A string that represents a symbol. | |
377 """ | |
378 return self._symbol_mapping_caches[symbol_type].get(address) | |
379 | |
380 def _load(self, cache_f, symbol_type): | |
381 try: | |
382 for line in cache_f: | |
383 items = line.rstrip().split(None, 1) | |
384 if len(items) == 1: | |
385 items.append('??') | |
386 self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1] | |
387 LOGGER.info('Loaded %d entries from symbol cache.' % | |
388 len(self._symbol_mapping_caches[symbol_type])) | |
389 except IOError as e: | |
390 LOGGER.info('The symbol cache file is invalid: %s' % e) | |
391 | |
392 | |
393 class Rule(object): | |
394 """Represents one matching rule in a policy file.""" | |
395 | |
396 def __init__(self, | |
397 name, | |
398 allocator_type, | |
399 stackfunction_pattern=None, | |
400 stacksourcefile_pattern=None, | |
401 typeinfo_pattern=None, | |
402 mappedpathname_pattern=None, | |
403 mappedpermission_pattern=None, | |
404 sharedwith=None): | |
405 self._name = name | |
406 self._allocator_type = allocator_type | |
407 | |
408 self._stackfunction_pattern = None | |
409 if stackfunction_pattern: | |
410 self._stackfunction_pattern = re.compile( | |
411 stackfunction_pattern + r'\Z') | |
412 | |
413 self._stacksourcefile_pattern = None | |
414 if stacksourcefile_pattern: | |
415 self._stacksourcefile_pattern = re.compile( | |
416 stacksourcefile_pattern + r'\Z') | |
417 | |
418 self._typeinfo_pattern = None | |
419 if typeinfo_pattern: | |
420 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') | |
421 | |
422 self._mappedpathname_pattern = None | |
423 if mappedpathname_pattern: | |
424 self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z') | |
425 | |
426 self._mappedpermission_pattern = None | |
427 if mappedpermission_pattern: | |
428 self._mappedpermission_pattern = re.compile( | |
429 mappedpermission_pattern + r'\Z') | |
430 | |
431 self._sharedwith = [] | |
432 if sharedwith: | |
433 self._sharedwith = sharedwith | |
434 | |
435 @property | |
436 def name(self): | |
437 return self._name | |
438 | |
439 @property | |
440 def allocator_type(self): | |
441 return self._allocator_type | |
442 | |
443 @property | |
444 def stackfunction_pattern(self): | |
445 return self._stackfunction_pattern | |
446 | |
447 @property | |
448 def stacksourcefile_pattern(self): | |
449 return self._stacksourcefile_pattern | |
450 | |
451 @property | |
452 def typeinfo_pattern(self): | |
453 return self._typeinfo_pattern | |
454 | |
455 @property | |
456 def mappedpathname_pattern(self): | |
457 return self._mappedpathname_pattern | |
458 | |
459 @property | |
460 def mappedpermission_pattern(self): | |
461 return self._mappedpermission_pattern | |
462 | |
463 @property | |
464 def sharedwith(self): | |
465 return self._sharedwith | |
466 | |
467 | |
468 class Policy(object): | |
469 """Represents a policy, a content of a policy file.""" | |
470 | |
471 def __init__(self, rules, version, components): | |
472 self._rules = rules | |
473 self._version = version | |
474 self._components = components | |
475 | |
476 @property | |
477 def rules(self): | |
478 return self._rules | |
479 | |
480 @property | |
481 def version(self): | |
482 return self._version | |
483 | |
484 @property | |
485 def components(self): | |
486 return self._components | |
487 | |
488 def find_rule(self, component_name): | |
489 """Finds a rule whose name is |component_name|. """ | |
490 for rule in self._rules: | |
491 if rule.name == component_name: | |
492 return rule | |
493 return None | |
494 | |
495 def find_malloc(self, bucket): | |
496 """Finds a matching component name which a given |bucket| belongs to. | |
497 | |
498 Args: | |
499 bucket: A Bucket object to be searched for. | |
500 | |
501 Returns: | |
502 A string representing a component name. | |
503 """ | |
504 assert not bucket or bucket.allocator_type == 'malloc' | |
505 | |
506 if not bucket: | |
507 return 'no-bucket' | |
508 if bucket.component_cache: | |
509 return bucket.component_cache | |
510 | |
511 stackfunction = bucket.symbolized_joined_stackfunction | |
512 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
513 typeinfo = bucket.symbolized_typeinfo | |
514 if typeinfo.startswith('0x'): | |
515 typeinfo = bucket.typeinfo_name | |
516 | |
517 for rule in self._rules: | |
518 if (rule.allocator_type == 'malloc' and | |
519 (not rule.stackfunction_pattern or | |
520 rule.stackfunction_pattern.match(stackfunction)) and | |
521 (not rule.stacksourcefile_pattern or | |
522 rule.stacksourcefile_pattern.match(stacksourcefile)) and | |
523 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): | |
524 bucket.component_cache = rule.name | |
525 return rule.name | |
526 | |
527 assert False | |
528 | |
529 def find_mmap(self, region, bucket_set, | |
530 pageframe=None, group_pfn_counts=None): | |
531 """Finds a matching component which a given mmap |region| belongs to. | |
532 | |
533 It uses |bucket_set| to match with backtraces. If |pageframe| is given, | |
534 it considers memory sharing among processes. | |
535 | |
536     NOTE: Don't use Bucket's |component_cache| for mmap regions because they are |
537     classified not only by bucket information (mappedpathname, for example, also matters). |
538 | |
539 Args: | |
540 region: A tuple representing a memory region. | |
541 bucket_set: A BucketSet object to look up backtraces. | |
542 pageframe: A PageFrame object representing a pageframe maybe including | |
543 a pagecount. | |
544 group_pfn_counts: A dict mapping a PFN to the number of times the | |
545           pageframe is mapped by the known "group (Chrome)" processes. |
546 | |
547 Returns: | |
548 A string representing a component name. | |
549 """ | |
550 assert region[0] == 'hooked' | |
551 bucket = bucket_set.get(region[1]['bucket_id']) | |
552 assert not bucket or bucket.allocator_type == 'mmap' | |
553 | |
554 if not bucket: | |
555 return 'no-bucket', None | |
556 | |
557 stackfunction = bucket.symbolized_joined_stackfunction | |
558 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
559 sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) | |
560 | |
561 for rule in self._rules: | |
562 if (rule.allocator_type == 'mmap' and | |
563 (not rule.stackfunction_pattern or | |
564 rule.stackfunction_pattern.match(stackfunction)) and | |
565 (not rule.stacksourcefile_pattern or | |
566 rule.stacksourcefile_pattern.match(stacksourcefile)) and | |
567 (not rule.mappedpathname_pattern or | |
568 rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and | |
569 (not rule.mappedpermission_pattern or | |
570 rule.mappedpermission_pattern.match( | |
571 region[1]['vma']['readable'] + | |
572 region[1]['vma']['writable'] + | |
573 region[1]['vma']['executable'] + | |
574 region[1]['vma']['private'])) and | |
575 (not rule.sharedwith or | |
576 not pageframe or sharedwith in rule.sharedwith)): | |
577 return rule.name, bucket | |
578 | |
579 assert False | |
580 | |
581 def find_unhooked(self, region, pageframe=None, group_pfn_counts=None): | |
582 """Finds a matching component which a given unhooked |region| belongs to. | |
583 | |
584 If |pageframe| is given, it considers memory sharing among processes. | |
585 | |
586 Args: | |
587 region: A tuple representing a memory region. | |
588 pageframe: A PageFrame object representing a pageframe maybe including | |
589 a pagecount. | |
590 group_pfn_counts: A dict mapping a PFN to the number of times the | |
591           pageframe is mapped by the known "group (Chrome)" processes. |
592 | |
593 Returns: | |
594 A string representing a component name. | |
595 """ | |
596 assert region[0] == 'unhooked' | |
597 sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) | |
598 | |
599 for rule in self._rules: | |
600 if (rule.allocator_type == 'unhooked' and | |
601 (not rule.mappedpathname_pattern or | |
602 rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and | |
603 (not rule.mappedpermission_pattern or | |
604 rule.mappedpermission_pattern.match( | |
605 region[1]['vma']['readable'] + | |
606 region[1]['vma']['writable'] + | |
607 region[1]['vma']['executable'] + | |
608 region[1]['vma']['private'])) and | |
609 (not rule.sharedwith or | |
610 not pageframe or sharedwith in rule.sharedwith)): | |
611 return rule.name | |
612 | |
613 assert False | |
614 | |
615 @staticmethod | |
616 def load(filename, filetype): | |
617 """Loads a policy file of |filename| in a |format|. | |
618 | |
619 Args: | |
620 filename: A filename to be loaded. | |
621 filetype: A string to specify a type of the file. Only 'json' is | |
622 supported for now. | |
623 | |
624 Returns: | |
625 A loaded Policy object. | |
626 """ | |
627 with open(os.path.join(BASE_PATH, filename)) as policy_f: | |
628 return Policy.parse(policy_f, filetype) | |
629 | |
630 @staticmethod | |
631 def parse(policy_f, filetype): | |
632 """Parses a policy file content in a |format|. | |
633 | |
634 Args: | |
635 policy_f: An IO object to be loaded. | |
636 filetype: A string to specify a type of the file. Only 'json' is | |
637 supported for now. | |
638 | |
639 Returns: | |
640 A loaded Policy object. | |
641 """ | |
642 if filetype == 'json': | |
643 return Policy._parse_json(policy_f) | |
644 else: | |
645 return None | |
646 | |
647 @staticmethod | |
648 def _parse_json(policy_f): | |
649 """Parses policy file in json format. | |
650 | |
651     A policy file contains component names and their stacktrace patterns |
652     written as regular expressions. Those patterns are matched against each |
653     symbol of each stacktrace in the order written in the policy file. |
654 | |
655 Args: | |
656 policy_f: A File/IO object to read. | |
657 | |
658 Returns: | |
659 A loaded policy object. | |
660 """ | |
661 policy = json.load(policy_f) | |
662 | |
663 rules = [] | |
664 for rule in policy['rules']: | |
665 stackfunction = rule.get('stackfunction') or rule.get('stacktrace') | |
666 stacksourcefile = rule.get('stacksourcefile') | |
667 rules.append(Rule( | |
668 rule['name'], | |
669 rule['allocator'], # allocator_type | |
670 stackfunction, | |
671 stacksourcefile, | |
672 rule['typeinfo'] if 'typeinfo' in rule else None, | |
673 rule.get('mappedpathname'), | |
674 rule.get('mappedpermission'), | |
675 rule.get('sharedwith'))) | |
676 | |
677 return Policy(rules, policy['version'], policy['components']) | |
678 | |
679 @staticmethod | |
680 def _categorize_pageframe(pageframe, group_pfn_counts): | |
681 """Categorizes a pageframe based on its sharing status. | |
682 | |
683 Returns: | |
684 'private' if |pageframe| is not shared with other processes. 'group' | |
685 if |pageframe| is shared only with group (Chrome-related) processes. | |
686 'others' if |pageframe| is shared with non-group processes. | |
687 """ | |
688 if not pageframe: | |
689 return 'private' | |
690 | |
691 if pageframe.pagecount: | |
692 if pageframe.pagecount == 1: | |
693 return 'private' | |
694 elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1: | |
695 return 'group' | |
696 else: | |
697 return 'others' | |
698 else: | |
699 if pageframe.pfn in group_pfn_counts: | |
700 return 'group' | |
701 else: | |
702 return 'private' | |
703 | |
704 | |
705 class PolicySet(object): | |
706 """Represents a set of policies.""" | |
707 | |
708 def __init__(self, policy_directory): | |
709 self._policy_directory = policy_directory | |
710 | |
711 @staticmethod | |
712 def load(labels=None): | |
713 """Loads a set of policies via the "default policy directory". | |
714 | |
715 The "default policy directory" contains pairs of policies and their labels. | |
716 For example, a policy "policy.l0.json" is labeled "l0" in the default | |
717 policy directory "policies.json". | |
718 | |
719 All policies in the directory are loaded by default. Policies can be | |
720 limited by |labels|. | |
721 | |
722 Args: | |
723 labels: An array that contains policy labels to be loaded. | |
724 | |
725 Returns: | |
726 A PolicySet object. | |
727 """ | |
728 default_policy_directory = PolicySet._load_default_policy_directory() | |
729 if labels: | |
730 specified_policy_directory = {} | |
731 for label in labels: | |
732 if label in default_policy_directory: | |
733 specified_policy_directory[label] = default_policy_directory[label] | |
734 # TODO(dmikurube): Load an un-labeled policy file. | |
735 return PolicySet._load_policies(specified_policy_directory) | |
736 else: | |
737 return PolicySet._load_policies(default_policy_directory) | |
738 | |
739 def __len__(self): | |
740 return len(self._policy_directory) | |
741 | |
742 def __iter__(self): | |
743 for label in self._policy_directory: | |
744 yield label | |
745 | |
746 def __getitem__(self, label): | |
747 return self._policy_directory[label] | |
748 | |
749 @staticmethod | |
750 def _load_default_policy_directory(): | |
751 with open(POLICIES_JSON_PATH, mode='r') as policies_f: | |
752 default_policy_directory = json.load(policies_f) | |
753 return default_policy_directory | |
754 | |
755 @staticmethod | |
756 def _load_policies(directory): | |
757 LOGGER.info('Loading policy files.') | |
758 policies = {} | |
759 for label in directory: | |
760 LOGGER.info(' %s: %s' % (label, directory[label]['file'])) | |
761 loaded = Policy.load(directory[label]['file'], directory[label]['format']) | |
762 if loaded: | |
763 policies[label] = loaded | |
764 return PolicySet(policies) | |
765 | |
766 | |
767 class Bucket(object): | |
768 """Represents a bucket, which is a unit of memory block classification.""" | |
769 | |
770 def __init__(self, stacktrace, allocator_type, typeinfo, typeinfo_name): | |
771 self._stacktrace = stacktrace | |
772 self._allocator_type = allocator_type | |
773 self._typeinfo = typeinfo | |
774 self._typeinfo_name = typeinfo_name | |
775 | |
776 self._symbolized_stackfunction = stacktrace | |
777 self._symbolized_joined_stackfunction = '' | |
778 self._symbolized_stacksourcefile = stacktrace | |
779 self._symbolized_joined_stacksourcefile = '' | |
780 self._symbolized_typeinfo = typeinfo_name | |
781 | |
782 self.component_cache = '' | |
783 | |
784 def __str__(self): | |
785 result = [] | |
786 result.append(self._allocator_type) | |
787 if self._symbolized_typeinfo == 'no typeinfo': | |
788 result.append('tno_typeinfo') | |
789 else: | |
790 result.append('t' + self._symbolized_typeinfo) | |
791 result.append('n' + self._typeinfo_name) | |
792 result.extend(['%s(@%s)' % (function, sourcefile) | |
793 for function, sourcefile | |
794 in zip(self._symbolized_stackfunction, | |
795 self._symbolized_stacksourcefile)]) | |
796 return ' '.join(result) | |
797 | |
798 def symbolize(self, symbol_mapping_cache): | |
799 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. | |
800 | |
801 Args: | |
802 symbol_mapping_cache: A SymbolMappingCache object. | |
803 """ | |
804 # TODO(dmikurube): Fill explicitly with numbers if symbol not found. | |
805 self._symbolized_stackfunction = [ | |
806 symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address) | |
807 for address in self._stacktrace] | |
808 self._symbolized_joined_stackfunction = ' '.join( | |
809 self._symbolized_stackfunction) | |
810 self._symbolized_stacksourcefile = [ | |
811 symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address) | |
812 for address in self._stacktrace] | |
813 self._symbolized_joined_stacksourcefile = ' '.join( | |
814 self._symbolized_stacksourcefile) | |
815 if not self._typeinfo: | |
816 self._symbolized_typeinfo = 'no typeinfo' | |
817 else: | |
818 self._symbolized_typeinfo = symbol_mapping_cache.lookup( | |
819 TYPEINFO_SYMBOLS, self._typeinfo) | |
820 if not self._symbolized_typeinfo: | |
821 self._symbolized_typeinfo = 'no typeinfo' | |
822 | |
823 def clear_component_cache(self): | |
824 self.component_cache = '' | |
825 | |
826 @property | |
827 def stacktrace(self): | |
828 return self._stacktrace | |
829 | |
830 @property | |
831 def allocator_type(self): | |
832 return self._allocator_type | |
833 | |
834 @property | |
835 def typeinfo(self): | |
836 return self._typeinfo | |
837 | |
838 @property | |
839 def typeinfo_name(self): | |
840 return self._typeinfo_name | |
841 | |
842 @property | |
843 def symbolized_stackfunction(self): | |
844 return self._symbolized_stackfunction | |
845 | |
846 @property | |
847 def symbolized_joined_stackfunction(self): | |
848 return self._symbolized_joined_stackfunction | |
849 | |
850 @property | |
851 def symbolized_stacksourcefile(self): | |
852 return self._symbolized_stacksourcefile | |
853 | |
854 @property | |
855 def symbolized_joined_stacksourcefile(self): | |
856 return self._symbolized_joined_stacksourcefile | |
857 | |
858 @property | |
859 def symbolized_typeinfo(self): | |
860 return self._symbolized_typeinfo | |
861 | |
862 | |
863 class BucketSet(object): | |
864 """Represents a set of bucket.""" | |
865 def __init__(self): | |
866 self._buckets = {} | |
867 self._code_addresses = set() | |
868 self._typeinfo_addresses = set() | |
869 | |
870 def load(self, prefix): | |
871 """Loads all related bucket files. | |
872 | |
873 Args: | |
874 prefix: A prefix string for bucket file names. | |
875 """ | |
876 LOGGER.info('Loading bucket files.') | |
877 | |
878 n = 0 | |
879 skipped = 0 | |
880 while True: | |
881 path = '%s.%04d.buckets' % (prefix, n) | |
882 if not os.path.exists(path) or not os.stat(path).st_size: | |
883 if skipped > 10: | |
884 break | |
885 n += 1 | |
886 skipped += 1 | |
887 continue | |
888 LOGGER.info(' %s' % path) | |
889 with open(path, 'r') as f: | |
890 self._load_file(f) | |
891 n += 1 | |
892 skipped = 0 | |
893 | |
894 def _load_file(self, bucket_f): | |
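    # Each bucket line is laid out as: |
    #   <bucket_id> <allocator_type> [t<typeinfo_addr>] [n<typeinfo_name>] <addr> <addr> ... |
    # where the trailing hexadecimal addresses form the stacktrace. |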
895 for line in bucket_f: | |
896 words = line.split() | |
897 typeinfo = None | |
898 typeinfo_name = '' | |
899 stacktrace_begin = 2 | |
900 for index, word in enumerate(words): | |
901 if index < 2: | |
902 continue | |
903 if word[0] == 't': | |
904 typeinfo = int(word[1:], 16) | |
905 self._typeinfo_addresses.add(typeinfo) | |
906 elif word[0] == 'n': | |
907 typeinfo_name = word[1:] | |
908 else: | |
909 stacktrace_begin = index | |
910 break | |
911 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] | |
912 for frame in stacktrace: | |
913 self._code_addresses.add(frame) | |
914 self._buckets[int(words[0])] = Bucket( | |
915 stacktrace, words[1], typeinfo, typeinfo_name) | |
916 | |
917 def __iter__(self): | |
918 for bucket_id, bucket_content in self._buckets.iteritems(): | |
919 yield bucket_id, bucket_content | |
920 | |
921 def __getitem__(self, bucket_id): | |
922 return self._buckets[bucket_id] | |
923 | |
924 def get(self, bucket_id): | |
925 return self._buckets.get(bucket_id) | |
926 | |
927 def symbolize(self, symbol_mapping_cache): | |
928 for bucket_content in self._buckets.itervalues(): | |
929 bucket_content.symbolize(symbol_mapping_cache) | |
930 | |
931 def clear_component_cache(self): | |
932 for bucket_content in self._buckets.itervalues(): | |
933 bucket_content.clear_component_cache() | |
934 | |
935 def iter_addresses(self, symbol_type): | |
936 if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]: | |
937 for function in self._code_addresses: | |
938 yield function | |
939 else: | |
940 for function in self._typeinfo_addresses: | |
941 yield function | |
942 | |
943 | |
944 class PageFrame(object): | |
945 """Represents a pageframe and maybe its shared count.""" | |
946 def __init__(self, pfn, size, pagecount, start_truncated, end_truncated): | |
947 self._pfn = pfn | |
948 self._size = size | |
949 self._pagecount = pagecount | |
950 self._start_truncated = start_truncated | |
951 self._end_truncated = end_truncated | |
952 | |
953 def __str__(self): | |
954 result = str() | |
955 if self._start_truncated: | |
956 result += '<' | |
957 result += '%06x#%d' % (self._pfn, self._pagecount) | |
958 if self._end_truncated: | |
959 result += '>' | |
960 return result | |
961 | |
962 def __repr__(self): | |
963 return str(self) | |
964 | |
965 @staticmethod | |
966 def parse(encoded_pfn, size): | |
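    # |encoded_pfn| is a base64-encoded PFN, optionally wrapped in '<' / '>' to |
    # mark truncation at a region boundary, and optionally followed by |
    # '#<base64-encoded pagecount>'. |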
967 start = 0 | |
968 end = len(encoded_pfn) | |
969 end_truncated = False | |
970 if encoded_pfn.endswith('>'): | |
971 end = len(encoded_pfn) - 1 | |
972 end_truncated = True | |
973 pagecount_found = encoded_pfn.find('#') | |
974 pagecount = None | |
975 if pagecount_found >= 0: | |
976 encoded_pagecount = 'AAA' + encoded_pfn[pagecount_found+1 : end] | |
977 pagecount = struct.unpack( | |
978 '>I', '\x00' + encoded_pagecount.decode('base64'))[0] | |
979 end = pagecount_found | |
980 start_truncated = False | |
981 if encoded_pfn.startswith('<'): | |
982 start = 1 | |
983 start_truncated = True | |
984 | |
985 pfn = struct.unpack( | |
986 '>I', '\x00' + (encoded_pfn[start:end]).decode('base64'))[0] | |
987 | |
988 return PageFrame(pfn, size, pagecount, start_truncated, end_truncated) | |
989 | |
990 @property | |
991 def pfn(self): | |
992 return self._pfn | |
993 | |
994 @property | |
995 def size(self): | |
996 return self._size | |
997 | |
998 def set_size(self, size): | |
999 self._size = size | |
1000 | |
1001 @property | |
1002 def pagecount(self): | |
1003 return self._pagecount | |
1004 | |
1005 @property | |
1006 def start_truncated(self): | |
1007 return self._start_truncated | |
1008 | |
1009 @property | |
1010 def end_truncated(self): | |
1011 return self._end_truncated | |
1012 | |
1013 | |
1014 class PFNCounts(object): | |
1015 """Represents counts of PFNs in a process.""" | |
1016 | |
1017 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') | |
1018 | |
1019 def __init__(self, path, modified_time): | |
1020 matched = self._PATH_PATTERN.match(path) | |
1021 if matched: | |
1022 self._pid = int(matched.group(2)) | |
1023 else: | |
1024 self._pid = 0 | |
1025 self._command_line = '' | |
1026 self._pagesize = 4096 | |
1027 self._path = path | |
1028 self._pfn_meta = '' | |
1029 self._pfnset = {} | |
1030 self._reason = '' | |
1031 self._time = modified_time | |
1032 | |
1033 @staticmethod | |
1034 def load(path, log_header='Loading PFNs from a heap profile dump: '): | |
1035 pfnset = PFNCounts(path, float(os.stat(path).st_mtime)) | |
1036 LOGGER.info('%s%s' % (log_header, path)) | |
1037 | |
1038 with open(path, 'r') as pfnset_f: | |
1039 pfnset.load_file(pfnset_f) | |
1040 | |
1041 return pfnset | |
1042 | |
1043 @property | |
1044 def path(self): | |
1045 return self._path | |
1046 | |
1047 @property | |
1048 def pid(self): | |
1049 return self._pid | |
1050 | |
1051 @property | |
1052 def time(self): | |
1053 return self._time | |
1054 | |
1055 @property | |
1056 def reason(self): | |
1057 return self._reason | |
1058 | |
1059 @property | |
1060 def iter_pfn(self): | |
1061 for pfn, count in self._pfnset.iteritems(): | |
1062 yield pfn, count | |
1063 | |
1064 def load_file(self, pfnset_f): | |
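    # Reads meta lines such as 'PageSize: ', 'PFN: '/'PageFrame: ', 'Time: ', |
    # 'CommandLine: ' and 'Reason: ', and counts PFNs from 'PF: ' lines until |
    # 'GLOBAL_STATS:' or 'STACKTRACES:' appears. |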
1065 prev_pfn_end_truncated = None | |
1066 for line in pfnset_f: | |
1067 line = line.strip() | |
1068 if line.startswith('GLOBAL_STATS:') or line.startswith('STACKTRACES:'): | |
1069 break | |
1070 elif line.startswith('PF: '): | |
1071 for encoded_pfn in line[3:].split(): | |
1072 page_frame = PageFrame.parse(encoded_pfn, self._pagesize) | |
1073 if page_frame.start_truncated and ( | |
1074 not prev_pfn_end_truncated or | |
1075 prev_pfn_end_truncated != page_frame.pfn): | |
1076 LOGGER.error('Broken page frame number: %s.' % encoded_pfn) | |
1077 self._pfnset[page_frame.pfn] = self._pfnset.get(page_frame.pfn, 0) + 1 | |
1078 if page_frame.end_truncated: | |
1079 prev_pfn_end_truncated = page_frame.pfn | |
1080 else: | |
1081 prev_pfn_end_truncated = None | |
1082 elif line.startswith('PageSize: '): | |
1083 self._pagesize = int(line[10:]) | |
1084 elif line.startswith('PFN: '): | |
1085 self._pfn_meta = line[5:] | |
1086 elif line.startswith('PageFrame: '): | |
1087 self._pfn_meta = line[11:] | |
1088 elif line.startswith('Time: '): | |
1089 self._time = float(line[6:]) | |
1090 elif line.startswith('CommandLine: '): | |
1091 self._command_line = line[13:] | |
1092 elif line.startswith('Reason: '): | |
1093 self._reason = line[8:] | |
1094 | |
1095 | |
1096 class Dump(object): | |
1097 """Represents a heap profile dump.""" | |
1098 | |
1099 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') | |
1100 | |
1101 _HOOK_PATTERN = re.compile( | |
1102 r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' | |
1103 r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) | |
1104 | |
1105 _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
1106 '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') | |
1107 _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' | |
1108 '(?P<RESERVED>[0-9]+)') | |
1109 | |
1110 _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') | |
1111 _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') | |
1112 | |
1113 _TIME_PATTERN_FORMAT = re.compile( | |
1114 r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') | |
1115 _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') | |
1116 | |
1117 def __init__(self, path, modified_time): | |
1118 self._path = path | |
1119 matched = self._PATH_PATTERN.match(path) | |
1120 self._pid = int(matched.group(2)) | |
1121 self._count = int(matched.group(3)) | |
1122 self._time = modified_time | |
1123 self._map = {} | |
1124 self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) | |
1125 self._stacktrace_lines = [] | |
1126 self._global_stats = {} # used only in apply_policy | |
1127 | |
1128 self._run_id = '' | |
1129 self._pagesize = 4096 | |
1130 self._pageframe_length = 0 | |
1131 self._pageframe_encoding = '' | |
1132 self._has_pagecount = False | |
1133 | |
1134 self._version = '' | |
1135 self._lines = [] | |
1136 | |
1137 @property | |
1138 def path(self): | |
1139 return self._path | |
1140 | |
1141 @property | |
1142 def count(self): | |
1143 return self._count | |
1144 | |
1145 @property | |
1146 def time(self): | |
1147 return self._time | |
1148 | |
1149 @property | |
1150 def iter_map(self): | |
1151 for region in sorted(self._map.iteritems()): | |
1152 yield region[0], region[1] | |
1153 | |
1154 def iter_procmaps(self): | |
1155     for begin, end, attr in self._procmaps.iter_range(): |
1156 yield begin, end, attr | |
1157 | |
1158 @property | |
1159 def iter_stacktrace(self): | |
1160 for line in self._stacktrace_lines: | |
1161 yield line | |
1162 | |
1163 def global_stat(self, name): | |
1164 return self._global_stats[name] | |
1165 | |
1166 @property | |
1167 def run_id(self): | |
1168 return self._run_id | |
1169 | |
1170 @property | |
1171 def pagesize(self): | |
1172 return self._pagesize | |
1173 | |
1174 @property | |
1175 def pageframe_length(self): | |
1176 return self._pageframe_length | |
1177 | |
1178 @property | |
1179 def pageframe_encoding(self): | |
1180 return self._pageframe_encoding | |
1181 | |
1182 @property | |
1183 def has_pagecount(self): | |
1184 return self._has_pagecount | |
1185 | |
1186 @staticmethod | |
1187 def load(path, log_header='Loading a heap profile dump: '): | |
1188 """Loads a heap profile dump. | |
1189 | |
1190 Args: | |
1191 path: A file path string to load. | |
1192 log_header: A preceding string for log messages. | |
1193 | |
1194 Returns: | |
1195 A loaded Dump object. | |
1196 | |
1197 Raises: | |
1198 ParsingException for invalid heap profile dumps. | |
1199 """ | |
1200 dump = Dump(path, os.stat(path).st_mtime) | |
1201 with open(path, 'r') as f: | |
1202 dump.load_file(f, log_header) | |
1203 return dump | |
1204 | |
1205 def load_file(self, f, log_header): | |
1206 self._lines = [line for line in f | |
1207 if line and not line.startswith('#')] | |
1208 | |
1209 try: | |
1210 self._version, ln = self._parse_version() | |
1211 self._parse_meta_information() | |
1212 if self._version == DUMP_DEEP_6: | |
1213 self._parse_mmap_list() | |
1214 self._parse_global_stats() | |
1215 self._extract_stacktrace_lines(ln) | |
1216 except EmptyDumpException: | |
1217 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) | |
1218 except ParsingException, e: | |
1219 LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) | |
1220 raise | |
1221 else: | |
1222 LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) | |
1223 | |
1224 def _parse_version(self): | |
1225 """Parses a version string in self._lines. | |
1226 | |
1227 Returns: | |
1228 A pair of (a string representing a version of the stacktrace dump, | |
1229 and an integer indicating a line number next to the version string). | |
1230 | |
1231 Raises: | |
1232 ParsingException for invalid dump versions. | |
1233 """ | |
1234 version = '' | |
1235 | |
1236 # Skip until an identifiable line. | |
1237 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') | |
1238 if not self._lines: | |
1239 raise EmptyDumpException('Empty heap dump file.') | |
1240 (ln, found) = skip_while( | |
1241 0, len(self._lines), | |
1242 lambda n: not self._lines[n].startswith(headers)) | |
1243 if not found: | |
1244 raise InvalidDumpException('No version header.') | |
1245 | |
1246 # Identify a version. | |
1247 if self._lines[ln].startswith('heap profile: '): | |
1248 version = self._lines[ln][13:].strip() | |
1249 if version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
1250 (ln, _) = skip_while( | |
1251 ln, len(self._lines), | |
1252 lambda n: self._lines[n] != 'STACKTRACES:\n') | |
1253 elif version in DUMP_DEEP_OBSOLETE: | |
1254 raise ObsoleteDumpVersionException(version) | |
1255 else: | |
1256 raise InvalidDumpException('Invalid version: %s' % version) | |
1257 elif self._lines[ln] == 'STACKTRACES:\n': | |
1258 raise ObsoleteDumpVersionException(DUMP_DEEP_1) | |
1259 elif self._lines[ln] == 'MMAP_STACKTRACES:\n': | |
1260 raise ObsoleteDumpVersionException(DUMP_DEEP_2) | |
1261 | |
1262 return (version, ln) | |
1263 | |
1264 def _parse_global_stats(self): | |
1265 """Parses lines in self._lines as global stats.""" | |
1266 (ln, _) = skip_while( | |
1267 0, len(self._lines), | |
1268 lambda n: self._lines[n] != 'GLOBAL_STATS:\n') | |
1269 | |
1270 global_stat_names = [ | |
1271 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', | |
1272 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', | |
1273 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', | |
1274 'nonprofiled-stack', 'nonprofiled-other', | |
1275 'profiled-mmap', 'profiled-malloc'] | |
1276 | |
1277 for prefix in global_stat_names: | |
1278 (ln, _) = skip_while( | |
1279 ln, len(self._lines), | |
1280 lambda n: self._lines[n].split()[0] != prefix) | |
1281 words = self._lines[ln].split() | |
1282 self._global_stats[prefix + '_virtual'] = int(words[-2]) | |
1283 self._global_stats[prefix + '_committed'] = int(words[-1]) | |
1284 | |
1285 def _parse_meta_information(self): | |
1286 """Parses lines in self._lines for meta information.""" | |
1287 (ln, found) = skip_while( | |
1288 0, len(self._lines), | |
1289 lambda n: self._lines[n] != 'META:\n') | |
1290 if not found: | |
1291 return | |
1292 ln += 1 | |
1293 | |
1294 while True: | |
1295 if self._lines[ln].startswith('Time:'): | |
1296 matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) | |
1297 matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) | |
1298 if matched_format: | |
1299 self._time = time.mktime(datetime.datetime.strptime( | |
1300 matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) | |
1301 if matched_format.group(2): | |
1302 self._time += float(matched_format.group(2)[1:]) / 1000.0 | |
1303 elif matched_seconds: | |
1304 self._time = float(matched_seconds.group(1)) | |
1305 elif self._lines[ln].startswith('Reason:'): | |
1306 pass # Nothing to do for 'Reason:' | |
1307 elif self._lines[ln].startswith('PageSize: '): | |
1308 self._pagesize = int(self._lines[ln][10:]) | |
1309 elif self._lines[ln].startswith('CommandLine:'): | |
1310 pass | |
1311 elif (self._lines[ln].startswith('PageFrame: ') or | |
1312 self._lines[ln].startswith('PFN: ')): | |
1313 if self._lines[ln].startswith('PageFrame: '): | |
1314 words = self._lines[ln][11:].split(',') | |
1315 else: | |
1316 words = self._lines[ln][5:].split(',') | |
1317 for word in words: | |
1318 if word == '24': | |
1319 self._pageframe_length = 24 | |
1320 elif word == 'Base64': | |
1321 self._pageframe_encoding = 'base64' | |
1322 elif word == 'PageCount': | |
1323 self._has_pagecount = True | |
1324 elif self._lines[ln].startswith('RunID: '): | |
1325 self._run_id = self._lines[ln][7:].strip() | |
1326 elif (self._lines[ln].startswith('MMAP_LIST:') or | |
1327 self._lines[ln].startswith('GLOBAL_STATS:')): | |
1328         # Stop parsing meta information at "MMAP_LIST:" or "GLOBAL_STATS:". |
1329 break | |
1330 else: | |
1331 pass | |
1332 ln += 1 | |
1333 | |
1334 def _parse_mmap_list(self): | |
1335 """Parses lines in self._lines as a mmap list.""" | |
1336 (ln, found) = skip_while( | |
1337 0, len(self._lines), | |
1338 lambda n: self._lines[n] != 'MMAP_LIST:\n') | |
1339 if not found: | |
1340 return {} | |
1341 | |
1342 ln += 1 | |
1343 self._map = {} | |
1344 current_vma = {} | |
1345 pageframe_list = [] | |
1346 while True: | |
1347 entry = proc_maps.ProcMaps.parse_line(self._lines[ln]) | |
1348 if entry: | |
1349 current_vma = {} | |
1350 for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): | |
1351 for key, value in entry.as_dict().iteritems(): | |
1352 attr[key] = value | |
1353 current_vma[key] = value | |
1354 ln += 1 | |
1355 continue | |
1356 | |
1357 if self._lines[ln].startswith(' PF: '): | |
1358 for pageframe in self._lines[ln][5:].split(): | |
1359 pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) | |
1360 ln += 1 | |
1361 continue | |
1362 | |
1363 matched = self._HOOK_PATTERN.match(self._lines[ln]) | |
1364 if not matched: | |
1365 break | |
1366 # 2: starting address | |
1367 # 5: end address | |
1368 # 7: hooked or unhooked | |
1369 # 8: additional information | |
1370 if matched.group(7) == 'hooked': | |
1371 submatched = self._HOOKED_PATTERN.match(matched.group(8)) | |
1372 if not submatched: | |
1373 submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) | |
1374 elif matched.group(7) == 'unhooked': | |
1375 submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) | |
1376 if not submatched: | |
1377 submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) | |
1378 else: | |
1379 assert matched.group(7) in ['hooked', 'unhooked'] | |
1380 | |
1381 submatched_dict = submatched.groupdict() | |
1382 region_info = { 'vma': current_vma } | |
1383 if submatched_dict.get('TYPE'): | |
1384 region_info['type'] = submatched_dict['TYPE'].strip() | |
1385 if submatched_dict.get('COMMITTED'): | |
1386 region_info['committed'] = int(submatched_dict['COMMITTED']) | |
1387 if submatched_dict.get('RESERVED'): | |
1388 region_info['reserved'] = int(submatched_dict['RESERVED']) | |
1389 if submatched_dict.get('BUCKETID'): | |
1390 region_info['bucket_id'] = int(submatched_dict['BUCKETID']) | |
1391 | |
1392 if matched.group(1) == '(': | |
1393 start = current_vma['begin'] | |
1394 else: | |
1395 start = int(matched.group(2), 16) | |
1396 if matched.group(4) == '(': | |
1397 end = current_vma['end'] | |
1398 else: | |
1399 end = int(matched.group(5), 16) | |
1400 | |
1401 if pageframe_list and pageframe_list[0].start_truncated: | |
1402 pageframe_list[0].set_size( | |
1403 pageframe_list[0].size - start % self._pagesize) | |
1404 if pageframe_list and pageframe_list[-1].end_truncated: | |
1405 pageframe_list[-1].set_size( | |
1406 pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) | |
1407 region_info['pageframe'] = pageframe_list | |
1408 pageframe_list = [] | |
1409 | |
1410 self._map[(start, end)] = (matched.group(7), region_info) | |
1411 ln += 1 | |
1412 | |
1413 def _extract_stacktrace_lines(self, line_number): | |
1414 """Extracts the position of stacktrace lines. | |
1415 | |
1416 Valid stacktrace lines are stored into self._stacktrace_lines. | |
1417 | |
1418 Args: | |
1419 line_number: A line number to start parsing in lines. | |
1420 | |
1421 Raises: | |
1422 ParsingException for invalid dump versions. | |
1423 """ | |
1424 if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): | |
1425 (line_number, _) = skip_while( | |
1426 line_number, len(self._lines), | |
1427 lambda n: not self._lines[n].split()[0].isdigit()) | |
1428 stacktrace_start = line_number | |
1429 (line_number, _) = skip_while( | |
1430 line_number, len(self._lines), | |
1431 lambda n: self._check_stacktrace_line(self._lines[n])) | |
1432 self._stacktrace_lines = self._lines[stacktrace_start:line_number] | |
1433 | |
1434 elif self._version in DUMP_DEEP_OBSOLETE: | |
1435 raise ObsoleteDumpVersionException(self._version) | |
1436 | |
1437 else: | |
1438 raise InvalidDumpException('Invalid version: %s' % self._version) | |
1439 | |
1440 @staticmethod | |
1441 def _check_stacktrace_line(stacktrace_line): | |
1442 """Checks if a given stacktrace_line is valid as stacktrace. | |
1443 | |
1444 Args: | |
1445 stacktrace_line: A string to be checked. | |
1446 | |
1447 Returns: | |
1448 True if the given stacktrace_line is valid. | |
1449 """ | |
1450 words = stacktrace_line.split() | |
1451 if len(words) < BUCKET_ID + 1: | |
1452 return False | |
1453 if words[BUCKET_ID - 1] != '@': | |
1454 return False | |
1455 return True | |
1456 | |
1457 | |
1458 class DumpList(object): | |
1459 """Represents a sequence of heap profile dumps.""" | |
1460 | |
1461 def __init__(self, dump_list): | |
1462 self._dump_list = dump_list | |
1463 | |
1464 @staticmethod | |
1465 def load(path_list): | |
1466 LOGGER.info('Loading heap dump profiles.') | |
1467 dump_list = [] | |
1468 for path in path_list: | |
1469 dump_list.append(Dump.load(path, ' ')) | |
1470 return DumpList(dump_list) | |
1471 | |
1472 def __len__(self): | |
1473 return len(self._dump_list) | |
1474 | |
1475 def __iter__(self): | |
1476 for dump in self._dump_list: | |
1477 yield dump | |
1478 | |
1479 def __getitem__(self, index): | |
1480 return self._dump_list[index] | |
1481 | |
1482 | |
1483 class Unit(object): | |
1484 """Represents a minimum unit of memory usage categorization. | |
1485 | |
1486 It is supposed to be inherited for some different spaces like the entire | |
1487 virtual memory and malloc arena. Such different spaces are called "worlds" | |
1488 in dmprof. (For example, the "vm" world and the "malloc" world.) | |
1489 """ | |
1490 def __init__(self, unit_id, size): | |
1491 self._unit_id = unit_id | |
1492 self._size = size | |
1493 | |
1494 @property | |
1495 def unit_id(self): | |
1496 return self._unit_id | |
1497 | |
1498 @property | |
1499 def size(self): | |
1500 return self._size | |
1501 | |
1502 | |
1503 class VMUnit(Unit): | |
1504 """Represents a Unit for a memory region on virtual memory.""" | |
1505 def __init__(self, unit_id, committed, reserved, mmap, region, | |
1506 pageframe=None, group_pfn_counts=None): | |
1507 super(VMUnit, self).__init__(unit_id, committed) | |
1508 self._reserved = reserved | |
1509 self._mmap = mmap | |
1510 self._region = region | |
1511 self._pageframe = pageframe | |
1512 self._group_pfn_counts = group_pfn_counts | |
1513 | |
1514 @property | |
1515 def committed(self): | |
1516 return self._size | |
1517 | |
1518 @property | |
1519 def reserved(self): | |
1520 return self._reserved | |
1521 | |
1522 @property | |
1523 def mmap(self): | |
1524 return self._mmap | |
1525 | |
1526 @property | |
1527 def region(self): | |
1528 return self._region | |
1529 | |
1530 @property | |
1531 def pageframe(self): | |
1532 return self._pageframe | |
1533 | |
1534 @property | |
1535 def group_pfn_counts(self): | |
1536 return self._group_pfn_counts | |
1537 | |
1538 | |
1539 class MMapUnit(VMUnit): | |
1540 """Represents a Unit for a mmap'ed region.""" | |
1541 def __init__(self, unit_id, committed, reserved, region, bucket_set, | |
1542 pageframe=None, group_pfn_counts=None): | |
1543 super(MMapUnit, self).__init__(unit_id, committed, reserved, True, | |
1544 region, pageframe, group_pfn_counts) | |
1545 self._bucket_set = bucket_set | |
1546 | |
1547 def __repr__(self): | |
1548 return str(self.region) | |
1549 | |
1550 @property | |
1551 def bucket_set(self): | |
1552 return self._bucket_set | |
1553 | |
1554 | |
1555 class UnhookedUnit(VMUnit): | |
1556 """Represents a Unit for a non-mmap'ed memory region on virtual memory.""" | |
1557 def __init__(self, unit_id, committed, reserved, region, | |
1558 pageframe=None, group_pfn_counts=None): | |
1559 super(UnhookedUnit, self).__init__(unit_id, committed, reserved, False, | |
1560 region, pageframe, group_pfn_counts) | |
1561 | |
1562 def __repr__(self): | |
1563 return str(self.region) | |
1564 | |
1565 | |
1566 class MallocUnit(Unit): | |
1567 """Represents a Unit for a malloc'ed memory block.""" | |
1568 def __init__(self, unit_id, size, alloc_count, free_count, bucket): | |
1569 super(MallocUnit, self).__init__(unit_id, size) | |
1570 self._bucket = bucket | |
1571 self._alloc_count = alloc_count | |
1572 self._free_count = free_count | |
1573 | |
1574 def __repr__(self): | |
1575 return str(self.bucket) | |
1576 | |
1577 @property | |
1578 def bucket(self): | |
1579 return self._bucket | |
1580 | |
1581 @property | |
1582 def alloc_count(self): | |
1583 return self._alloc_count | |
1584 | |
1585 @property | |
1586 def free_count(self): | |
1587 return self._free_count | |
1588 | |
1589 | |
1590 class UnitSet(object): | |
1591 """Represents an iterable set of Units.""" | |
1592 def __init__(self, world): | |
1593 self._units = {} | |
1594 self._world = world | |
1595 | |
1596 def __repr__(self): | |
1597 return str(self._units) | |
1598 | |
1599 def __iter__(self): | |
1600 for unit_id in sorted(self._units): | |
1601 yield self._units[unit_id] | |
1602 | |
1603 def append(self, unit, overwrite=False): | |
1604 if not overwrite and unit.unit_id in self._units: | |
1605 LOGGER.error('The unit id=%s already exists.' % str(unit.unit_id)) | |
1606 self._units[unit.unit_id] = unit | |
1607 | |
1608 | |
1609 class AbstractRule(object): | |
1610 """An abstract class for rules to be matched with units.""" | |
1611 def __init__(self, dct): | |
1612 self._name = dct['name'] | |
1613 self._hidden = dct.get('hidden', False) | |
1614 self._subworlds = dct.get('subworlds', []) | |
1615 | |
1616 def match(self, unit): | |
1617 raise NotImplementedError() | |
1618 | |
1619 @property | |
1620 def name(self): | |
1621 return self._name | |
1622 | |
1623 @property | |
1624 def hidden(self): | |
1625 return self._hidden | |
1626 | |
1627 def iter_subworld(self): | |
1628 for subworld in self._subworlds: | |
1629 yield subworld | |
1630 | |
1631 | |
1632 class VMRule(AbstractRule): | |
1633 """Represents a Rule to match with virtual memory regions.""" | |
1634 def __init__(self, dct): | |
1635 super(VMRule, self).__init__(dct) | |
1636 self._backtrace_function = dct.get('backtrace_function', None) | |
1637 if self._backtrace_function: | |
1638 self._backtrace_function = re.compile(self._backtrace_function) | |
1639 self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) | |
1640 if self._backtrace_sourcefile: | |
1641 self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) | |
1642 self._mmap = dct.get('mmap', None) | |
1643 self._sharedwith = dct.get('sharedwith', []) | |
1644 self._mapped_pathname = dct.get('mapped_pathname', None) | |
1645 if self._mapped_pathname: | |
1646 self._mapped_pathname = re.compile(self._mapped_pathname) | |
1647 self._mapped_permission = dct.get('mapped_permission', None) | |
1648 if self._mapped_permission: | |
1649 self._mapped_permission = re.compile(self._mapped_permission) | |
1650 | |
1651 def __repr__(self): | |
1652 result = cStringIO.StringIO() | |
1653 result.write('{"%s"=>' % self._name) | |
1654 attributes = [] | |
1655 attributes.append('mmap: %s' % self._mmap) | |
1656 if self._backtrace_function: | |
1657 attributes.append('backtrace_function: "%s"' % | |
1658 self._backtrace_function.pattern) | |
1659 if self._sharedwith: | |
1660 attributes.append('sharedwith: "%s"' % self._sharedwith) | |
1661 if self._mapped_pathname: | |
1662 attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern) | |
1663 if self._mapped_permission: | |
1664 attributes.append('mapped_permission: "%s"' % | |
1665 self._mapped_permission.pattern) | |
1666 result.write('%s}' % ', '.join(attributes)) | |
1667 return result.getvalue() | |
1668 | |
1669 def match(self, unit): | |
1670 if unit.mmap: | |
1671 assert unit.region[0] == 'hooked' | |
1672 bucket = unit.bucket_set.get(unit.region[1]['bucket_id']) | |
1673 assert bucket | |
1674 assert bucket.allocator_type == 'mmap' | |
1675 | |
1676 stackfunction = bucket.symbolized_joined_stackfunction | |
1677 stacksourcefile = bucket.symbolized_joined_stacksourcefile | |
1678 | |
1679 # TODO(dmikurube): Support shared memory. | |
1680 sharedwith = None | |
1681 | |
1682 if self._mmap == False: # (self._mmap == None) should go through. | |
1683 return False | |
1684 if (self._backtrace_function and | |
1685 not self._backtrace_function.match(stackfunction)): | |
1686 return False | |
1687 if (self._backtrace_sourcefile and | |
1688 not self._backtrace_sourcefile.match(stacksourcefile)): | |
1689 return False | |
1690 if (self._mapped_pathname and | |
1691 not self._mapped_pathname.match(unit.region[1]['vma']['name'])): | |
1692 return False | |
1693 if (self._mapped_permission and | |
1694 not self._mapped_permission.match( | |
1695 unit.region[1]['vma']['readable'] + | |
1696 unit.region[1]['vma']['writable'] + | |
1697 unit.region[1]['vma']['executable'] + | |
1698 unit.region[1]['vma']['private'])): | |
1699 return False | |
1700 if (self._sharedwith and | |
1701 unit.pageframe and sharedwith not in self._sharedwith): | |
1702 return False | |
1703 | |
1704 return True | |
1705 | |
1706 else: | |
1707 assert unit.region[0] == 'unhooked' | |
1708 | |
1709 # TODO(dmikurube): Support shared memory. | |
1710 sharedwith = None | |
1711 | |
1712 if self._mmap == True: # (self._mmap == None) should go through. | |
1713 return False | |
1714 if (self._mapped_pathname and | |
1715 not self._mapped_pathname.match(unit.region[1]['vma']['name'])): | |
1716 return False | |
1717 if (self._mapped_permission and | |
1718 not self._mapped_permission.match( | |
1719 unit.region[1]['vma']['readable'] + | |
1720 unit.region[1]['vma']['writable'] + | |
1721 unit.region[1]['vma']['executable'] + | |
1722 unit.region[1]['vma']['private'])): | |
1723 return False | |
1724 if (self._sharedwith and | |
1725 unit.pageframe and sharedwith not in self._sharedwith): | |
1726 return False | |
1727 | |
1728 return True | |
1729 | |
1730 | |
1731 class MallocRule(AbstractRule): | |
1732 """Represents a Rule to match with malloc'ed blocks.""" | |
1733 def __init__(self, dct): | |
1734 super(MallocRule, self).__init__(dct) | |
1735 self._backtrace_function = dct.get('backtrace_function', None) | |
1736 if self._backtrace_function: | |
1737 self._backtrace_function = re.compile(self._backtrace_function) | |
1738 self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) | |
1739 if self._backtrace_sourcefile: | |
1740 self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) | |
1741 self._typeinfo = dct.get('typeinfo', None) | |
1742 if self._typeinfo: | |
1743 self._typeinfo = re.compile(self._typeinfo) | |
1744 | |
1745 def __repr__(self): | |
1746 result = cStringIO.StringIO() | |
1747 result.write('{"%s"=>' % self._name) | |
1748 attributes = [] | |
1749 if self._backtrace_function: | |
1750 attributes.append('backtrace_function: "%s"' % self._backtrace_function) | |
1751 if self._typeinfo: | |
1752 attributes.append('typeinfo: "%s"' % self._typeinfo) | |
1753 result.write('%s}' % ', '.join(attributes)) | |
1754 return result.getvalue() | |
1755 | |
1756 def match(self, unit): | |
1757 assert unit.bucket.allocator_type == 'malloc' | |
1758 | |
1759 stackfunction = unit.bucket.symbolized_joined_stackfunction | |
1760 stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile | |
1761 typeinfo = unit.bucket.symbolized_typeinfo | |
1762 if typeinfo.startswith('0x'): | |
1763 typeinfo = unit.bucket.typeinfo_name | |
1764 | |
1765 return ((not self._backtrace_function or | |
1766 self._backtrace_function.match(stackfunction)) and | |
1767 (not self._backtrace_sourcefile or | |
1768 self._backtrace_sourcefile.match(stacksourcefile)) and | |
1769 (not self._typeinfo or self._typeinfo.match(typeinfo))) | |
1770 | |
1771 | |
1772 class NoBucketMallocRule(MallocRule): | |
1773 """Represents a Rule matched by small, ignorable units.""" | |
1774 def __init__(self): | |
1775 super(NoBucketMallocRule, self).__init__({'name': 'tc-no-bucket'}) | |
1776 self._no_bucket = True | |
1777 | |
1778 @property | |
1779 def no_bucket(self): | |
1780 return self._no_bucket | |
1781 | |
1782 | |
1783 class AbstractSorter(object): | |
1784 """An abstract class for classifying Units with a set of Rules.""" | |
1785 def __init__(self, dct): | |
1786 self._type = 'sorter' | |
1787 self._version = dct['version'] | |
1788 self._world = dct['world'] | |
1789 self._name = dct['name'] | |
1790 self._order = dct['order'] | |
1791 | |
1792 self._rules = [] | |
1793 for rule in dct['rules']: | |
1794 if dct['world'] == 'vm': | |
1795 self._rules.append(VMRule(rule)) | |
1796 elif dct['world'] == 'malloc': | |
1797 self._rules.append(MallocRule(rule)) | |
1798 else: | |
1799 LOGGER.error('Unknown sorter world type') | |
1800 | |
1801 def __repr__(self): | |
1802 result = cStringIO.StringIO() | |
1803 result.write('world=%s' % self._world) | |
1804 result.write('order=%s' % self._order) | |
1805 result.write('rules:') | |
1806 for rule in self._rules: | |
1807 result.write(' %s' % rule) | |
1808 return result.getvalue() | |
1809 | |
1810 @staticmethod | |
1811 def load(filename): | |
1812 with open(filename) as sorter_f: | |
1813 sorter_dict = json.load(sorter_f) | |
1814 if sorter_dict['world'] == 'vm': | |
1815 return VMSorter(sorter_dict) | |
1816 elif sorter_dict['world'] == 'malloc': | |
1817 return MallocSorter(sorter_dict) | |
1818 else: | |
1819 LOGGER.error('Unknown sorter world type') | |
1820 return None | |
1821 | |
1822 @property | |
1823 def world(self): | |
1824 return self._world | |
1825 | |
1826 @property | |
1827 def name(self): | |
1828 return self._name | |
1829 | |
1830 def find(self, unit): | |
1831 raise NotImplementedError() | |
1832 | |
1833 def find_rule(self, name): | |
1834 """Finds a rule whose name is |name|. """ | |
1835 for rule in self._rules: | |
1836 if rule.name == name: | |
1837 return rule | |
1838 return None | |
1839 | |
1840 | |
1841 class VMSorter(AbstractSorter): | |
1842 """Represents a Sorter for memory regions on virtual memory.""" | |
1843 def __init__(self, dct): | |
1844 assert dct['world'] == 'vm' | |
1845 super(VMSorter, self).__init__(dct) | |
1846 | |
1847 def find(self, unit): | |
1848 for rule in self._rules: | |
1849 if rule.match(unit): | |
1850 return rule | |
1851 assert False | |
1852 | |
1853 | |
1854 class MallocSorter(AbstractSorter): | |
1855 """Represents a Sorter for malloc'ed blocks.""" | |
1856 def __init__(self, dct): | |
1857 assert dct['world'] == 'malloc' | |
1858 super(MallocSorter, self).__init__(dct) | |
1859 self._no_bucket_rule = NoBucketMallocRule() | |
1860 | |
1861 def find(self, unit): | |
1862 if not unit.bucket: | |
1863 return self._no_bucket_rule | |
1864 assert unit.bucket.allocator_type == 'malloc' | |
1865 | |
1866 if unit.bucket.component_cache: | |
1867 return unit.bucket.component_cache | |
1868 | |
1869 for rule in self._rules: | |
1870 if rule.match(unit): | |
1871 unit.bucket.component_cache = rule | |
1872 return rule | |
1873 assert False | |
1874 | |
1875 | |
1876 class SorterSet(object): | |
1877 """Represents an iterable set of Sorters.""" | |
1878 def __init__(self, additional=None, default=None): | |
1879 if not additional: | |
1880 additional = [] | |
1881 if not default: | |
1882 default = DEFAULT_SORTERS | |
1883 self._sorters = {} | |
1884 for filename in default + additional: | |
1885 sorter = AbstractSorter.load(filename) | |
1886 if sorter.world not in self._sorters: | |
1887 self._sorters[sorter.world] = [] | |
1888 self._sorters[sorter.world].append(sorter) | |
1889 | |
1890 def __repr__(self): | |
1891 result = cStringIO.StringIO() | |
1892 result.write(self._sorters) | |
1893 return result.getvalue() | |
1894 | |
1895 def __iter__(self): | |
1896 for sorters in self._sorters.itervalues(): | |
1897 for sorter in sorters: | |
1898 yield sorter | |
1899 | |
1900 def iter_world(self, world): | |
1901 for sorter in self._sorters.get(world, []): | |
1902 yield sorter | |
1903 | |
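Note: AbstractSorter.load() above reads the sorter.*.json files listed in
DEFAULT_SORTERS. A minimal sketch of such a file, with hypothetical rule names,
regexes, and guessed value shapes for 'version' and 'order' (only the key names
come from the classes above):

    {
      "version": "SORTER_DEEP_1",
      "world": "malloc",
      "name": "malloc-component",
      "order": [],
      "rules": [
        {"name": "tc-webkit", "backtrace_function": ".*webkit.*"},
        {"name": "tc-anything-else"}
      ]
    }

'backtrace_function', 'backtrace_sourcefile' and 'typeinfo' (malloc rules), and
'mapped_pathname' and 'mapped_permission' (vm rules), are compiled with
re.compile, so their values are Python regular expressions.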
1904 | |
1905 class Command(object): | |
1906 """Each subclass is a subcommand of this executable. | |
1907 | |
1908 See COMMANDS in main(). | |
1909 """ | |
1910 _DEVICE_LIB_BASEDIRS = ['/data/data/', '/data/app-lib/', '/data/local/tmp'] | |
1911 | |
1912 def __init__(self, usage): | |
1913 self._parser = optparse.OptionParser(usage) | |
1914 | |
1915 @staticmethod | |
1916 def load_basic_files( | |
1917 dump_path, multiple, no_dump=False, alternative_dirs=None): | |
1918 prefix = Command._find_prefix(dump_path) | |
1919 # If the target process appears to have been running on Android, convert | |
1920 # each path on the Android device to the estimated corresponding path on | |
1921 # the host. Use --alternative-dirs to specify the conversion manually. | |
1922 if not alternative_dirs: | |
1923 alternative_dirs = Command._estimate_alternative_dirs(prefix) | |
1924 if alternative_dirs: | |
1925 for device, host in alternative_dirs.iteritems(): | |
1926 LOGGER.info('Assuming %s on device as %s on host' % (device, host)) | |
1927 symbol_data_sources = SymbolDataSources(prefix, alternative_dirs) | |
1928 symbol_data_sources.prepare() | |
1929 bucket_set = BucketSet() | |
1930 bucket_set.load(prefix) | |
1931 if not no_dump: | |
1932 if multiple: | |
1933 dump_list = DumpList.load(Command._find_all_dumps(dump_path)) | |
1934 else: | |
1935 dump = Dump.load(dump_path) | |
1936 symbol_mapping_cache = SymbolMappingCache() | |
1937 with open(prefix + '.cache.function', 'a+') as cache_f: | |
1938 symbol_mapping_cache.update( | |
1939 FUNCTION_SYMBOLS, bucket_set, | |
1940 SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f) | |
1941 with open(prefix + '.cache.typeinfo', 'a+') as cache_f: | |
1942 symbol_mapping_cache.update( | |
1943 TYPEINFO_SYMBOLS, bucket_set, | |
1944 SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f) | |
1945 with open(prefix + '.cache.sourcefile', 'a+') as cache_f: | |
1946 symbol_mapping_cache.update( | |
1947 SOURCEFILE_SYMBOLS, bucket_set, | |
1948 SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f) | |
1949 bucket_set.symbolize(symbol_mapping_cache) | |
1950 if no_dump: | |
1951 return bucket_set | |
1952 elif multiple: | |
1953 return (bucket_set, dump_list) | |
1954 else: | |
1955 return (bucket_set, dump) | |
1956 | |
1957 @staticmethod | |
1958 def _find_prefix(path): | |
1959 return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path) | |
1960 | |
1961 @staticmethod | |
1962 def _estimate_alternative_dirs(prefix): | |
1963 """Estimates paths on the host corresponding to paths on the target device. | |
1964 | |
1965 For Android, dmprof.py must find symbol information in binaries on the | |
1966 host instead of on the Android device because dmprof.py does not run on | |
1967 the device. This method estimates the host path that corresponds to a | |
1968 given path on the Android device. | |
1969 | |
1970 Returns: | |
1971 A dict that maps a path on the Android device to a path on the host. | |
1972 If a file under Command._DEVICE_LIB_BASEDIRS is found in /proc/maps, the | |
1973 process is assumed to have been running on Android, and the path is | |
1974 mapped to "out/Debug/lib" in the Chromium directory. An empty dict is | |
1975 returned if the process does not appear to have been running on Android. | |
1976 """ | |
1977 device_lib_path_candidates = set() | |
1978 | |
1979 with open(prefix + '.maps') as maps_f: | |
1980 maps = proc_maps.ProcMaps.load(maps_f) | |
1981 for entry in maps: | |
1982 name = entry.as_dict()['name'] | |
1983 if any([base_dir in name for base_dir in Command._DEVICE_LIB_BASEDIRS]): | |
1984 device_lib_path_candidates.add(os.path.dirname(name)) | |
1985 | |
1986 if len(device_lib_path_candidates) == 1: | |
1987 return {device_lib_path_candidates.pop(): os.path.join( | |
1988 CHROME_SRC_PATH, 'out', 'Debug', 'lib')} | |
1989 else: | |
1990 return {} | |
1991 | |
1992 @staticmethod | |
1993 def _find_all_dumps(dump_path): | |
1994 prefix = Command._find_prefix(dump_path) | |
1995 dump_path_list = [dump_path] | |
1996 | |
1997 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5]) | |
1998 n += 1 | |
1999 skipped = 0 | |
2000 while True: | |
2001 p = '%s.%04d.heap' % (prefix, n) | |
2002 if os.path.exists(p) and os.stat(p).st_size: | |
2003 dump_path_list.append(p) | |
2004 else: | |
2005 if skipped > 10: | |
2006 break | |
2007 skipped += 1 | |
2008 n += 1 | |
2009 | |
2010 return dump_path_list | |
2011 | |
2012 @staticmethod | |
2013 def _find_all_buckets(dump_path): | |
2014 prefix = Command._find_prefix(dump_path) | |
2015 bucket_path_list = [] | |
2016 | |
2017 n = 0 | |
2018 while True: | |
2019 path = '%s.%04d.buckets' % (prefix, n) | |
2020 if not os.path.exists(path): | |
2021 if n > 10: | |
2022 break | |
2023 n += 1 | |
2024 continue | |
2025 bucket_path_list.append(path) | |
2026 n += 1 | |
2027 | |
2028 return bucket_path_list | |
2029 | |
2030 def _parse_args(self, sys_argv, required): | |
2031 options, args = self._parser.parse_args(sys_argv) | |
2032 if len(args) < required + 1: | |
2033 self._parser.error('needs %d argument(s).\n' % required) | |
2034 return None | |
2035 return (options, args) | |
2036 | |
2037 @staticmethod | |
2038 def _parse_policy_list(options_policy): | |
2039 if options_policy: | |
2040 return options_policy.split(',') | |
2041 else: | |
2042 return None | |
2043 | |
2044 | |
2045 class BucketsCommand(Command): | |
2046 def __init__(self): | |
2047 super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>') | |
2048 | |
2049 def do(self, sys_argv, out=sys.stdout): | |
2050 _, args = self._parse_args(sys_argv, 1) | |
2051 dump_path = args[1] | |
2052 bucket_set = Command.load_basic_files(dump_path, True, True) | |
2053 | |
2054 BucketsCommand._output(bucket_set, out) | |
2055 return 0 | |
2056 | |
2057 @staticmethod | |
2058 def _output(bucket_set, out): | |
2059 """Prints all buckets with their symbols resolved. | |
2060 | |
2061 Args: | |
2062 bucket_set: A BucketSet object. | |
2063 out: An IO object to output. | |
2064 """ | |
2065 for bucket_id, bucket in sorted(bucket_set): | |
2066 out.write('%d: %s\n' % (bucket_id, bucket)) | |
2067 | |
2068 | |
2069 class StacktraceCommand(Command): | |
2070 def __init__(self): | |
2071 super(StacktraceCommand, self).__init__( | |
2072 'Usage: %prog stacktrace <dump>') | |
2073 | |
2074 def do(self, sys_argv): | |
2075 _, args = self._parse_args(sys_argv, 1) | |
2076 dump_path = args[1] | |
2077 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
2078 | |
2079 StacktraceCommand._output(dump, bucket_set, sys.stdout) | |
2080 return 0 | |
2081 | |
2082 @staticmethod | |
2083 def _output(dump, bucket_set, out): | |
2084 """Outputs the symbolized stacktraces of a dump. | |
2085 | |
2086 Args: | |
2087 bucket_set: A BucketSet object. | |
2088 out: A file object to output. | |
2089 """ | |
2090 for line in dump.iter_stacktrace: | |
2091 words = line.split() | |
2092 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
2093 if not bucket: | |
2094 continue | |
2095 for i in range(0, BUCKET_ID - 1): | |
2096 out.write(words[i] + ' ') | |
2097 for frame in bucket.symbolized_stackfunction: | |
2098 out.write(frame + ' ') | |
2099 out.write('\n') | |
2100 | |
2101 | |
2102 class PolicyCommands(Command): | |
2103 def __init__(self, command): | |
2104 super(PolicyCommands, self).__init__( | |
2105 'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' % | |
2106 command) | |
2107 self._parser.add_option('-p', '--policy', type='string', dest='policy', | |
2108 help='profile with POLICY', metavar='POLICY') | |
2109 self._parser.add_option('--alternative-dirs', dest='alternative_dirs', | |
2110 metavar='/path/on/target@/path/on/host[:...]', | |
2111 help='Read files in /path/on/host/ instead of ' | |
2112 'files in /path/on/target/.') | |
2113 | |
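Note: --alternative-dirs takes colon-separated device@host pairs in the form
shown in the metavar above. An illustrative invocation (paths and dump name
are hypothetical):

    dmprof csv --alternative-dirs=/data/app-lib/com.example.app-1@/path/to/src/out/Debug/lib chrome.1234.0001.heap > memory.csv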
2114 def _set_up(self, sys_argv): | |
2115 options, args = self._parse_args(sys_argv, 1) | |
2116 dump_path = args[1] | |
2117 shared_first_dump_paths = args[2:] | |
2118 alternative_dirs_dict = {} | |
2119 if options.alternative_dirs: | |
2120 for alternative_dir_pair in options.alternative_dirs.split(':'): | |
2121 target_path, host_path = alternative_dir_pair.split('@', 1) | |
2122 alternative_dirs_dict[target_path] = host_path | |
2123 (bucket_set, dumps) = Command.load_basic_files( | |
2124 dump_path, True, alternative_dirs=alternative_dirs_dict) | |
2125 | |
2126 pfn_counts_dict = {} | |
2127 for shared_first_dump_path in shared_first_dump_paths: | |
2128 shared_dumps = Command._find_all_dumps(shared_first_dump_path) | |
2129 for shared_dump in shared_dumps: | |
2130 pfn_counts = PFNCounts.load(shared_dump) | |
2131 if pfn_counts.pid not in pfn_counts_dict: | |
2132 pfn_counts_dict[pfn_counts.pid] = [] | |
2133 pfn_counts_dict[pfn_counts.pid].append(pfn_counts) | |
2134 | |
2135 policy_set = PolicySet.load(Command._parse_policy_list(options.policy)) | |
2136 return policy_set, dumps, pfn_counts_dict, bucket_set | |
2137 | |
2138 @staticmethod | |
2139 def _apply_policy(dump, pfn_counts_dict, policy, bucket_set, first_dump_time): | |
2140 """Aggregates the total memory size of each component. | |
2141 | |
2142 Iterates through all stacktraces and attributes them to components based on | |
2143 the policy. It is important to apply the policy rules in the right order. | |
2144 | |
2145 Args: | |
2146 dump: A Dump object. | |
2147 pfn_counts_dict: A dict mapping a pid to a list of PFNCounts. | |
2148 policy: A Policy object. | |
2149 bucket_set: A BucketSet object. | |
2150 first_dump_time: An integer representing the time when the first dump | |
2151 was taken. | |
2152 | |
2153 Returns: | |
2154 A dict mapping components to their corresponding sizes. | |
2155 """ | |
2156 LOGGER.info(' %s' % dump.path) | |
2157 all_pfn_dict = {} | |
2158 if pfn_counts_dict: | |
2159 LOGGER.info(' shared with...') | |
2160 for pid, pfnset_list in pfn_counts_dict.iteritems(): | |
2161 closest_pfnset_index = None | |
2162 closest_pfnset_difference = 1024.0 | |
2163 for index, pfnset in enumerate(pfnset_list): | |
2164 time_difference = pfnset.time - dump.time | |
2165 if time_difference >= 3.0: | |
2166 break | |
2167 elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or | |
2168 (0.0 <= time_difference and time_difference < 3.0)): | |
2169 closest_pfnset_index = index | |
2170 closest_pfnset_difference = time_difference | |
2171 elif time_difference < 0.0 and pfnset.reason == 'Exiting': | |
2172 closest_pfnset_index = None | |
2173 break | |
2174 if closest_pfnset_index is not None:  # 0 is a valid index. | |
2175 for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn: | |
2176 all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count | |
2177 LOGGER.info(' %s (time difference = %f)' % | |
2178 (pfnset_list[closest_pfnset_index].path, | |
2179 closest_pfnset_difference)) | |
2180 else: | |
2181 LOGGER.info(' (no match with pid:%d)' % pid) | |
2182 | |
2183 sizes = dict((c, 0) for c in policy.components) | |
2184 | |
2185 PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes) | |
2186 verify_global_stats = PolicyCommands._accumulate_maps( | |
2187 dump, all_pfn_dict, policy, bucket_set, sizes) | |
2188 | |
2189 # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed. | |
2190 # http://crbug.com/245603. | |
2191 for verify_key, verify_value in verify_global_stats.iteritems(): | |
2192 dump_value = dump.global_stat('%s_committed' % verify_key) | |
2193 if dump_value != verify_value: | |
2194 LOGGER.warn('%25s: %12d != %d (%d)' % ( | |
2195 verify_key, dump_value, verify_value, dump_value - verify_value)) | |
2196 | |
2197 sizes['mmap-no-log'] = ( | |
2198 dump.global_stat('profiled-mmap_committed') - | |
2199 sizes['mmap-total-log']) | |
2200 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') | |
2201 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') | |
2202 | |
2203 sizes['tc-no-log'] = ( | |
2204 dump.global_stat('profiled-malloc_committed') - | |
2205 sizes['tc-total-log']) | |
2206 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') | |
2207 sizes['tc-unused'] = ( | |
2208 sizes['mmap-tcmalloc'] - | |
2209 dump.global_stat('profiled-malloc_committed')) | |
2210 if sizes['tc-unused'] < 0: | |
2211 LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' % | |
2212 sizes['tc-unused']) | |
2213 sizes['tc-unused'] = 0 | |
2214 sizes['tc-total'] = sizes['mmap-tcmalloc'] | |
2215 | |
2216 # TODO(dmikurube): global_stat will be deprecated. | |
2217 # See http://crbug.com/245603. | |
2218 for key, value in { | |
2219 'total': 'total_committed', | |
2220 'filemapped': 'file_committed', | |
2221 'absent': 'absent_committed', | |
2222 'file-exec': 'file-exec_committed', | |
2223 'file-nonexec': 'file-nonexec_committed', | |
2224 'anonymous': 'anonymous_committed', | |
2225 'stack': 'stack_committed', | |
2226 'other': 'other_committed', | |
2227 'unhooked-absent': 'nonprofiled-absent_committed', | |
2228 'total-vm': 'total_virtual', | |
2229 'filemapped-vm': 'file_virtual', | |
2230 'anonymous-vm': 'anonymous_virtual', | |
2231 'other-vm': 'other_virtual' }.iteritems(): | |
2232 if key in sizes: | |
2233 sizes[key] = dump.global_stat(value) | |
2234 | |
2235 if 'mustbezero' in sizes: | |
2236 removed_list = ( | |
2237 'profiled-mmap_committed', | |
2238 'nonprofiled-absent_committed', | |
2239 'nonprofiled-anonymous_committed', | |
2240 'nonprofiled-file-exec_committed', | |
2241 'nonprofiled-file-nonexec_committed', | |
2242 'nonprofiled-stack_committed', | |
2243 'nonprofiled-other_committed') | |
2244 sizes['mustbezero'] = ( | |
2245 dump.global_stat('total_committed') - | |
2246 sum(dump.global_stat(removed) for removed in removed_list)) | |
2247 if 'total-exclude-profiler' in sizes: | |
2248 sizes['total-exclude-profiler'] = ( | |
2249 dump.global_stat('total_committed') - | |
2250 (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) | |
2251 if 'hour' in sizes: | |
2252 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 | |
2253 if 'minute' in sizes: | |
2254 sizes['minute'] = (dump.time - first_dump_time) / 60.0 | |
2255 if 'second' in sizes: | |
2256 sizes['second'] = dump.time - first_dump_time | |
2257 | |
2258 return sizes | |
2259 | |
2260 @staticmethod | |
2261 def _accumulate_malloc(dump, policy, bucket_set, sizes): | |
2262 for line in dump.iter_stacktrace: | |
2263 words = line.split() | |
2264 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
2265 if not bucket or bucket.allocator_type == 'malloc': | |
2266 component_match = policy.find_malloc(bucket) | |
2267 elif bucket.allocator_type == 'mmap': | |
2268 continue | |
2269 else: | |
2270 assert False | |
2271 sizes[component_match] += int(words[COMMITTED]) | |
2272 | |
2273 assert not component_match.startswith('mmap-') | |
2274 if component_match.startswith('tc-'): | |
2275 sizes['tc-total-log'] += int(words[COMMITTED]) | |
2276 else: | |
2277 sizes['other-total-log'] += int(words[COMMITTED]) | |
2278 | |
2279 @staticmethod | |
2280 def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes): | |
2281 # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed. | |
2282 # http://crbug.com/245603. | |
2283 global_stats = { | |
2284 'total': 0, | |
2285 'file-exec': 0, | |
2286 'file-nonexec': 0, | |
2287 'anonymous': 0, | |
2288 'stack': 0, | |
2289 'other': 0, | |
2290 'nonprofiled-file-exec': 0, | |
2291 'nonprofiled-file-nonexec': 0, | |
2292 'nonprofiled-anonymous': 0, | |
2293 'nonprofiled-stack': 0, | |
2294 'nonprofiled-other': 0, | |
2295 'profiled-mmap': 0, | |
2296 } | |
2297 | |
2298 for key, value in dump.iter_map: | |
2299 # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed. | |
2300 # It's temporary verification code for transition described in | |
2301 # http://crbug.com/245603. | |
2302 committed = 0 | |
2303 if 'committed' in value[1]: | |
2304 committed = value[1]['committed'] | |
2305 global_stats['total'] += committed | |
2306 key = 'other' | |
2307 name = value[1]['vma']['name'] | |
2308 if name.startswith('/'): | |
2309 if value[1]['vma']['executable'] == 'x': | |
2310 key = 'file-exec' | |
2311 else: | |
2312 key = 'file-nonexec' | |
2313 elif name == '[stack]': | |
2314 key = 'stack' | |
2315 elif name == '': | |
2316 key = 'anonymous' | |
2317 global_stats[key] += committed | |
2318 if value[0] == 'unhooked': | |
2319 global_stats['nonprofiled-' + key] += committed | |
2320 if value[0] == 'hooked': | |
2321 global_stats['profiled-mmap'] += committed | |
2322 | |
2323 if value[0] == 'unhooked': | |
2324 if pfn_dict and dump.pageframe_length: | |
2325 for pageframe in value[1]['pageframe']: | |
2326 component_match = policy.find_unhooked(value, pageframe, pfn_dict) | |
2327 sizes[component_match] += pageframe.size | |
2328 else: | |
2329 component_match = policy.find_unhooked(value) | |
2330 sizes[component_match] += int(value[1]['committed']) | |
2331 elif value[0] == 'hooked': | |
2332 if pfn_dict and dump.pageframe_length: | |
2333 for pageframe in value[1]['pageframe']: | |
2334 component_match, _ = policy.find_mmap( | |
2335 value, bucket_set, pageframe, pfn_dict) | |
2336 sizes[component_match] += pageframe.size | |
2337 assert not component_match.startswith('tc-') | |
2338 if component_match.startswith('mmap-'): | |
2339 sizes['mmap-total-log'] += pageframe.size | |
2340 else: | |
2341 sizes['other-total-log'] += pageframe.size | |
2342 else: | |
2343 component_match, _ = policy.find_mmap(value, bucket_set) | |
2344 sizes[component_match] += int(value[1]['committed']) | |
2345 if component_match.startswith('mmap-'): | |
2346 sizes['mmap-total-log'] += int(value[1]['committed']) | |
2347 else: | |
2348 sizes['other-total-log'] += int(value[1]['committed']) | |
2349 else: | |
2350 LOGGER.error('Unrecognized mapping status: %s' % value[0]) | |
2351 | |
2352 return global_stats | |
2353 | |
2354 | |
2355 class CSVCommand(PolicyCommands): | |
2356 def __init__(self): | |
2357 super(CSVCommand, self).__init__('csv') | |
2358 | |
2359 def do(self, sys_argv): | |
2360 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
2361 return CSVCommand._output( | |
2362 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
2363 | |
2364 @staticmethod | |
2365 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
2366 max_components = 0 | |
2367 for label in policy_set: | |
2368 max_components = max(max_components, len(policy_set[label].components)) | |
2369 | |
2370 for label in sorted(policy_set): | |
2371 components = policy_set[label].components | |
2372 if len(policy_set) > 1: | |
2373 out.write('%s%s\n' % (label, ',' * (max_components - 1))) | |
2374 out.write('%s%s\n' % ( | |
2375 ','.join(components), ',' * (max_components - len(components)))) | |
2376 | |
2377 LOGGER.info('Applying a policy %s to...' % label) | |
2378 for dump in dumps: | |
2379 component_sizes = PolicyCommands._apply_policy( | |
2380 dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) | |
2381 s = [] | |
2382 for c in components: | |
2383 if c in ('hour', 'minute', 'second'): | |
2384 s.append('%05.5f' % (component_sizes[c])) | |
2385 else: | |
2386 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) | |
2387 out.write('%s%s\n' % ( | |
2388 ','.join(s), ',' * (max_components - len(components)))) | |
2389 | |
2390 bucket_set.clear_component_cache() | |
2391 | |
2392 return 0 | |
2393 | |
2394 | |
2395 class JSONCommand(PolicyCommands): | |
2396 def __init__(self): | |
2397 super(JSONCommand, self).__init__('json') | |
2398 | |
2399 def do(self, sys_argv): | |
2400 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
2401 return JSONCommand._output( | |
2402 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
2403 | |
2404 @staticmethod | |
2405 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
2406 json_base = { | |
2407 'version': 'JSON_DEEP_2', | |
2408 'policies': {}, | |
2409 } | |
2410 | |
2411 for label in sorted(policy_set): | |
2412 json_base['policies'][label] = { | |
2413 'legends': policy_set[label].components, | |
2414 'snapshots': [], | |
2415 } | |
2416 | |
2417 LOGGER.info('Applying a policy %s to...' % label) | |
2418 for dump in dumps: | |
2419 component_sizes = PolicyCommands._apply_policy( | |
2420 dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) | |
2421 component_sizes['dump_path'] = dump.path | |
2422 component_sizes['dump_time'] = datetime.datetime.fromtimestamp( | |
2423 dump.time).strftime('%Y-%m-%d %H:%M:%S') | |
2424 json_base['policies'][label]['snapshots'].append(component_sizes) | |
2425 | |
2426 bucket_set.clear_component_cache() | |
2427 | |
2428 json.dump(json_base, out, indent=2, sort_keys=True) | |
2429 | |
2430 return 0 | |
2431 | |
2432 | |
2433 class ListCommand(PolicyCommands): | |
2434 def __init__(self): | |
2435 super(ListCommand, self).__init__('list') | |
2436 | |
2437 def do(self, sys_argv): | |
2438 policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) | |
2439 return ListCommand._output( | |
2440 policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) | |
2441 | |
2442 @staticmethod | |
2443 def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): | |
2444 for label in sorted(policy_set): | |
2445 LOGGER.info('Applying a policy %s to...' % label) | |
2446 for dump in dumps: | |
2447 component_sizes = PolicyCommands._apply_policy( | |
2448 dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time) | |
2449 out.write('%s for %s:\n' % (label, dump.path)) | |
2450 for c in policy_set[label].components: | |
2451 if c in ['hour', 'minute', 'second']: | |
2452 out.write('%40s %12.3f\n' % (c, component_sizes[c])) | |
2453 else: | |
2454 out.write('%40s %12d\n' % (c, component_sizes[c])) | |
2455 | |
2456 bucket_set.clear_component_cache() | |
2457 | |
2458 return 0 | |
2459 | |
2460 | |
2461 class MapCommand(Command): | |
2462 def __init__(self): | |
2463 super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>') | |
2464 | |
2465 def do(self, sys_argv, out=sys.stdout): | |
2466 _, args = self._parse_args(sys_argv, 2) | |
2467 dump_path = args[1] | |
2468 target_policy = args[2] | |
2469 (bucket_set, dumps) = Command.load_basic_files(dump_path, True) | |
2470 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
2471 | |
2472 MapCommand._output(dumps, bucket_set, policy_set[target_policy], out) | |
2473 return 0 | |
2474 | |
2475 @staticmethod | |
2476 def _output(dumps, bucket_set, policy, out): | |
2477 """Prints memory regions of all dumps with the component of each region. | |
2478 | |
2479 Args: | |
2480 dumps: A list of Dump objects. | |
2481 bucket_set: A BucketSet object. | |
2482 policy: A Policy object. | |
2483 out: An IO object to output. | |
2484 """ | |
2485 max_dump_count = 0 | |
2486 range_dict = ExclusiveRangeDict(ListAttribute) | |
2487 for dump in dumps: | |
2488 max_dump_count = max(max_dump_count, dump.count) | |
2489 for key, value in dump.iter_map: | |
2490 for begin, end, attr in range_dict.iter_range(key[0], key[1]): | |
2491 attr[dump.count] = value | |
2492 | |
2493 max_dump_count_digit = len(str(max_dump_count)) | |
2494 for begin, end, attr in range_dict.iter_range(): | |
2495 out.write('%x-%x\n' % (begin, end)) | |
2496 if len(attr) < max_dump_count: | |
2497 attr[max_dump_count] = None | |
2498 for index, value in enumerate(attr[1:]): | |
2499 out.write(' #%0*d: ' % (max_dump_count_digit, index + 1)) | |
2500 if not value: | |
2501 out.write('None\n') | |
2502 elif value[0] == 'hooked': | |
2503 component_match, _ = policy.find_mmap(value, bucket_set) | |
2504 out.write('%s @ %d\n' % (component_match, value[1]['bucket_id'])) | |
2505 else: | |
2506 component_match = policy.find_unhooked(value) | |
2507 region_info = value[1] | |
2508 size = region_info['committed'] | |
2509 out.write('%s [%d bytes] %s%s%s%s %s\n' % ( | |
2510 component_match, size, value[1]['vma']['readable'], | |
2511 value[1]['vma']['writable'], value[1]['vma']['executable'], | |
2512 value[1]['vma']['private'], value[1]['vma']['name'])) | |
2513 | |
2514 | |
2515 class ExpandCommand(Command): | |
2516 def __init__(self): | |
2517 super(ExpandCommand, self).__init__( | |
2518 'Usage: %prog expand <dump> <policy> <component> <depth>') | |
2519 | |
2520 def do(self, sys_argv): | |
2521 _, args = self._parse_args(sys_argv, 4) | |
2522 dump_path = args[1] | |
2523 target_policy = args[2] | |
2524 component_name = args[3] | |
2525 depth = args[4] | |
2526 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
2527 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
2528 | |
2529 ExpandCommand._output(dump, policy_set[target_policy], bucket_set, | |
2530 component_name, int(depth), sys.stdout) | |
2531 return 0 | |
2532 | |
2533 @staticmethod | |
2534 def _output(dump, policy, bucket_set, component_name, depth, out): | |
2535 """Prints all stacktraces in a given component up to a given depth. | |
2536 | |
2537 Args: | |
2538 dump: A Dump object. | |
2539 policy: A Policy object. | |
2540 bucket_set: A BucketSet object. | |
2541 component_name: The name of a component for filtering. | |
2542 depth: An integer representing the depth to be printed. | |
2543 out: An IO object to output. | |
2544 """ | |
2545 sizes = {} | |
2546 | |
2547 ExpandCommand._accumulate( | |
2548 dump, policy, bucket_set, component_name, depth, sizes) | |
2549 | |
2550 sorted_sizes_list = sorted( | |
2551 sizes.iteritems(), key=(lambda x: x[1]), reverse=True) | |
2552 total = 0 | |
2553 # TODO(dmikurube): Better formatting. | |
2554 for size_pair in sorted_sizes_list: | |
2555 out.write('%10d %s\n' % (size_pair[1], size_pair[0])) | |
2556 total += size_pair[1] | |
2557 LOGGER.info('total: %d\n' % total) | |
2558 | |
2559 @staticmethod | |
2560 def _add_size(precedence, bucket, depth, committed, sizes): | |
2561 stacktrace_sequence = precedence | |
2562 for function, sourcefile in zip( | |
2563 bucket.symbolized_stackfunction[ | |
2564 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)], | |
2565 bucket.symbolized_stacksourcefile[ | |
2566 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]): | |
2567 stacktrace_sequence += '%s(@%s) ' % (function, sourcefile) | |
2568 if not stacktrace_sequence in sizes: | |
2569 sizes[stacktrace_sequence] = 0 | |
2570 sizes[stacktrace_sequence] += committed | |
2571 | |
2572 @staticmethod | |
2573 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): | |
2574 rule = policy.find_rule(component_name) | |
2575 if not rule: | |
2576 pass | |
2577 elif rule.allocator_type == 'malloc': | |
2578 for line in dump.iter_stacktrace: | |
2579 words = line.split() | |
2580 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
2581 if not bucket or bucket.allocator_type == 'malloc': | |
2582 component_match = policy.find_malloc(bucket) | |
2583 elif bucket.allocator_type == 'mmap': | |
2584 continue | |
2585 else: | |
2586 assert False | |
2587 if component_match == component_name: | |
2588 precedence = '' | |
2589 precedence += '(alloc=%d) ' % int(words[ALLOC_COUNT]) | |
2590 precedence += '(free=%d) ' % int(words[FREE_COUNT]) | |
2591 if bucket.typeinfo: | |
2592 precedence += '(type=%s) ' % bucket.symbolized_typeinfo | |
2593 precedence += '(type.name=%s) ' % bucket.typeinfo_name | |
2594 ExpandCommand._add_size(precedence, bucket, depth, | |
2595 int(words[COMMITTED]), sizes) | |
2596 elif rule.allocator_type == 'mmap': | |
2597 for _, region in dump.iter_map: | |
2598 if region[0] != 'hooked': | |
2599 continue | |
2600 component_match, bucket = policy.find_mmap(region, bucket_set) | |
2601 if component_match == component_name: | |
2602 ExpandCommand._add_size('', bucket, depth, | |
2603 region[1]['committed'], sizes) | |
2604 | |
2605 | |
2606 class PProfCommand(Command): | |
2607 def __init__(self): | |
2608 super(PProfCommand, self).__init__( | |
2609 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') | |
2610 self._parser.add_option('-c', '--component', type='string', | |
2611 dest='component', | |
2612 help='restrict to COMPONENT', metavar='COMPONENT') | |
2613 | |
2614 def do(self, sys_argv): | |
2615 options, args = self._parse_args(sys_argv, 2) | |
2616 | |
2617 dump_path = args[1] | |
2618 target_policy = args[2] | |
2619 component = options.component | |
2620 | |
2621 (bucket_set, dump) = Command.load_basic_files(dump_path, False) | |
2622 policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) | |
2623 | |
2624 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f: | |
2625 maps_lines = maps_f.readlines() | |
2626 PProfCommand._output( | |
2627 dump, policy_set[target_policy], bucket_set, maps_lines, component, | |
2628 sys.stdout) | |
2629 | |
2630 return 0 | |
2631 | |
2632 @staticmethod | |
2633 def _output(dump, policy, bucket_set, maps_lines, component_name, out): | |
2634 """Converts the heap profile dump so it can be processed by pprof. | |
2635 | |
2636 Args: | |
2637 dump: A Dump object. | |
2638 policy: A Policy object. | |
2639 bucket_set: A BucketSet object. | |
2640 maps_lines: A list of strings containing /proc/.../maps. | |
2641 component_name: The name of a component for filtering. | |
2642 out: An IO object to output. | |
2643 """ | |
2644 out.write('heap profile: ') | |
2645 com_committed, com_allocs = PProfCommand._accumulate( | |
2646 dump, policy, bucket_set, component_name) | |
2647 | |
2648 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( | |
2649 com_allocs, com_committed, com_allocs, com_committed)) | |
2650 | |
2651 PProfCommand._output_stacktrace_lines( | |
2652 dump, policy, bucket_set, component_name, out) | |
2653 | |
2654 out.write('MAPPED_LIBRARIES:\n') | |
2655 for line in maps_lines: | |
2656 out.write(line) | |
2657 | |
2658 @staticmethod | |
2659 def _accumulate(dump, policy, bucket_set, component_name): | |
2660 """Accumulates size of committed chunks and the number of allocated chunks. | |
2661 | |
2662 Args: | |
2663 dump: A Dump object. | |
2664 policy: A Policy object. | |
2665 bucket_set: A BucketSet object. | |
2666 component_name: The name of a component for filtering. | |
2667 | |
2668 Returns: | |
2669 Two integers which are the accumulated size of committed regions and the | |
2670 number of allocated chunks, respectively. | |
2671 """ | |
2672 com_committed = 0 | |
2673 com_allocs = 0 | |
2674 | |
2675 for _, region in dump.iter_map: | |
2676 if region[0] != 'hooked': | |
2677 continue | |
2678 component_match, bucket = policy.find_mmap(region, bucket_set) | |
2679 | |
2680 if (component_name and component_name != component_match) or ( | |
2681 region[1]['committed'] == 0): | |
2682 continue | |
2683 | |
2684 com_committed += region[1]['committed'] | |
2685 com_allocs += 1 | |
2686 | |
2687 for line in dump.iter_stacktrace: | |
2688 words = line.split() | |
2689 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
2690 if not bucket or bucket.allocator_type == 'malloc': | |
2691 component_match = policy.find_malloc(bucket) | |
2692 elif bucket.allocator_type == 'mmap': | |
2693 continue | |
2694 else: | |
2695 assert False | |
2696 if (not bucket or | |
2697 (component_name and component_name != component_match)): | |
2698 continue | |
2699 | |
2700 com_committed += int(words[COMMITTED]) | |
2701 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) | |
2702 | |
2703 return com_committed, com_allocs | |
2704 | |
2705 @staticmethod | |
2706 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): | |
2707 """Prints information of stacktrace lines for pprof. | |
2708 | |
2709 Args: | |
2710 dump: A Dump object. | |
2711 policy: A Policy object. | |
2712 bucket_set: A BucketSet object. | |
2713 component_name: The name of a component for filtering. | |
2714 out: An IO object to output. | |
2715 """ | |
2716 for _, region in dump.iter_map: | |
2717 if region[0] != 'hooked': | |
2718 continue | |
2719 component_match, bucket = policy.find_mmap(region, bucket_set) | |
2720 | |
2721 if (component_name and component_name != component_match) or ( | |
2722 region[1]['committed'] == 0): | |
2723 continue | |
2724 | |
2725 out.write(' 1: %8s [ 1: %8s] @' % ( | |
2726 region[1]['committed'], region[1]['committed'])) | |
2727 for address in bucket.stacktrace: | |
2728 out.write(' 0x%016x' % address) | |
2729 out.write('\n') | |
2730 | |
2731 for line in dump.iter_stacktrace: | |
2732 words = line.split() | |
2733 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
2734 if not bucket or bucket.allocator_type == 'malloc': | |
2735 component_match = policy.find_malloc(bucket) | |
2736 elif bucket.allocator_type == 'mmap': | |
2737 continue | |
2738 else: | |
2739 assert False | |
2740 if (not bucket or | |
2741 (component_name and component_name != component_match)): | |
2742 continue | |
2743 | |
2744 out.write('%6d: %8s [%6d: %8s] @' % ( | |
2745 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
2746 words[COMMITTED], | |
2747 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), | |
2748 words[COMMITTED])) | |
2749 for address in bucket.stacktrace: | |
2750 out.write(' 0x%016x' % address) | |
2751 out.write('\n') | |
2752 | |
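Note: the pprof subcommand emits the textual heap-profile format consumed by
pprof. A sketch of the output with made-up counts, sizes and addresses:

    heap profile:     42:  1048576 [    42:  1048576] @ heapprofile
     1:    65536 [ 1:    65536] @ 0x00007f2a3c100000 0x00007f2a3c0ff000
         3:    12288 [     3:    12288] @ 0x000000000040a110 0x0000000000409f80
    MAPPED_LIBRARIES:
    (lines from /proc/<pid>/maps follow)

Each pair is '<allocated count>: <committed bytes>', per the format strings in
_output and _output_stacktrace_lines above.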
2753 | |
2754 class UploadCommand(Command): | |
2755 def __init__(self): | |
2756 super(UploadCommand, self).__init__( | |
2757 'Usage: %prog upload [--gsutil path/to/gsutil] ' | |
2758 '<first-dump> <destination-gs-path>') | |
2759 self._parser.add_option('--gsutil', default='gsutil', | |
2760 help='path to GSUTIL', metavar='GSUTIL') | |
2761 | |
2762 def do(self, sys_argv): | |
2763 options, args = self._parse_args(sys_argv, 2) | |
2764 dump_path = args[1] | |
2765 gs_path = args[2] | |
2766 | |
2767 dump_files = Command._find_all_dumps(dump_path) | |
2768 bucket_files = Command._find_all_buckets(dump_path) | |
2769 prefix = Command._find_prefix(dump_path) | |
2770 symbol_data_sources = SymbolDataSources(prefix) | |
2771 symbol_data_sources.prepare() | |
2772 symbol_path = symbol_data_sources.path() | |
2773 | |
2774 handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof') | |
2775 os.close(handle_zip) | |
2776 | |
2777 try: | |
2778 file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED) | |
2779 for filename in dump_files: | |
2780 file_zip.write(filename, os.path.basename(os.path.abspath(filename))) | |
2781 for filename in bucket_files: | |
2782 file_zip.write(filename, os.path.basename(os.path.abspath(filename))) | |
2783 | |
2784 symbol_basename = os.path.basename(os.path.abspath(symbol_path)) | |
2785 for filename in os.listdir(symbol_path): | |
2786 if not filename.startswith('.'): | |
2787 file_zip.write(os.path.join(symbol_path, filename), | |
2788 os.path.join(symbol_basename, os.path.basename( | |
2789 os.path.abspath(filename)))) | |
2790 file_zip.close() | |
2791 | |
2792 returncode = UploadCommand._run_gsutil( | |
2793 options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path) | |
2794 finally: | |
2795 os.remove(filename_zip) | |
2796 | |
2797 return returncode | |
2798 | |
2799 @staticmethod | |
2800 def _run_gsutil(gsutil, *args): | |
2801 """Run gsutil as a subprocess. | |
2802 | |
2803 Args: | |
2804 *args: Arguments to pass to gsutil. The first argument should be an | |
2805 operation such as ls, cp or cat. | |
2806 Returns: | |
2807 The return code from the process. | |
2808 """ | |
2809 command = [gsutil] + list(args) | |
2810 LOGGER.info("Running: %s", command) | |
2811 | |
2812 try: | |
2813 return subprocess.call(command) | |
2814 except OSError, e: | |
2815 LOGGER.error('Failed to run gsutil: %s', e) | |
2816 | |
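Note: an illustrative upload invocation matching the usage string above (the
gsutil path and destination are hypothetical):

    dmprof upload --gsutil /path/to/gsutil chrome.1234.0001.heap gs://my-bucket/dumps/chrome-1234.zip

It zips the dumps, the bucket files and the prepared symbol directory into a
temporary .zip and copies it with 'gsutil cp -a public-read'.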
2817 | |
2818 class CatCommand(Command): | |
2819 def __init__(self): | |
2820 super(CatCommand, self).__init__('Usage: %prog cat <first-dump>') | |
2821 self._parser.add_option('--alternative-dirs', dest='alternative_dirs', | |
2822 metavar='/path/on/target@/path/on/host[:...]', | |
2823 help='Read files in /path/on/host/ instead of ' | |
2824 'files in /path/on/target/.') | |
2825 self._parser.add_option('--indent', dest='indent', action='store_true', | |
2826 help='Indent the output.') | |
2827 | |
2828 def do(self, sys_argv): | |
2829 options, args = self._parse_args(sys_argv, 1) | |
2830 dump_path = args[1] | |
2831 # TODO(dmikurube): Support shared memory. | |
2832 alternative_dirs_dict = {} | |
2833 if options.alternative_dirs: | |
2834 for alternative_dir_pair in options.alternative_dirs.split(':'): | |
2835 target_path, host_path = alternative_dir_pair.split('@', 1) | |
2836 alternative_dirs_dict[target_path] = host_path | |
2837 (bucket_set, dumps) = Command.load_basic_files( | |
2838 dump_path, True, alternative_dirs=alternative_dirs_dict) | |
2839 | |
2840 json_root = OrderedDict() | |
2841 json_root['version'] = 1 | |
2842 json_root['run_id'] = None | |
2843 for dump in dumps: | |
2844 if json_root['run_id'] and json_root['run_id'] != dump.run_id: | |
2845 LOGGER.error('Inconsistent heap profile dumps.') | |
2846 json_root['run_id'] = '' | |
2847 break | |
2848 json_root['run_id'] = dump.run_id | |
2849 json_root['snapshots'] = [] | |
2850 | |
2851 # Load all sorters. | |
2852 sorters = SorterSet() | |
2853 | |
2854 for dump in dumps: | |
2855 json_root['snapshots'].append( | |
2856 self._fill_snapshot(dump, bucket_set, sorters)) | |
2857 | |
2858 if options.indent: | |
2859 json.dump(json_root, sys.stdout, indent=2) | |
2860 else: | |
2861 json.dump(json_root, sys.stdout) | |
2862 print '' | |
2863 | |
2864 @staticmethod | |
2865 def _fill_snapshot(dump, bucket_set, sorters): | |
2866 root = OrderedDict() | |
2867 root['time'] = dump.time | |
2868 root['worlds'] = OrderedDict() | |
2869 root['worlds']['vm'] = CatCommand._fill_world( | |
2870 dump, bucket_set, sorters, 'vm') | |
2871 root['worlds']['malloc'] = CatCommand._fill_world( | |
2872 dump, bucket_set, sorters, 'malloc') | |
2873 return root | |
2874 | |
2875 @staticmethod | |
2876 def _fill_world(dump, bucket_set, sorters, world): | |
2877 root = OrderedDict() | |
2878 | |
2879 root['name'] = 'world' | |
2880 if world == 'vm': | |
2881 root['unit_fields'] = ['committed', 'reserved'] | |
2882 elif world == 'malloc': | |
2883 root['unit_fields'] = ['size', 'alloc_count', 'free_count'] | |
2884 | |
2885 # Make { vm | malloc } units with their sizes. | |
2886 root['units'] = OrderedDict() | |
2887 unit_set = UnitSet(world) | |
2888 if world == 'vm': | |
2889 for unit in CatCommand._iterate_vm_unit(dump, None, bucket_set): | |
2890 unit_set.append(unit) | |
2891 for unit in unit_set: | |
2892 root['units'][unit.unit_id] = [unit.committed, unit.reserved] | |
2893 elif world == 'malloc': | |
2894 for unit in CatCommand._iterate_malloc_unit(dump, bucket_set): | |
2895 unit_set.append(unit) | |
2896 for unit in unit_set: | |
2897 root['units'][unit.unit_id] = [ | |
2898 unit.size, unit.alloc_count, unit.free_count] | |
2899 | |
2900 # Iterate for { vm | malloc } sorters. | |
2901 root['breakdown'] = OrderedDict() | |
2902 for sorter in sorters.iter_world(world): | |
2903 breakdown = OrderedDict() | |
2904 for unit in unit_set: | |
2905 found = sorter.find(unit) | |
2906 if found.name not in breakdown: | |
2907 category = OrderedDict() | |
2908 category['name'] = found.name | |
2909 category['color'] = 'random' | |
2910 subworlds = {} | |
2911 for subworld in found.iter_subworld(): | |
2912 subworlds[subworld] = False | |
2913 if subworlds: | |
2914 category['subworlds'] = subworlds | |
2915 if found.hidden: | |
2916 category['hidden'] = True | |
2917 category['units'] = [] | |
2918 breakdown[found.name] = category | |
2919 breakdown[found.name]['units'].append(unit.unit_id) | |
2920 root['breakdown'][sorter.name] = breakdown | |
2921 | |
2922 return root | |
2923 | |
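Note: _fill_world() above produces one 'world' object per snapshot in the cat
output. A sketch of a 'vm' world with hypothetical unit ids, sizes, sorter and
category names (the key names and the [committed, reserved] field order come
from the code above):

    {
      "name": "world",
      "unit_fields": ["committed", "reserved"],
      "units": {"1": [4096, 8192], "2": [12288, 12288]},
      "breakdown": {
        "vm-map": {
          "mmap-v8": {"name": "mmap-v8", "color": "random", "units": [1]},
          "unhooked-anonymous": {"name": "unhooked-anonymous",
                                 "color": "random", "units": [2]}
        }
      }
    }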
2924 @staticmethod | |
2925 def _iterate_vm_unit(dump, pfn_dict, bucket_set): | |
2926 unit_id = 0 | |
2927 for _, region in dump.iter_map: | |
2928 unit_id += 1 | |
2929 if region[0] == 'unhooked': | |
2930 if pfn_dict and dump.pageframe_length: | |
2931 for pageframe in region[1]['pageframe']: | |
2932 yield UnhookedUnit(unit_id, pageframe.size, pageframe.size, | |
2933 region, pageframe, pfn_dict) | |
2934 else: | |
2935 yield UnhookedUnit(unit_id, | |
2936 int(region[1]['committed']), | |
2937 int(region[1]['reserved']), | |
2938 region) | |
2939 elif region[0] == 'hooked': | |
2940 if pfn_dict and dump.pageframe_length: | |
2941 for pageframe in region[1]['pageframe']: | |
2942 yield MMapUnit(unit_id, | |
2943 pageframe.size, | |
2944 pageframe.size, | |
2945 region, bucket_set, pageframe, pfn_dict) | |
2946 else: | |
2947 yield MMapUnit(unit_id, | |
2948 int(region[1]['committed']), | |
2949 int(region[1]['reserved']), | |
2950 region, | |
2951 bucket_set) | |
2952 else: | |
2953 LOGGER.error('Unrecognized mapping status: %s' % region[0]) | |
2954 | |
2955 @staticmethod | |
2956 def _iterate_malloc_unit(dump, bucket_set): | |
2957 for line in dump.iter_stacktrace: | |
2958 words = line.split() | |
2959 bucket = bucket_set.get(int(words[BUCKET_ID])) | |
2960 if bucket and bucket.allocator_type == 'malloc': | |
2961 yield MallocUnit(int(words[BUCKET_ID]), | |
2962 int(words[COMMITTED]), | |
2963 int(words[ALLOC_COUNT]), | |
2964 int(words[FREE_COUNT]), | |
2965 bucket) | |
2966 elif not bucket: | |
2967 # Buckets that are not found are all assumed to be malloc buckets. | |
2968 yield MallocUnit(int(words[BUCKET_ID]), | |
2969 int(words[COMMITTED]), | |
2970 int(words[ALLOC_COUNT]), | |
2971 int(words[FREE_COUNT]), | |
2972 None) | |
2973 | 18 |
2974 | 19 |
2975 def main(): | 20 def main(): |
2976 COMMANDS = { | 21 COMMANDS = { |
2977 'buckets': BucketsCommand, | 22 'buckets': subcommands.BucketsCommand, |
2978 'cat': CatCommand, | 23 'cat': subcommands.CatCommand, |
2979 'csv': CSVCommand, | 24 'csv': subcommands.CSVCommand, |
2980 'expand': ExpandCommand, | 25 'expand': subcommands.ExpandCommand, |
2981 'json': JSONCommand, | 26 'json': subcommands.JSONCommand, |
2982 'list': ListCommand, | 27 'list': subcommands.ListCommand, |
2983 'map': MapCommand, | 28 'map': subcommands.MapCommand, |
2984 'pprof': PProfCommand, | 29 'pprof': subcommands.PProfCommand, |
2985 'stacktrace': StacktraceCommand, | 30 'stacktrace': subcommands.StacktraceCommand, |
2986 'upload': UploadCommand, | 31 'upload': subcommands.UploadCommand, |
2987 } | 32 } |
2988 | 33 |
2989 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): | 34 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): |
2990 sys.stderr.write("""Usage: dmprof <command> [options] [<args>] | 35 sys.stderr.write("""Usage: dmprof <command> [options] [<args>] |
2991 | 36 |
2992 Commands: | 37 Commands: |
2993 buckets Dump a bucket list with resolving symbols | 38 buckets Dump a bucket list with resolving symbols |
2994 cat Categorize memory usage (under development) | 39 cat Categorize memory usage (under development) |
2995 csv Classify memory usage in CSV | 40 csv Classify memory usage in CSV |
2996 expand Show all stacktraces contained in the specified component | 41 expand Show all stacktraces contained in the specified component |
(...skipping 30 matching lines...) |
3027 errorcode = COMMANDS[action]().do(sys.argv) | 72 errorcode = COMMANDS[action]().do(sys.argv) |
3028 except ParsingException, e: | 73 except ParsingException, e: |
3029 errorcode = 1 | 74 errorcode = 1 |
3030 sys.stderr.write('Exit by parsing error: %s\n' % e) | 75 sys.stderr.write('Exit by parsing error: %s\n' % e) |
3031 | 76 |
3032 return errorcode | 77 return errorcode |
3033 | 78 |
3034 | 79 |
3035 if __name__ == '__main__': | 80 if __name__ == '__main__': |
3036 sys.exit(main()) | 81 sys.exit(main()) |