Chromium Code Reviews

Unified Diff: tools/deep_memory_profiler/dmprof

Issue 11417048: Retry: Add a first test for tools/deep_memory_profiler. (Closed)
Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 1 month ago
Index: tools/deep_memory_profiler/dmprof
diff --git a/tools/deep_memory_profiler/dmprof b/tools/deep_memory_profiler/dmprof
index 1951838c4f75571e8c5f5bb967f9f83c23bc2ed6..a43f9f663af06b898c6a14bdae989077eba28104 100755
--- a/tools/deep_memory_profiler/dmprof
+++ b/tools/deep_memory_profiler/dmprof
@@ -1,1341 +1,12 @@
-#!/usr/bin/env python
+#!/usr/bin/env bash
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-"""The deep heap profiler script for Chrome."""
+# Re-direct the arguments to dmprof.py.
-from datetime import datetime
-import json
-import logging
-import optparse
-import os
-import re
-import shutil
-import subprocess
-import sys
-import tempfile
+BASEDIR=`dirname $0`
+ARGV="$@"
-BASE_PATH = os.path.dirname(os.path.abspath(__file__))
-FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
- BASE_PATH, os.pardir, 'find_runtime_symbols')
-sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
-
-from find_runtime_symbols import find_runtime_symbols_list
-from find_runtime_symbols import find_runtime_typeinfo_symbols_list
-from find_runtime_symbols import RuntimeSymbolsInProcess
-from prepare_symbol_info import prepare_symbol_info
-
-BUCKET_ID = 5
-VIRTUAL = 0
-COMMITTED = 1
-ALLOC_COUNT = 2
-FREE_COUNT = 3
-NULL_REGEX = re.compile('')
-
-LOGGER = logging.getLogger('dmprof')
-POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')
-FUNCTION_ADDRESS = 'function'
-TYPEINFO_ADDRESS = 'typeinfo'
-
-
-# Heap Profile Dump versions
-
-# DUMP_DEEP_[1-4] are obsolete.
-# DUMP_DEEP_2+ distinguish mmap regions and malloc chunks.
-# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
-# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
-# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
-# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
-DUMP_DEEP_1 = 'DUMP_DEEP_1'
-DUMP_DEEP_2 = 'DUMP_DEEP_2'
-DUMP_DEEP_3 = 'DUMP_DEEP_3'
-DUMP_DEEP_4 = 'DUMP_DEEP_4'
-
-DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
-
-# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
-# malloc and mmap are identified in bucket files.
-# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
-DUMP_DEEP_5 = 'DUMP_DEEP_5'
-
-
-# Heap Profile Policy versions
-
-# POLICY_DEEP_1 DOES NOT include allocation_type columns.
-# mmap regions are distinguished by mmap frames in the pattern column.
-POLICY_DEEP_1 = 'POLICY_DEEP_1'
-
-# POLICY_DEEP_2 DOES include allocation_type columns.
-# mmap regions are distinguished by the allocation_type column.
-POLICY_DEEP_2 = 'POLICY_DEEP_2'
-
-# POLICY_DEEP_3 is in JSON format.
-POLICY_DEEP_3 = 'POLICY_DEEP_3'
-
-# POLICY_DEEP_4 contains typeinfo.
-POLICY_DEEP_4 = 'POLICY_DEEP_4'
-
-
-class EmptyDumpException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-
-class ParsingException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-
-class InvalidDumpException(ParsingException):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return "invalid heap profile dump: %s" % repr(self.value)
-
-
-class ObsoleteDumpVersionException(ParsingException):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return "obsolete heap profile dump version: %s" % repr(self.value)
-
-
-def skip_while(index, max_index, skipping_condition):
- """Increments |index| until |skipping_condition|(|index|) is False.
-
- Returns:
- A pair of an integer indicating the line number after skipping, and a
- boolean value which is True if a line for which skipping_condition
- is False was found.
- """
- while skipping_condition(index):
- index += 1
- if index >= max_index:
- return index, False
- return index, True
-
-
-class SymbolMapping(object):
- """Manages all symbol information on process memory mapping.
-
- The symbol information consists of all symbols in the binary files obtained
- by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps,
- nm and so on. It is the minimum information required to run dmprof.
-
- The information is prepared in a directory "|prefix|.symmap" by prepare().
- The directory is more portable than Chromium binaries. Users can save it
- and re-analyze with the portable one.
-
- Note that loading the symbol information takes a long time. It is very big
- in general -- it doesn't know which functions are actually called and which
- types are actually used. Used symbols can be cached in the "SymbolCache" class.
- """
- def __init__(self, prefix):
- self._prefix = prefix
- self._prepared_symbol_mapping_path = None
- self._loaded_symbol_mapping = None
-
- def prepare(self):
- """Extracts symbol mapping from binaries and prepares it to use.
-
- The symbol mapping is stored in a directory whose name is stored in
- |self._prepared_symbol_mapping_path|.
-
- Returns:
- True if succeeded.
- """
- LOGGER.info('Preparing symbol mapping...')
- self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info(
- self._prefix + '.maps', self._prefix + '.symmap', True)
- if self._prepared_symbol_mapping_path:
- LOGGER.info(' Prepared symbol mapping.')
- if used_tempdir:
- LOGGER.warn(' Using a temporary directory for symbol mapping.')
- LOGGER.warn(' Delete it by yourself.')
- LOGGER.warn(' Or, move the directory by yourself to use it later.')
- return True
- else:
- LOGGER.warn(' Failed to prepare symbol mapping.')
- return False
-
- def get(self):
- """Returns symbol mapping.
-
- Returns:
- Loaded symbol mapping. None if failed.
- """
- if not self._prepared_symbol_mapping_path and not self.prepare():
- return None
- if not self._loaded_symbol_mapping:
- LOGGER.info('Loading symbol mapping...')
- self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load(
- self._prepared_symbol_mapping_path)
- return self._loaded_symbol_mapping
-
-
-class SymbolCache(object):
- """Manages cache of used symbol mapping.
-
- The original symbol mapping is provided by "SymbolMapping" (from maps, nm,
- readelf and so on), and "SymbolCache" just caches "how dmprof interprets the
- address" to speed up another analysis of the same binary and profile dumps.
- Handling all symbol mapping takes a long time in "SymbolMapping".
- "SymbolCache" caches used symbol mapping on memory and in files.
- """
- def __init__(self, prefix):
- self._prefix = prefix
- self._symbol_cache_paths = {
- FUNCTION_ADDRESS: prefix + '.funcsym',
- TYPEINFO_ADDRESS: prefix + '.typesym',
- }
- self._find_runtime_symbols_functions = {
- FUNCTION_ADDRESS: find_runtime_symbols_list,
- TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list,
- }
- self._symbol_caches = {
- FUNCTION_ADDRESS: {},
- TYPEINFO_ADDRESS: {},
- }
-
- def update(self, address_type, bucket_set, symbol_mapping):
- """Updates symbol mapping on memory and in a ".*sym" cache file.
-
- It reads cached symbol mapping from a ".*sym" file if it exists. Then,
- it looks up unresolved addresses from a given "SymbolMapping". Finally,
- both symbol mappings on memory and in the ".*sym" cache file are updated.
-
- Symbol files are formatted as follows:
- <Address> <Symbol>
- <Address> <Symbol>
- <Address> <Symbol>
- ...
-
- Args:
- address_type: A type of addresses to update. It should be one of
- FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
- bucket_set: A BucketSet object.
- symbol_mapping: A SymbolMapping object.
- """
- self._load(address_type)
-
- unresolved_addresses = sorted(
- address for address in bucket_set.iter_addresses(address_type)
- if address not in self._symbol_caches[address_type])
-
- if not unresolved_addresses:
- LOGGER.info('No need to resolve any more addresses.')
- return
-
- symbol_cache_path = self._symbol_cache_paths[address_type]
- with open(symbol_cache_path, mode='a+') as symbol_f:
- LOGGER.info('Loading %d unresolved addresses.' %
- len(unresolved_addresses))
- symbol_list = self._find_runtime_symbols_functions[address_type](
- symbol_mapping.get(), unresolved_addresses)
-
- for address, symbol in zip(unresolved_addresses, symbol_list):
- stripped_symbol = symbol.strip() or '??'
- self._symbol_caches[address_type][address] = stripped_symbol
- symbol_f.write('%x %s\n' % (address, stripped_symbol))
-
- def lookup(self, address_type, address):
- """Looks up a symbol for a given |address|.
-
- Args:
- address_type: A type of addresses to lookup. It should be one of
- FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
- address: An integer that represents an address.
-
- Returns:
- A string that represents a symbol.
- """
- return self._symbol_caches[address_type].get(address)
-
- def _load(self, address_type):
- symbol_cache_path = self._symbol_cache_paths[address_type]
- try:
- with open(symbol_cache_path, mode='r') as symbol_f:
- for line in symbol_f:
- items = line.rstrip().split(None, 1)
- if len(items) == 1:
- items.append('??')
- self._symbol_caches[address_type][int(items[0], 16)] = items[1]
- LOGGER.info('Loaded %d entries from symbol cache.' %
- len(self._symbol_caches[address_type]))
- except IOError as e:
- LOGGER.info('No valid symbol cache file is found.')
-
-
-class Rule(object):
- """Represents one matching rule in a policy file."""
-
- def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None):
- self._name = name
- self._mmap = mmap
- self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z')
- if typeinfo_pattern:
- self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')
- else:
- self._typeinfo_pattern = None
-
- @property
- def name(self):
- return self._name
-
- @property
- def mmap(self):
- return self._mmap
-
- @property
- def stacktrace_pattern(self):
- return self._stacktrace_pattern
-
- @property
- def typeinfo_pattern(self):
- return self._typeinfo_pattern
-
-
-class Policy(object):
- """Represents a policy, a content of a policy file."""
-
- def __init__(self, rules, version, components):
- self._rules = rules
- self._version = version
- self._components = components
-
- @property
- def rules(self):
- return self._rules
-
- @property
- def version(self):
- return self._version
-
- @property
- def components(self):
- return self._components
-
- def find(self, bucket):
- """Finds a matching component name which a given |bucket| belongs to.
-
- Args:
- bucket: A Bucket object to be searched for.
-
- Returns:
- A string representing a component name.
- """
- if not bucket:
- return 'no-bucket'
- if bucket.component_cache:
- return bucket.component_cache
-
- stacktrace = bucket.symbolized_joined_stacktrace
- typeinfo = bucket.symbolized_typeinfo
- if typeinfo.startswith('0x'):
- typeinfo = bucket.typeinfo_name
-
- for rule in self._rules:
- if (bucket.mmap == rule.mmap and
- rule.stacktrace_pattern.match(stacktrace) and
- (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
- bucket.component_cache = rule.name
- return rule.name
-
- assert False
-
- @staticmethod
- def load(filename, format):
- """Loads a policy file of |filename| in a |format|.
-
- Args:
- filename: A filename to be loaded.
- format: A string to specify a format of the file. Only 'json' is
- supported for now.
-
- Returns:
- A loaded Policy object.
- """
- with open(os.path.join(BASE_PATH, filename)) as policy_f:
- return Policy.parse(policy_f, format)
-
- @staticmethod
- def parse(policy_f, format):
- """Parses a policy file content in a |format|.
-
- Args:
- policy_f: An IO object to be loaded.
- format: A string to specify a format of the file. Only 'json' is
- supported for now.
-
- Returns:
- A loaded Policy object.
- """
- if format == 'json':
- return Policy._parse_json(policy_f)
- else:
- return None
-
- @staticmethod
- def _parse_json(policy_f):
- """Parses policy file in json format.
-
- A policy file contains component names and their stacktrace patterns
- written as regular expressions. Those patterns are matched against each
- symbol of each stacktrace in the order written in the policy file.
-
- Args:
- policy_f: A File/IO object to read.
-
- Returns:
- A loaded policy object.
- """
- policy = json.load(policy_f)
-
- rules = []
- for rule in policy['rules']:
- rules.append(Rule(
- rule['name'],
- rule['allocator'] == 'mmap',
- rule['stacktrace'],
- rule['typeinfo'] if 'typeinfo' in rule else None))
- return Policy(rules, policy['version'], policy['components'])
-
-
-class PolicySet(object):
- """Represents a set of policies."""
-
- def __init__(self, policy_directory):
- self._policy_directory = policy_directory
-
- @staticmethod
- def load(labels=None):
- """Loads a set of policies via the "default policy directory".
-
- The "default policy directory" contains pairs of policies and their labels.
- For example, a policy "policy.l0.json" is labeled "l0" in the default
- policy directory "policies.json".
-
- All policies in the directory are loaded by default. Policies can be
- limited by |labels|.
-
- Args:
- labels: An array that contains policy labels to be loaded.
-
- Returns:
- A PolicySet object.
- """
- default_policy_directory = PolicySet._load_default_policy_directory()
- if labels:
- specified_policy_directory = {}
- for label in labels:
- if label in default_policy_directory:
- specified_policy_directory[label] = default_policy_directory[label]
- # TODO(dmikurube): Load an un-labeled policy file.
- return PolicySet._load_policies(specified_policy_directory)
- else:
- return PolicySet._load_policies(default_policy_directory)
-
- def __len__(self):
- return len(self._policy_directory)
-
- def __iter__(self):
- for label in self._policy_directory:
- yield label
-
- def __getitem__(self, label):
- return self._policy_directory[label]
-
- @staticmethod
- def _load_default_policy_directory():
- with open(POLICIES_JSON_PATH, mode='r') as policies_f:
- default_policy_directory = json.load(policies_f)
- return default_policy_directory
-
- @staticmethod
- def _load_policies(directory):
- LOGGER.info('Loading policy files.')
- policies = {}
- for label in directory:
- LOGGER.info(' %s: %s' % (label, directory[label]['file']))
- loaded = Policy.load(directory[label]['file'], directory[label]['format'])
- if loaded:
- policies[label] = loaded
- return PolicySet(policies)
-
-
-class Bucket(object):
- """Represents a bucket, which is a unit of memory block classification."""
-
- def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name):
- self._stacktrace = stacktrace
- self._mmap = mmap
- self._typeinfo = typeinfo
- self._typeinfo_name = typeinfo_name
-
- self._symbolized_stacktrace = stacktrace
- self._symbolized_joined_stacktrace = ''
- self._symbolized_typeinfo = typeinfo_name
-
- self.component_cache = ''
-
- def symbolize(self, symbol_cache):
- """Makes a symbolized stacktrace and typeinfo with |symbol_cache|.
-
- Args:
- symbol_cache: A SymbolCache object.
- """
- # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
- self._symbolized_stacktrace = [
- symbol_cache.lookup(FUNCTION_ADDRESS, address)
- for address in self._stacktrace]
- self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace)
- if not self._typeinfo:
- self._symbolized_typeinfo = 'no typeinfo'
- else:
- self._symbolized_typeinfo = symbol_cache.lookup(
- TYPEINFO_ADDRESS, self._typeinfo)
- if not self._symbolized_typeinfo:
- self._symbolized_typeinfo = 'no typeinfo'
-
- def clear_component_cache(self):
- self.component_cache = ''
-
- @property
- def stacktrace(self):
- return self._stacktrace
-
- @property
- def mmap(self):
- return self._mmap
-
- @property
- def typeinfo(self):
- return self._typeinfo
-
- @property
- def typeinfo_name(self):
- return self._typeinfo_name
-
- @property
- def symbolized_stacktrace(self):
- return self._symbolized_stacktrace
-
- @property
- def symbolized_joined_stacktrace(self):
- return self._symbolized_joined_stacktrace
-
- @property
- def symbolized_typeinfo(self):
- return self._symbolized_typeinfo
-
-
-class BucketSet(object):
- """Represents a set of buckets."""
- def __init__(self):
- self._buckets = {}
- self._addresses = {
- FUNCTION_ADDRESS: set(),
- TYPEINFO_ADDRESS: set(),
- }
-
- @staticmethod
- def load(prefix):
- """Loads all related bucket files.
-
- Args:
- prefix: A prefix string for bucket file names.
-
- Returns:
- A loaded BucketSet object.
- """
- LOGGER.info('Loading bucket files.')
- bucket_set = BucketSet()
-
- n = 0
- while True:
- path = '%s.%04d.buckets' % (prefix, n)
- if not os.path.exists(path):
- if n > 10:
- break
- n += 1
- continue
- LOGGER.info(' %s' % path)
- with open(path, 'r') as f:
- bucket_set._load_file(f)
- n += 1
-
- return bucket_set
-
- def _load_file(self, bucket_f):
- for line in bucket_f:
- words = line.split()
- typeinfo = None
- typeinfo_name = ''
- stacktrace_begin = 2
- for index, word in enumerate(words):
- if index < 2:
- continue
- if word[0] == 't':
- typeinfo = int(word[1:], 16)
- self._addresses[TYPEINFO_ADDRESS].add(typeinfo)
- elif word[0] == 'n':
- typeinfo_name = word[1:]
- else:
- stacktrace_begin = index
- break
- stacktrace = [int(address, 16) for address in words[stacktrace_begin:]]
- for frame in stacktrace:
- self._addresses[FUNCTION_ADDRESS].add(frame)
- self._buckets[int(words[0])] = Bucket(
- stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name)
-
- def __iter__(self):
- for bucket_id, bucket_content in self._buckets.iteritems():
- yield bucket_id, bucket_content
-
- def __getitem__(self, bucket_id):
- return self._buckets[bucket_id]
-
- def get(self, bucket_id):
- return self._buckets.get(bucket_id)
-
- def symbolize(self, symbol_cache):
- for bucket_content in self._buckets.itervalues():
- bucket_content.symbolize(symbol_cache)
-
- def clear_component_cache(self):
- for bucket_content in self._buckets.itervalues():
- bucket_content.clear_component_cache()
-
- def iter_addresses(self, address_type):
- for function in self._addresses[address_type]:
- yield function
-
-
-class Dump(object):
- """Represents a heap profile dump."""
-
- def __init__(self):
- self._path = ''
- self._time = None
- self._stacktrace_lines = []
- self._global_stats = {} # used only in apply_policy
-
- self._version = ''
- self._lines = []
-
- @property
- def path(self):
- return self._path
-
- @property
- def time(self):
- return self._time
-
- @property
- def iter_stacktrace(self):
- for line in self._stacktrace_lines:
- yield line
-
- def global_stat(self, name):
- return self._global_stats[name]
-
- @staticmethod
- def load(path, log_header='Loading a heap profile dump: '):
- """Loads a heap profile dump.
-
- Args:
- path: A file path string to load.
- log_header: A preceding string for log messages.
-
- Returns:
- A loaded Dump object.
-
- Raises:
- ParsingException for invalid heap profile dumps.
- """
- dump = Dump()
- dump._path = path
- dump._time = os.stat(dump._path).st_mtime
- dump._version = ''
-
- dump._lines = [line for line in open(dump._path, 'r')
- if line and not line.startswith('#')]
-
- try:
- dump._version, ln = dump._parse_version()
- dump._parse_global_stats()
- dump._extract_stacktrace_lines(ln)
- except EmptyDumpException:
- LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, path))
- except ParsingException, e:
- LOGGER.error('%s%s ...error %s' % (log_header, path, e))
- raise
- else:
- LOGGER.info('%s%s (version: %s)' % (log_header, path, dump._version))
-
- return dump
-
- def _parse_version(self):
- """Parses a version string in self._lines.
-
- Returns:
- A pair of (a string representing a version of the stacktrace dump,
- and an integer indicating a line number next to the version string).
-
- Raises:
- ParsingException for invalid dump versions.
- """
- version = ''
-
- # Skip until an identifiable line.
- headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
- if not self._lines:
- raise EmptyDumpException('Empty heap dump file.')
- (ln, found) = skip_while(
- 0, len(self._lines),
- lambda n: not self._lines[n].startswith(headers))
- if not found:
- raise InvalidDumpException('No version header.')
-
- # Identify a version.
- if self._lines[ln].startswith('heap profile: '):
- version = self._lines[ln][13:].strip()
- if version == DUMP_DEEP_5:
- (ln, _) = skip_while(
- ln, len(self._lines),
- lambda n: self._lines[n] != 'STACKTRACES:\n')
- elif version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(version)
- else:
- raise InvalidDumpException('Invalid version: %s' % version)
- elif self._lines[ln] == 'STACKTRACES:\n':
- raise ObsoleteDumpVersionException(DUMP_DEEP_1)
- elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
- raise ObsoleteDumpVersionException(DUMP_DEEP_2)
-
- return (version, ln)
-
- def _parse_global_stats(self):
- """Parses lines in self._lines as global stats."""
- (ln, _) = skip_while(
- 0, len(self._lines),
- lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
-
- global_stat_names = [
- 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
- 'nonprofiled-absent', 'nonprofiled-anonymous',
- 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
- 'nonprofiled-stack', 'nonprofiled-other',
- 'profiled-mmap', 'profiled-malloc']
-
- for prefix in global_stat_names:
- (ln, _) = skip_while(
- ln, len(self._lines),
- lambda n: self._lines[n].split()[0] != prefix)
- words = self._lines[ln].split()
- self._global_stats[prefix + '_virtual'] = int(words[-2])
- self._global_stats[prefix + '_committed'] = int(words[-1])
-
- def _extract_stacktrace_lines(self, line_number):
- """Extracts the position of stacktrace lines.
-
- Valid stacktrace lines are stored into self._stacktrace_lines.
-
- Args:
- line_number: A line number to start parsing in lines.
-
- Raises:
- ParsingException for invalid dump versions.
- """
- if self._version == DUMP_DEEP_5:
- (line_number, _) = skip_while(
- line_number, len(self._lines),
- lambda n: not self._lines[n].split()[0].isdigit())
- stacktrace_start = line_number
- (line_number, _) = skip_while(
- line_number, len(self._lines),
- lambda n: self._check_stacktrace_line(self._lines[n]))
- self._stacktrace_lines = self._lines[stacktrace_start:line_number]
-
- elif self._version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(self._version)
-
- else:
- raise InvalidDumpException('Invalid version: %s' % self._version)
-
- @staticmethod
- def _check_stacktrace_line(stacktrace_line):
- """Checks if a given stacktrace_line is valid as stacktrace.
-
- Args:
- stacktrace_line: A string to be checked.
-
- Returns:
- True if the given stacktrace_line is valid.
- """
- words = stacktrace_line.split()
- if len(words) < BUCKET_ID + 1:
- return False
- if words[BUCKET_ID - 1] != '@':
- return False
- return True
-
-
-class DumpList(object):
- """Represents a sequence of heap profile dumps."""
-
- def __init__(self, dump_list):
- self._dump_list = dump_list
-
- @staticmethod
- def load(path_list):
- LOGGER.info('Loading heap dump profiles.')
- dump_list = []
- for path in path_list:
- dump_list.append(Dump.load(path, ' '))
- return DumpList(dump_list)
-
- def __len__(self):
- return len(self._dump_list)
-
- def __iter__(self):
- for dump in self._dump_list:
- yield dump
-
- def __getitem__(self, index):
- return self._dump_list[index]
-
-
-class Command(object):
- """Each subclass is a subcommand of this executable.
-
- See COMMANDS in main().
- """
- def __init__(self, usage):
- self._parser = optparse.OptionParser(usage)
-
- @staticmethod
- def load_basic_files(dump_path, multiple):
- prefix = Command._find_prefix(dump_path)
- symbol_mapping = SymbolMapping(prefix)
- symbol_mapping.prepare()
- bucket_set = BucketSet.load(prefix)
- if multiple:
- dump_list = DumpList.load(Command._find_all_dumps(dump_path))
- else:
- dump = Dump.load(dump_path)
- symbol_cache = SymbolCache(prefix)
- symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping)
- symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping)
- bucket_set.symbolize(symbol_cache)
- if multiple:
- return (bucket_set, dump_list)
- else:
- return (bucket_set, dump)
-
- @staticmethod
- def _find_prefix(path):
- return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
-
- @staticmethod
- def _find_all_dumps(dump_path):
- prefix = Command._find_prefix(dump_path)
- dump_path_list = [dump_path]
-
- n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
- n += 1
- while True:
- p = '%s.%04d.heap' % (prefix, n)
- if os.path.exists(p):
- dump_path_list.append(p)
- else:
- break
- n += 1
-
- return dump_path_list
-
- def _parse_args(self, sys_argv, required):
- options, args = self._parser.parse_args(sys_argv)
- if len(args) != required + 1:
- self._parser.error('needs %d argument(s).\n' % required)
- return None
- return (options, args)
-
- def _parse_policy_list(self, options_policy):
- if options_policy:
- return options_policy.split(',')
- else:
- return None
-
-
-class StacktraceCommand(Command):
- def __init__(self):
- super(StacktraceCommand, self).__init__(
- 'Usage: %prog stacktrace <dump>')
-
- def do(self, sys_argv):
- options, args = self._parse_args(sys_argv, 1)
- dump_path = args[1]
- (bucket_set, dump) = Command.load_basic_files(dump_path, False)
-
- StacktraceCommand._output(dump, bucket_set, sys.stdout)
- return 0
-
- @staticmethod
- def _output(dump, bucket_set, out):
- """Outputs a given stacktrace.
-
- Args:
- bucket_set: A BucketSet object.
- out: A file object to output.
- """
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- if not bucket:
- continue
- for i in range(0, BUCKET_ID - 1):
- out.write(words[i] + ' ')
- for frame in bucket.symbolized_stacktrace:
- out.write(frame + ' ')
- out.write('\n')
-
-
-class PolicyCommands(Command):
- def __init__(self, command):
- super(PolicyCommands, self).__init__(
- 'Usage: %%prog %s [-p POLICY] <first-dump>' % command)
- self._parser.add_option('-p', '--policy', type='string', dest='policy',
- help='profile with POLICY', metavar='POLICY')
-
- def _set_up(self, sys_argv):
- options, args = self._parse_args(sys_argv, 1)
- dump_path = args[1]
- (bucket_set, dumps) = Command.load_basic_files(dump_path, True)
-
- policy_set = PolicySet.load(self._parse_policy_list(options.policy))
- return policy_set, dumps, bucket_set
-
- def _apply_policy(self, dump, policy, bucket_set, first_dump_time):
- """Aggregates the total memory size of each component.
-
- Iterates through all stacktraces and attributes them to one of the components
- based on the policy. It is important to apply the policy rules in the right order.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- first_dump_time: An integer representing time when the first dump is
- dumped.
-
- Returns:
- A dict mapping components and their corresponding sizes.
- """
- LOGGER.info(' %s' % dump.path)
- sizes = dict((c, 0) for c in policy.components)
-
- PolicyCommands._accumulate(dump, policy, bucket_set, sizes)
-
- sizes['mmap-no-log'] = (
- dump.global_stat('profiled-mmap_committed') -
- sizes['mmap-total-log'])
- sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
- sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')
-
- sizes['tc-no-log'] = (
- dump.global_stat('profiled-malloc_committed') -
- sizes['tc-total-log'])
- sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
- sizes['tc-unused'] = (
- sizes['mmap-tcmalloc'] -
- dump.global_stat('profiled-malloc_committed'))
- sizes['tc-total'] = sizes['mmap-tcmalloc']
-
- for key, value in {
- 'total': 'total_committed',
- 'filemapped': 'file_committed',
- 'file-exec': 'file-exec_committed',
- 'file-nonexec': 'file-nonexec_committed',
- 'anonymous': 'anonymous_committed',
- 'stack': 'stack_committed',
- 'other': 'other_committed',
- 'unhooked-absent': 'nonprofiled-absent_committed',
- 'unhooked-anonymous': 'nonprofiled-anonymous_committed',
- 'unhooked-file-exec': 'nonprofiled-file-exec_committed',
- 'unhooked-file-nonexec': 'nonprofiled-file-nonexec_committed',
- 'unhooked-stack': 'nonprofiled-stack_committed',
- 'unhooked-other': 'nonprofiled-other_committed',
- 'total-vm': 'total_virtual',
- 'filemapped-vm': 'file_virtual',
- 'anonymous-vm': 'anonymous_virtual',
- 'other-vm': 'other_virtual' }.iteritems():
- if key in sizes:
- sizes[key] = dump.global_stat(value)
-
- if 'mustbezero' in sizes:
- removed_list = (
- 'profiled-mmap_committed',
- 'nonprofiled-absent_committed',
- 'nonprofiled-anonymous_committed',
- 'nonprofiled-file-exec_committed',
- 'nonprofiled-file-nonexec_committed',
- 'nonprofiled-stack_committed',
- 'nonprofiled-other_committed')
- sizes['mustbezero'] = (
- dump.global_stat('total_committed') -
- sum(dump.global_stat(removed) for removed in removed_list))
- if 'total-exclude-profiler' in sizes:
- sizes['total-exclude-profiler'] = (
- dump.global_stat('total_committed') -
- (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
- if 'hour' in sizes:
- sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
- if 'minute' in sizes:
- sizes['minute'] = (dump.time - first_dump_time) / 60.0
- if 'second' in sizes:
- sizes['second'] = dump.time - first_dump_time
-
- return sizes
-
- @staticmethod
- def _accumulate(dump, policy, bucket_set, sizes):
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- component_match = policy.find(bucket)
- sizes[component_match] += int(words[COMMITTED])
-
- if component_match.startswith('tc-'):
- sizes['tc-total-log'] += int(words[COMMITTED])
- elif component_match.startswith('mmap-'):
- sizes['mmap-total-log'] += int(words[COMMITTED])
- else:
- sizes['other-total-log'] += int(words[COMMITTED])
-
-
-class CSVCommand(PolicyCommands):
- def __init__(self):
- super(CSVCommand, self).__init__('csv')
-
- def do(self, sys_argv):
- policy_set, dumps, bucket_set = self._set_up(sys_argv)
- return self._output(policy_set, dumps, bucket_set, sys.stdout)
-
- def _output(self, policy_set, dumps, bucket_set, out):
- max_components = 0
- for label in policy_set:
- max_components = max(max_components, len(policy_set[label].components))
-
- for label in sorted(policy_set):
- components = policy_set[label].components
- if len(policy_set) > 1:
- out.write('%s%s\n' % (label, ',' * (max_components - 1)))
- out.write('%s%s\n' % (
- ','.join(components), ',' * (max_components - len(components))))
-
- LOGGER.info('Applying a policy %s to...' % label)
- for dump in dumps:
- component_sizes = self._apply_policy(
- dump, policy_set[label], bucket_set, dumps[0].time)
- s = []
- for c in components:
- if c in ('hour', 'minute', 'second'):
- s.append('%05.5f' % (component_sizes[c]))
- else:
- s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
- out.write('%s%s\n' % (
- ','.join(s), ',' * (max_components - len(components))))
-
- bucket_set.clear_component_cache()
-
- return 0
-
-
-class JSONCommand(PolicyCommands):
- def __init__(self):
- super(JSONCommand, self).__init__('json')
-
- def do(self, sys_argv):
- policy_set, dumps, bucket_set = self._set_up(sys_argv)
- return self._output(policy_set, dumps, bucket_set, sys.stdout)
-
- def _output(self, policy_set, dumps, bucket_set, out):
- json_base = {
- 'version': 'JSON_DEEP_2',
- 'policies': {},
- }
-
- for label in sorted(policy_set):
- json_base['policies'][label] = {
- 'legends': policy_set[label].components,
- 'snapshots': [],
- }
-
- LOGGER.info('Applying a policy %s to...' % label)
- for dump in dumps:
- component_sizes = self._apply_policy(
- dump, policy_set[label], bucket_set, dumps[0].time)
- component_sizes['dump_path'] = dump.path
- component_sizes['dump_time'] = datetime.fromtimestamp(
- dump.time).strftime('%Y-%m-%d %H:%M:%S')
- json_base['policies'][label]['snapshots'].append(component_sizes)
-
- bucket_set.clear_component_cache()
-
- json.dump(json_base, out, indent=2, sort_keys=True)
-
- return 0
-
-
-class ListCommand(PolicyCommands):
- def __init__(self):
- super(ListCommand, self).__init__('list')
-
- def do(self, sys_argv):
- policy_set, dumps, bucket_set = self._set_up(sys_argv)
- return self._output(policy_set, dumps, bucket_set, sys.stdout)
-
- def _output(self, policy_set, dumps, bucket_set, out):
- for label in sorted(policy_set):
- LOGGER.info('Applying a policy %s to...' % label)
- for dump in dumps:
- component_sizes = self._apply_policy(
- dump, policy_set[label], bucket_set, dump.time)
- out.write('%s for %s:\n' % (label, dump.path))
- for c in policy_set[label].components:
- if c in ['hour', 'minute', 'second']:
- out.write('%40s %12.3f\n' % (c, component_sizes[c]))
- else:
- out.write('%40s %12d\n' % (c, component_sizes[c]))
-
- bucket_set.clear_component_cache()
-
- return 0
-
-
-class ExpandCommand(Command):
- def __init__(self):
- super(ExpandCommand, self).__init__(
- 'Usage: %prog expand <dump> <policy> <component> <depth>')
-
- def do(self, sys_argv):
- options, args = self._parse_args(sys_argv, 4)
- dump_path = args[1]
- target_policy = args[2]
- component_name = args[3]
- depth = args[4]
- (bucket_set, dump) = Command.load_basic_files(dump_path, False)
- policy_set = PolicySet.load(self._parse_policy_list(target_policy))
-
- self._output(dump, policy_set[target_policy], bucket_set,
- component_name, int(depth), sys.stdout)
- return 0
-
- def _output(self, dump, policy, bucket_set, component_name, depth, out):
- """Prints all stacktraces in a given component of given depth.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- component_name: A name of component for filtering.
- depth: An integer representing depth to be printed.
- out: An IO object to output.
- """
- sizes = {}
-
- ExpandCommand._accumulate(
- dump, policy, bucket_set, component_name, depth, sizes)
-
- sorted_sizes_list = sorted(
- sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
- total = 0
- for size_pair in sorted_sizes_list:
- out.write('%10d %s\n' % (size_pair[1], size_pair[0]))
- total += size_pair[1]
- LOGGER.info('total: %d\n' % total)
-
- @staticmethod
- def _accumulate(dump, policy, bucket_set, component_name, depth, sizes):
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- component_match = policy.find(bucket)
- if component_match == component_name:
- stacktrace_sequence = ''
- if bucket.typeinfo:
- stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo
- stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name
- for stack in bucket.symbolized_stacktrace[
- 0 : min(len(bucket.symbolized_stacktrace), 1 + depth)]:
- stacktrace_sequence += stack + ' '
- if not stacktrace_sequence in sizes:
- sizes[stacktrace_sequence] = 0
- sizes[stacktrace_sequence] += int(words[COMMITTED])
-
-
-class PProfCommand(Command):
- def __init__(self):
- super(PProfCommand, self).__init__(
- 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
- self._parser.add_option('-c', '--component', type='string',
- dest='component',
- help='restrict to COMPONENT', metavar='COMPONENT')
-
- def do(self, sys_argv):
- options, args = self._parse_args(sys_argv, 2)
-
- dump_path = args[1]
- target_policy = args[2]
- component = options.component
-
- (bucket_set, dump) = Command.load_basic_files(dump_path, False)
- policy_set = PolicySet.load(self._parse_policy_list(target_policy))
-
- with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f:
- maps_lines = maps_f.readlines()
- PProfCommand._output(
- dump, policy_set[target_policy], bucket_set, maps_lines, component,
- sys.stdout)
-
- return 0
-
- @staticmethod
- def _output(dump, policy, bucket_set, maps_lines, component_name, out):
- """Converts the heap profile dump so it can be processed by pprof.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- maps_lines: A list of strings containing /proc/.../maps.
- component_name: A name of component for filtering.
- out: An IO object to output.
- """
- out.write('heap profile: ')
- com_committed, com_allocs = PProfCommand._accumulate(
- dump, policy, bucket_set, component_name)
-
- out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
- com_allocs, com_committed, com_allocs, com_committed))
-
- PProfCommand._output_stacktrace_lines(
- dump, policy, bucket_set, component_name, out)
-
- out.write('MAPPED_LIBRARIES:\n')
- for line in maps_lines:
- out.write(line)
-
- @staticmethod
- def _accumulate(dump, policy, bucket_set, component_name):
- """Accumulates size of committed chunks and the number of allocated chunks.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- component_name: A name of component for filtering.
-
- Returns:
- Two integers which are the accumulated size of committed regions and the
- number of allocated chunks, respectively.
- """
- com_committed = 0
- com_allocs = 0
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- if (not bucket or
- (component_name and component_name != policy.find(bucket))):
- continue
-
- com_committed += int(words[COMMITTED])
- com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
-
- return com_committed, com_allocs
-
- @staticmethod
- def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out):
- """Prints information of stacktrace lines for pprof.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- component_name: A name of component for filtering.
- out: An IO object to output.
- """
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- if (not bucket or
- (component_name and component_name != policy.find(bucket))):
- continue
-
- out.write('%6d: %8s [%6d: %8s] @' % (
- int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
- words[COMMITTED],
- int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
- words[COMMITTED]))
- for address in bucket.stacktrace:
- out.write(' 0x%016x' % address)
- out.write('\n')
-
-
-def main():
- COMMANDS = {
- 'csv': CSVCommand,
- 'expand': ExpandCommand,
- 'json': JSONCommand,
- 'list': ListCommand,
- 'pprof': PProfCommand,
- 'stacktrace': StacktraceCommand,
- }
-
- if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
- sys.stderr.write("""Usage: %s <command> [options] [<args>]
-
-Commands:
- csv Classify memory usage in CSV
- expand Show all stacktraces contained in the specified component
- json Classify memory usage in JSON
- list Classify memory usage in simple listing format
- pprof Format the profile dump so that it can be processed by pprof
- stacktrace Convert runtime addresses to symbol names
-
-Quick Reference:
- dmprof csv [-p POLICY] <first-dump>
- dmprof expand <dump> <policy> <component> <depth>
- dmprof json [-p POLICY] <first-dump>
- dmprof list [-p POLICY] <first-dump>
- dmprof pprof [-c COMPONENT] <dump> <policy>
- dmprof stacktrace <dump>
-""" % (sys.argv[0]))
- sys.exit(1)
- action = sys.argv.pop(1)
-
- LOGGER.setLevel(logging.DEBUG)
- handler = logging.StreamHandler()
- handler.setLevel(logging.INFO)
- formatter = logging.Formatter('%(message)s')
- handler.setFormatter(formatter)
- LOGGER.addHandler(handler)
-
- try:
- errorcode = COMMANDS[action]().do(sys.argv)
- except ParsingException, e:
- errorcode = 1
- sys.stderr.write('Exit by parsing error: %s\n' % e)
-
- return errorcode
-
-
-if __name__ == '__main__':
- sys.exit(main())
+PYTHONPATH=$BASEDIR/../python/google python \
+ "$BASEDIR/dmprof.py" $ARGV
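
Note on the new wrapper: it sets PYTHONPATH to the bundled tools/python/google directory and forwards its arguments to dmprof.py next to it. Because ARGV="$@" flattens the argument list into a single string, arguments containing spaces would not survive the forwarding; the sketch below therefore assumes simple arguments. It shows what an invocation effectively expands to when run from the source root; the dump file name chrome.12345.0002.heap and the policy label l0 are illustrative placeholders (the label follows the policy.l0.json example in PolicySet.load above), not values taken from this change.

  $ tools/deep_memory_profiler/dmprof csv -p l0 chrome.12345.0002.heap
  # ...which the wrapper effectively runs as:
  $ PYTHONPATH=tools/deep_memory_profiler/../python/google \
        python tools/deep_memory_profiler/dmprof.py csv -p l0 chrome.12345.0002.heap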
