Chromium Code Reviews

Unified Diff: tools/deep_memory_profiler/dmprof

Issue 11417048: Retry: Add a first test for tools/deep_memory_profiler. (Closed)
Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 1 month ago
Index: tools/deep_memory_profiler/dmprof
diff --git a/tools/deep_memory_profiler/dmprof b/tools/deep_memory_profiler/dmprof
index 1951838c4f75571e8c5f5bb967f9f83c23bc2ed6..a43f9f663af06b898c6a14bdae989077eba28104 100755
--- a/tools/deep_memory_profiler/dmprof
+++ b/tools/deep_memory_profiler/dmprof
@@ -1,1341 +1,12 @@
-#!/usr/bin/env python
+#!/usr/bin/env bash
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-"""The deep heap profiler script for Chrome."""
+# Re-direct the arguments to dmprof.py.
-from datetime import datetime
-import json
-import logging
-import optparse
-import os
-import re
-import shutil
-import subprocess
-import sys
-import tempfile
+BASEDIR=`dirname $0`
+ARGV="$@"
-BASE_PATH = os.path.dirname(os.path.abspath(__file__))
-FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
- BASE_PATH, os.pardir, 'find_runtime_symbols')
-sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
-
-from find_runtime_symbols import find_runtime_symbols_list
-from find_runtime_symbols import find_runtime_typeinfo_symbols_list
-from find_runtime_symbols import RuntimeSymbolsInProcess
-from prepare_symbol_info import prepare_symbol_info
-
-BUCKET_ID = 5
-VIRTUAL = 0
-COMMITTED = 1
-ALLOC_COUNT = 2
-FREE_COUNT = 3
-NULL_REGEX = re.compile('')
-
-LOGGER = logging.getLogger('dmprof')
-POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')
-FUNCTION_ADDRESS = 'function'
-TYPEINFO_ADDRESS = 'typeinfo'
-
-
-# Heap Profile Dump versions
-
-# DUMP_DEEP_[1-4] are obsolete.
-# DUMP_DEEP_2+ distinguish mmap regions and malloc chunks.
-# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
-# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
-# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
-# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
-DUMP_DEEP_1 = 'DUMP_DEEP_1'
-DUMP_DEEP_2 = 'DUMP_DEEP_2'
-DUMP_DEEP_3 = 'DUMP_DEEP_3'
-DUMP_DEEP_4 = 'DUMP_DEEP_4'
-
-DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
-
-# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
-# malloc and mmap are identified in bucket files.
-# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
-DUMP_DEEP_5 = 'DUMP_DEEP_5'
-
-
-# Heap Profile Policy versions
-
-# POLICY_DEEP_1 DOES NOT include allocation_type columns.
-# mmap regions are distinguished by mmap frames in the pattern column.
-POLICY_DEEP_1 = 'POLICY_DEEP_1'
-
-# POLICY_DEEP_2 DOES include allocation_type columns.
-# mmap regions are distinguished by the allocation_type column.
-POLICY_DEEP_2 = 'POLICY_DEEP_2'
-
-# POLICY_DEEP_3 is in JSON format.
-POLICY_DEEP_3 = 'POLICY_DEEP_3'
-
-# POLICY_DEEP_4 contains typeinfo.
-POLICY_DEEP_4 = 'POLICY_DEEP_4'
-
-
-class EmptyDumpException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-
-class ParsingException(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-
-class InvalidDumpException(ParsingException):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return "invalid heap profile dump: %s" % repr(self.value)
-
-
-class ObsoleteDumpVersionException(ParsingException):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return "obsolete heap profile dump version: %s" % repr(self.value)
-
-
-def skip_while(index, max_index, skipping_condition):
- """Increments |index| until |skipping_condition|(|index|) is False.
-
- Returns:
- A pair of an integer indicating the line number after skipping, and a
- boolean value which is True if a line for which skipping_condition
- is False was found.
- """
- while skipping_condition(index):
- index += 1
- if index >= max_index:
- return index, False
- return index, True
-
-
-class SymbolMapping(object):
- """Manages all symbol information on process memory mapping.
-
- The symbol information consists of all symbols in the binary files obtained
- by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps,
- nm and so on. It is the minimum information required to run dmprof.
-
- The information is prepared in a directory "|prefix|.symmap" by prepare().
- The directory is more portable than Chromium binaries. Users can save it
- and re-analyze with the portable one.
-
- Note that loading the symbol information takes a long time. It is very big
- in general -- it doesn't know which functions are actually called and which
- types are actually used. Used symbols can be cached in the "SymbolCache" class.
- """
- def __init__(self, prefix):
- self._prefix = prefix
- self._prepared_symbol_mapping_path = None
- self._loaded_symbol_mapping = None
-
- def prepare(self):
- """Extracts symbol mapping from binaries and prepares it to use.
-
- The symbol mapping is stored in a directory whose name is stored in
- |self._prepared_symbol_mapping_path|.
-
- Returns:
- True if succeeded.
- """
- LOGGER.info('Preparing symbol mapping...')
- self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info(
- self._prefix + '.maps', self._prefix + '.symmap', True)
- if self._prepared_symbol_mapping_path:
- LOGGER.info(' Prepared symbol mapping.')
- if used_tempdir:
- LOGGER.warn(' Using a temporary directory for symbol mapping.')
- LOGGER.warn(' Delete it by yourself.')
- LOGGER.warn(' Or, move the directory by yourself to use it later.')
- return True
- else:
- LOGGER.warn(' Failed to prepare symbol mapping.')
- return False
-
- def get(self):
- """Returns symbol mapping.
-
- Returns:
- Loaded symbol mapping. None if failed.
- """
- if not self._prepared_symbol_mapping_path and not self.prepare():
- return None
- if not self._loaded_symbol_mapping:
- LOGGER.info('Loading symbol mapping...')
- self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load(
- self._prepared_symbol_mapping_path)
- return self._loaded_symbol_mapping
-
-
-class SymbolCache(object):
- """Manages cache of used symbol mapping.
-
- The original symbol mapping is provided by "SymbolMapping" (from maps, nm,
- readelf and so on), and "SymbolCache" just caches "how dmprof interprets the
- address" to speed up another analysis of the same binary and profile dumps.
- Handling all symbol mapping takes a long time in "SymbolMapping".
- "SymbolCache" caches used symbol mapping on memory and in files.
- """
- def __init__(self, prefix):
- self._prefix = prefix
- self._symbol_cache_paths = {
- FUNCTION_ADDRESS: prefix + '.funcsym',
- TYPEINFO_ADDRESS: prefix + '.typesym',
- }
- self._find_runtime_symbols_functions = {
- FUNCTION_ADDRESS: find_runtime_symbols_list,
- TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list,
- }
- self._symbol_caches = {
- FUNCTION_ADDRESS: {},
- TYPEINFO_ADDRESS: {},
- }
-
- def update(self, address_type, bucket_set, symbol_mapping):
- """Updates symbol mapping on memory and in a ".*sym" cache file.
-
- It reads cached symbol mapping from a ".*sym" file if it exists. Then,
- it looks up unresolved addresses from a given "SymbolMapping". Finally,
- both symbol mappings on memory and in the ".*sym" cache file are updated.
-
- Symbol files are formatted as follows:
- <Address> <Symbol>
- <Address> <Symbol>
- <Address> <Symbol>
- ...
-
- Args:
- address_type: A type of addresses to update. It should be one of
- FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
- bucket_set: A BucketSet object.
- symbol_mapping: A SymbolMapping object.
- """
- self._load(address_type)
-
- unresolved_addresses = sorted(
- address for address in bucket_set.iter_addresses(address_type)
- if address not in self._symbol_caches[address_type])
-
- if not unresolved_addresses:
- LOGGER.info('No need to resolve any more addresses.')
- return
-
- symbol_cache_path = self._symbol_cache_paths[address_type]
- with open(symbol_cache_path, mode='a+') as symbol_f:
- LOGGER.info('Loading %d unresolved addresses.' %
- len(unresolved_addresses))
- symbol_list = self._find_runtime_symbols_functions[address_type](
- symbol_mapping.get(), unresolved_addresses)
-
- for address, symbol in zip(unresolved_addresses, symbol_list):
- stripped_symbol = symbol.strip() or '??'
- self._symbol_caches[address_type][address] = stripped_symbol
- symbol_f.write('%x %s\n' % (address, stripped_symbol))
-
- def lookup(self, address_type, address):
- """Looks up a symbol for a given |address|.
-
- Args:
- address_type: A type of addresses to lookup. It should be one of
- FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
- address: An integer that represents an address.
-
- Returns:
- A string that represents a symbol.
- """
- return self._symbol_caches[address_type].get(address)
-
- def _load(self, address_type):
- symbol_cache_path = self._symbol_cache_paths[address_type]
- try:
- with open(symbol_cache_path, mode='r') as symbol_f:
- for line in symbol_f:
- items = line.rstrip().split(None, 1)
- if len(items) == 1:
- items.append('??')
- self._symbol_caches[address_type][int(items[0], 16)] = items[1]
- LOGGER.info('Loaded %d entries from symbol cache.' %
- len(self._symbol_caches[address_type]))
- except IOError as e:
- LOGGER.info('No valid symbol cache file is found.')
-
-
-class Rule(object):
- """Represents one matching rule in a policy file."""
-
- def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None):
- self._name = name
- self._mmap = mmap
- self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z')
- if typeinfo_pattern:
- self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')
- else:
- self._typeinfo_pattern = None
-
- @property
- def name(self):
- return self._name
-
- @property
- def mmap(self):
- return self._mmap
-
- @property
- def stacktrace_pattern(self):
- return self._stacktrace_pattern
-
- @property
- def typeinfo_pattern(self):
- return self._typeinfo_pattern
-
-
-class Policy(object):
- """Represents a policy, a content of a policy file."""
-
- def __init__(self, rules, version, components):
- self._rules = rules
- self._version = version
- self._components = components
-
- @property
- def rules(self):
- return self._rules
-
- @property
- def version(self):
- return self._version
-
- @property
- def components(self):
- return self._components
-
- def find(self, bucket):
- """Finds a matching component name which a given |bucket| belongs to.
-
- Args:
- bucket: A Bucket object to be searched for.
-
- Returns:
- A string representing a component name.
- """
- if not bucket:
- return 'no-bucket'
- if bucket.component_cache:
- return bucket.component_cache
-
- stacktrace = bucket.symbolized_joined_stacktrace
- typeinfo = bucket.symbolized_typeinfo
- if typeinfo.startswith('0x'):
- typeinfo = bucket.typeinfo_name
-
- for rule in self._rules:
- if (bucket.mmap == rule.mmap and
- rule.stacktrace_pattern.match(stacktrace) and
- (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
- bucket.component_cache = rule.name
- return rule.name
-
- assert False
-
- @staticmethod
- def load(filename, format):
- """Loads a policy file of |filename| in a |format|.
-
- Args:
- filename: A filename to be loaded.
- format: A string to specify a format of the file. Only 'json' is
- supported for now.
-
- Returns:
- A loaded Policy object.
- """
- with open(os.path.join(BASE_PATH, filename)) as policy_f:
- return Policy.parse(policy_f, format)
-
- @staticmethod
- def parse(policy_f, format):
- """Parses a policy file content in a |format|.
-
- Args:
- policy_f: An IO object to be loaded.
- format: A string to specify a format of the file. Only 'json' is
- supported for now.
-
- Returns:
- A loaded Policy object.
- """
- if format == 'json':
- return Policy._parse_json(policy_f)
- else:
- return None
-
- @staticmethod
- def _parse_json(policy_f):
- """Parses policy file in json format.
-
- A policy file contains component names and their stacktrace patterns
- written as regular expressions. Those patterns are matched against each
- symbol of each stacktrace in the order written in the policy file.
-
- Args:
- policy_f: A File/IO object to read.
-
- Returns:
- A loaded policy object.
- """
- policy = json.load(policy_f)
-
- rules = []
- for rule in policy['rules']:
- rules.append(Rule(
- rule['name'],
- rule['allocator'] == 'mmap',
- rule['stacktrace'],
- rule['typeinfo'] if 'typeinfo' in rule else None))
- return Policy(rules, policy['version'], policy['components'])
-
-
-class PolicySet(object):
- """Represents a set of policies."""
-
- def __init__(self, policy_directory):
- self._policy_directory = policy_directory
-
- @staticmethod
- def load(labels=None):
- """Loads a set of policies via the "default policy directory".
-
- The "default policy directory" contains pairs of policies and their labels.
- For example, a policy "policy.l0.json" is labeled "l0" in the default
- policy directory "policies.json".
-
- All policies in the directory are loaded by default. Policies can be
- limited by |labels|.
-
- Args:
- labels: An array that contains policy labels to be loaded.
-
- Returns:
- A PolicySet object.
- """
- default_policy_directory = PolicySet._load_default_policy_directory()
- if labels:
- specified_policy_directory = {}
- for label in labels:
- if label in default_policy_directory:
- specified_policy_directory[label] = default_policy_directory[label]
- # TODO(dmikurube): Load an un-labeled policy file.
- return PolicySet._load_policies(specified_policy_directory)
- else:
- return PolicySet._load_policies(default_policy_directory)
-
- def __len__(self):
- return len(self._policy_directory)
-
- def __iter__(self):
- for label in self._policy_directory:
- yield label
-
- def __getitem__(self, label):
- return self._policy_directory[label]
-
- @staticmethod
- def _load_default_policy_directory():
- with open(POLICIES_JSON_PATH, mode='r') as policies_f:
- default_policy_directory = json.load(policies_f)
- return default_policy_directory
-
- @staticmethod
- def _load_policies(directory):
- LOGGER.info('Loading policy files.')
- policies = {}
- for label in directory:
- LOGGER.info(' %s: %s' % (label, directory[label]['file']))
- loaded = Policy.load(directory[label]['file'], directory[label]['format'])
- if loaded:
- policies[label] = loaded
- return PolicySet(policies)
-
-
-class Bucket(object):
- """Represents a bucket, which is a unit of memory block classification."""
-
- def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name):
- self._stacktrace = stacktrace
- self._mmap = mmap
- self._typeinfo = typeinfo
- self._typeinfo_name = typeinfo_name
-
- self._symbolized_stacktrace = stacktrace
- self._symbolized_joined_stacktrace = ''
- self._symbolized_typeinfo = typeinfo_name
-
- self.component_cache = ''
-
- def symbolize(self, symbol_cache):
- """Makes a symbolized stacktrace and typeinfo with |symbol_cache|.
-
- Args:
- symbol_cache: A SymbolCache object.
- """
- # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
- self._symbolized_stacktrace = [
- symbol_cache.lookup(FUNCTION_ADDRESS, address)
- for address in self._stacktrace]
- self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace)
- if not self._typeinfo:
- self._symbolized_typeinfo = 'no typeinfo'
- else:
- self._symbolized_typeinfo = symbol_cache.lookup(
- TYPEINFO_ADDRESS, self._typeinfo)
- if not self._symbolized_typeinfo:
- self._symbolized_typeinfo = 'no typeinfo'
-
- def clear_component_cache(self):
- self.component_cache = ''
-
- @property
- def stacktrace(self):
- return self._stacktrace
-
- @property
- def mmap(self):
- return self._mmap
-
- @property
- def typeinfo(self):
- return self._typeinfo
-
- @property
- def typeinfo_name(self):
- return self._typeinfo_name
-
- @property
- def symbolized_stacktrace(self):
- return self._symbolized_stacktrace
-
- @property
- def symbolized_joined_stacktrace(self):
- return self._symbolized_joined_stacktrace
-
- @property
- def symbolized_typeinfo(self):
- return self._symbolized_typeinfo
-
-
-class BucketSet(object):
- """Represents a set of buckets."""
- def __init__(self):
- self._buckets = {}
- self._addresses = {
- FUNCTION_ADDRESS: set(),
- TYPEINFO_ADDRESS: set(),
- }
-
- @staticmethod
- def load(prefix):
- """Loads all related bucket files.
-
- Args:
- prefix: A prefix string for bucket file names.
-
- Returns:
- A loaded BucketSet object.
- """
- LOGGER.info('Loading bucket files.')
- bucket_set = BucketSet()
-
- n = 0
- while True:
- path = '%s.%04d.buckets' % (prefix, n)
- if not os.path.exists(path):
- if n > 10:
- break
- n += 1
- continue
- LOGGER.info(' %s' % path)
- with open(path, 'r') as f:
- bucket_set._load_file(f)
- n += 1
-
- return bucket_set
-
- def _load_file(self, bucket_f):
- for line in bucket_f:
- words = line.split()
- typeinfo = None
- typeinfo_name = ''
- stacktrace_begin = 2
- for index, word in enumerate(words):
- if index < 2:
- continue
- if word[0] == 't':
- typeinfo = int(word[1:], 16)
- self._addresses[TYPEINFO_ADDRESS].add(typeinfo)
- elif word[0] == 'n':
- typeinfo_name = word[1:]
- else:
- stacktrace_begin = index
- break
- stacktrace = [int(address, 16) for address in words[stacktrace_begin:]]
- for frame in stacktrace:
- self._addresses[FUNCTION_ADDRESS].add(frame)
- self._buckets[int(words[0])] = Bucket(
- stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name)
-
- def __iter__(self):
- for bucket_id, bucket_content in self._buckets.iteritems():
- yield bucket_id, bucket_content
-
- def __getitem__(self, bucket_id):
- return self._buckets[bucket_id]
-
- def get(self, bucket_id):
- return self._buckets.get(bucket_id)
-
- def symbolize(self, symbol_cache):
- for bucket_content in self._buckets.itervalues():
- bucket_content.symbolize(symbol_cache)
-
- def clear_component_cache(self):
- for bucket_content in self._buckets.itervalues():
- bucket_content.clear_component_cache()
-
- def iter_addresses(self, address_type):
- for function in self._addresses[address_type]:
- yield function
-
-
-class Dump(object):
- """Represents a heap profile dump."""
-
- def __init__(self):
- self._path = ''
- self._time = None
- self._stacktrace_lines = []
- self._global_stats = {} # used only in apply_policy
-
- self._version = ''
- self._lines = []
-
- @property
- def path(self):
- return self._path
-
- @property
- def time(self):
- return self._time
-
- @property
- def iter_stacktrace(self):
- for line in self._stacktrace_lines:
- yield line
-
- def global_stat(self, name):
- return self._global_stats[name]
-
- @staticmethod
- def load(path, log_header='Loading a heap profile dump: '):
- """Loads a heap profile dump.
-
- Args:
- path: A file path string to load.
- log_header: A preceding string for log messages.
-
- Returns:
- A loaded Dump object.
-
- Raises:
- ParsingException for invalid heap profile dumps.
- """
- dump = Dump()
- dump._path = path
- dump._time = os.stat(dump._path).st_mtime
- dump._version = ''
-
- dump._lines = [line for line in open(dump._path, 'r')
- if line and not line.startswith('#')]
-
- try:
- dump._version, ln = dump._parse_version()
- dump._parse_global_stats()
- dump._extract_stacktrace_lines(ln)
- except EmptyDumpException:
- LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, path))
- except ParsingException, e:
- LOGGER.error('%s%s ...error %s' % (log_header, path, e))
- raise
- else:
- LOGGER.info('%s%s (version: %s)' % (log_header, path, dump._version))
-
- return dump
-
- def _parse_version(self):
- """Parses a version string in self._lines.
-
- Returns:
- A pair of (a string representing a version of the stacktrace dump,
- and an integer indicating a line number next to the version string).
-
- Raises:
- ParsingException for invalid dump versions.
- """
- version = ''
-
- # Skip until an identifiable line.
- headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
- if not self._lines:
- raise EmptyDumpException('Empty heap dump file.')
- (ln, found) = skip_while(
- 0, len(self._lines),
- lambda n: not self._lines[n].startswith(headers))
- if not found:
- raise InvalidDumpException('No version header.')
-
- # Identify a version.
- if self._lines[ln].startswith('heap profile: '):
- version = self._lines[ln][13:].strip()
- if version == DUMP_DEEP_5:
- (ln, _) = skip_while(
- ln, len(self._lines),
- lambda n: self._lines[n] != 'STACKTRACES:\n')
- elif version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(version)
- else:
- raise InvalidDumpException('Invalid version: %s' % version)
- elif self._lines[ln] == 'STACKTRACES:\n':
- raise ObsoleteDumpVersionException(DUMP_DEEP_1)
- elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
- raise ObsoleteDumpVersionException(DUMP_DEEP_2)
-
- return (version, ln)
-
- def _parse_global_stats(self):
- """Parses lines in self._lines as global stats."""
- (ln, _) = skip_while(
- 0, len(self._lines),
- lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
-
- global_stat_names = [
- 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
- 'nonprofiled-absent', 'nonprofiled-anonymous',
- 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
- 'nonprofiled-stack', 'nonprofiled-other',
- 'profiled-mmap', 'profiled-malloc']
-
- for prefix in global_stat_names:
- (ln, _) = skip_while(
- ln, len(self._lines),
- lambda n: self._lines[n].split()[0] != prefix)
- words = self._lines[ln].split()
- self._global_stats[prefix + '_virtual'] = int(words[-2])
- self._global_stats[prefix + '_committed'] = int(words[-1])
-
- def _extract_stacktrace_lines(self, line_number):
- """Extracts the position of stacktrace lines.
-
- Valid stacktrace lines are stored into self._stacktrace_lines.
-
- Args:
- line_number: A line number to start parsing in lines.
-
- Raises:
- ParsingException for invalid dump versions.
- """
- if self._version == DUMP_DEEP_5:
- (line_number, _) = skip_while(
- line_number, len(self._lines),
- lambda n: not self._lines[n].split()[0].isdigit())
- stacktrace_start = line_number
- (line_number, _) = skip_while(
- line_number, len(self._lines),
- lambda n: self._check_stacktrace_line(self._lines[n]))
- self._stacktrace_lines = self._lines[stacktrace_start:line_number]
-
- elif self._version in DUMP_DEEP_OBSOLETE:
- raise ObsoleteDumpVersionException(self._version)
-
- else:
- raise InvalidDumpException('Invalid version: %s' % self._version)
-
- @staticmethod
- def _check_stacktrace_line(stacktrace_line):
- """Checks if a given stacktrace_line is valid as stacktrace.
-
- Args:
- stacktrace_line: A string to be checked.
-
- Returns:
- True if the given stacktrace_line is valid.
- """
- words = stacktrace_line.split()
- if len(words) < BUCKET_ID + 1:
- return False
- if words[BUCKET_ID - 1] != '@':
- return False
- return True
-
-
-class DumpList(object):
- """Represents a sequence of heap profile dumps."""
-
- def __init__(self, dump_list):
- self._dump_list = dump_list
-
- @staticmethod
- def load(path_list):
- LOGGER.info('Loading heap dump profiles.')
- dump_list = []
- for path in path_list:
- dump_list.append(Dump.load(path, ' '))
- return DumpList(dump_list)
-
- def __len__(self):
- return len(self._dump_list)
-
- def __iter__(self):
- for dump in self._dump_list:
- yield dump
-
- def __getitem__(self, index):
- return self._dump_list[index]
-
-
-class Command(object):
- """Each subclass is a subcommand of this executable.
-
- See COMMANDS in main().
- """
- def __init__(self, usage):
- self._parser = optparse.OptionParser(usage)
-
- @staticmethod
- def load_basic_files(dump_path, multiple):
- prefix = Command._find_prefix(dump_path)
- symbol_mapping = SymbolMapping(prefix)
- symbol_mapping.prepare()
- bucket_set = BucketSet.load(prefix)
- if multiple:
- dump_list = DumpList.load(Command._find_all_dumps(dump_path))
- else:
- dump = Dump.load(dump_path)
- symbol_cache = SymbolCache(prefix)
- symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping)
- symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping)
- bucket_set.symbolize(symbol_cache)
- if multiple:
- return (bucket_set, dump_list)
- else:
- return (bucket_set, dump)
-
- @staticmethod
- def _find_prefix(path):
- return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)
-
- @staticmethod
- def _find_all_dumps(dump_path):
- prefix = Command._find_prefix(dump_path)
- dump_path_list = [dump_path]
-
- n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
- n += 1
- while True:
- p = '%s.%04d.heap' % (prefix, n)
- if os.path.exists(p):
- dump_path_list.append(p)
- else:
- break
- n += 1
-
- return dump_path_list
-
- def _parse_args(self, sys_argv, required):
- options, args = self._parser.parse_args(sys_argv)
- if len(args) != required + 1:
- self._parser.error('needs %d argument(s).\n' % required)
- return None
- return (options, args)
-
- def _parse_policy_list(self, options_policy):
- if options_policy:
- return options_policy.split(',')
- else:
- return None
-
-
-class StacktraceCommand(Command):
- def __init__(self):
- super(StacktraceCommand, self).__init__(
- 'Usage: %prog stacktrace <dump>')
-
- def do(self, sys_argv):
- options, args = self._parse_args(sys_argv, 1)
- dump_path = args[1]
- (bucket_set, dump) = Command.load_basic_files(dump_path, False)
-
- StacktraceCommand._output(dump, bucket_set, sys.stdout)
- return 0
-
- @staticmethod
- def _output(dump, bucket_set, out):
- """Outputs a given stacktrace.
-
- Args:
- bucket_set: A BucketSet object.
- out: A file object to output.
- """
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- if not bucket:
- continue
- for i in range(0, BUCKET_ID - 1):
- out.write(words[i] + ' ')
- for frame in bucket.symbolized_stacktrace:
- out.write(frame + ' ')
- out.write('\n')
-
-
-class PolicyCommands(Command):
- def __init__(self, command):
- super(PolicyCommands, self).__init__(
- 'Usage: %%prog %s [-p POLICY] <first-dump>' % command)
- self._parser.add_option('-p', '--policy', type='string', dest='policy',
- help='profile with POLICY', metavar='POLICY')
-
- def _set_up(self, sys_argv):
- options, args = self._parse_args(sys_argv, 1)
- dump_path = args[1]
- (bucket_set, dumps) = Command.load_basic_files(dump_path, True)
-
- policy_set = PolicySet.load(self._parse_policy_list(options.policy))
- return policy_set, dumps, bucket_set
-
- def _apply_policy(self, dump, policy, bucket_set, first_dump_time):
- """Aggregates the total memory size of each component.
-
- Iterates through all stacktraces and attributes them to one of the components
- based on the policy. It is important to apply the policy rules in the right order.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- first_dump_time: An integer representing time when the first dump is
- dumped.
-
- Returns:
- A dict mapping components and their corresponding sizes.
- """
- LOGGER.info(' %s' % dump.path)
- sizes = dict((c, 0) for c in policy.components)
-
- PolicyCommands._accumulate(dump, policy, bucket_set, sizes)
-
- sizes['mmap-no-log'] = (
- dump.global_stat('profiled-mmap_committed') -
- sizes['mmap-total-log'])
- sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
- sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')
-
- sizes['tc-no-log'] = (
- dump.global_stat('profiled-malloc_committed') -
- sizes['tc-total-log'])
- sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
- sizes['tc-unused'] = (
- sizes['mmap-tcmalloc'] -
- dump.global_stat('profiled-malloc_committed'))
- sizes['tc-total'] = sizes['mmap-tcmalloc']
-
- for key, value in {
- 'total': 'total_committed',
- 'filemapped': 'file_committed',
- 'file-exec': 'file-exec_committed',
- 'file-nonexec': 'file-nonexec_committed',
- 'anonymous': 'anonymous_committed',
- 'stack': 'stack_committed',
- 'other': 'other_committed',
- 'unhooked-absent': 'nonprofiled-absent_committed',
- 'unhooked-anonymous': 'nonprofiled-anonymous_committed',
- 'unhooked-file-exec': 'nonprofiled-file-exec_committed',
- 'unhooked-file-nonexec': 'nonprofiled-file-nonexec_committed',
- 'unhooked-stack': 'nonprofiled-stack_committed',
- 'unhooked-other': 'nonprofiled-other_committed',
- 'total-vm': 'total_virtual',
- 'filemapped-vm': 'file_virtual',
- 'anonymous-vm': 'anonymous_virtual',
- 'other-vm': 'other_virtual' }.iteritems():
- if key in sizes:
- sizes[key] = dump.global_stat(value)
-
- if 'mustbezero' in sizes:
- removed_list = (
- 'profiled-mmap_committed',
- 'nonprofiled-absent_committed',
- 'nonprofiled-anonymous_committed',
- 'nonprofiled-file-exec_committed',
- 'nonprofiled-file-nonexec_committed',
- 'nonprofiled-stack_committed',
- 'nonprofiled-other_committed')
- sizes['mustbezero'] = (
- dump.global_stat('total_committed') -
- sum(dump.global_stat(removed) for removed in removed_list))
- if 'total-exclude-profiler' in sizes:
- sizes['total-exclude-profiler'] = (
- dump.global_stat('total_committed') -
- (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
- if 'hour' in sizes:
- sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
- if 'minute' in sizes:
- sizes['minute'] = (dump.time - first_dump_time) / 60.0
- if 'second' in sizes:
- sizes['second'] = dump.time - first_dump_time
-
- return sizes
-
- @staticmethod
- def _accumulate(dump, policy, bucket_set, sizes):
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- component_match = policy.find(bucket)
- sizes[component_match] += int(words[COMMITTED])
-
- if component_match.startswith('tc-'):
- sizes['tc-total-log'] += int(words[COMMITTED])
- elif component_match.startswith('mmap-'):
- sizes['mmap-total-log'] += int(words[COMMITTED])
- else:
- sizes['other-total-log'] += int(words[COMMITTED])
-
-
-class CSVCommand(PolicyCommands):
- def __init__(self):
- super(CSVCommand, self).__init__('csv')
-
- def do(self, sys_argv):
- policy_set, dumps, bucket_set = self._set_up(sys_argv)
- return self._output(policy_set, dumps, bucket_set, sys.stdout)
-
- def _output(self, policy_set, dumps, bucket_set, out):
- max_components = 0
- for label in policy_set:
- max_components = max(max_components, len(policy_set[label].components))
-
- for label in sorted(policy_set):
- components = policy_set[label].components
- if len(policy_set) > 1:
- out.write('%s%s\n' % (label, ',' * (max_components - 1)))
- out.write('%s%s\n' % (
- ','.join(components), ',' * (max_components - len(components))))
-
- LOGGER.info('Applying a policy %s to...' % label)
- for dump in dumps:
- component_sizes = self._apply_policy(
- dump, policy_set[label], bucket_set, dumps[0].time)
- s = []
- for c in components:
- if c in ('hour', 'minute', 'second'):
- s.append('%05.5f' % (component_sizes[c]))
- else:
- s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
- out.write('%s%s\n' % (
- ','.join(s), ',' * (max_components - len(components))))
-
- bucket_set.clear_component_cache()
-
- return 0
-
-
-class JSONCommand(PolicyCommands):
- def __init__(self):
- super(JSONCommand, self).__init__('json')
-
- def do(self, sys_argv):
- policy_set, dumps, bucket_set = self._set_up(sys_argv)
- return self._output(policy_set, dumps, bucket_set, sys.stdout)
-
- def _output(self, policy_set, dumps, bucket_set, out):
- json_base = {
- 'version': 'JSON_DEEP_2',
- 'policies': {},
- }
-
- for label in sorted(policy_set):
- json_base['policies'][label] = {
- 'legends': policy_set[label].components,
- 'snapshots': [],
- }
-
- LOGGER.info('Applying a policy %s to...' % label)
- for dump in dumps:
- component_sizes = self._apply_policy(
- dump, policy_set[label], bucket_set, dumps[0].time)
- component_sizes['dump_path'] = dump.path
- component_sizes['dump_time'] = datetime.fromtimestamp(
- dump.time).strftime('%Y-%m-%d %H:%M:%S')
- json_base['policies'][label]['snapshots'].append(component_sizes)
-
- bucket_set.clear_component_cache()
-
- json.dump(json_base, out, indent=2, sort_keys=True)
-
- return 0
-
-
-class ListCommand(PolicyCommands):
- def __init__(self):
- super(ListCommand, self).__init__('list')
-
- def do(self, sys_argv):
- policy_set, dumps, bucket_set = self._set_up(sys_argv)
- return self._output(policy_set, dumps, bucket_set, sys.stdout)
-
- def _output(self, policy_set, dumps, bucket_set, out):
- for label in sorted(policy_set):
- LOGGER.info('Applying a policy %s to...' % label)
- for dump in dumps:
- component_sizes = self._apply_policy(
- dump, policy_set[label], bucket_set, dump.time)
- out.write('%s for %s:\n' % (label, dump.path))
- for c in policy_set[label].components:
- if c in ['hour', 'minute', 'second']:
- out.write('%40s %12.3f\n' % (c, component_sizes[c]))
- else:
- out.write('%40s %12d\n' % (c, component_sizes[c]))
-
- bucket_set.clear_component_cache()
-
- return 0
-
-
-class ExpandCommand(Command):
- def __init__(self):
- super(ExpandCommand, self).__init__(
- 'Usage: %prog expand <dump> <policy> <component> <depth>')
-
- def do(self, sys_argv):
- options, args = self._parse_args(sys_argv, 4)
- dump_path = args[1]
- target_policy = args[2]
- component_name = args[3]
- depth = args[4]
- (bucket_set, dump) = Command.load_basic_files(dump_path, False)
- policy_set = PolicySet.load(self._parse_policy_list(target_policy))
-
- self._output(dump, policy_set[target_policy], bucket_set,
- component_name, int(depth), sys.stdout)
- return 0
-
- def _output(self, dump, policy, bucket_set, component_name, depth, out):
- """Prints all stacktraces in a given component of given depth.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- component_name: A name of component for filtering.
- depth: An integer representing depth to be printed.
- out: An IO object to output.
- """
- sizes = {}
-
- ExpandCommand._accumulate(
- dump, policy, bucket_set, component_name, depth, sizes)
-
- sorted_sizes_list = sorted(
- sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
- total = 0
- for size_pair in sorted_sizes_list:
- out.write('%10d %s\n' % (size_pair[1], size_pair[0]))
- total += size_pair[1]
- LOGGER.info('total: %d\n' % total)
-
- @staticmethod
- def _accumulate(dump, policy, bucket_set, component_name, depth, sizes):
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- component_match = policy.find(bucket)
- if component_match == component_name:
- stacktrace_sequence = ''
- if bucket.typeinfo:
- stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo
- stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name
- for stack in bucket.symbolized_stacktrace[
- 0 : min(len(bucket.symbolized_stacktrace), 1 + depth)]:
- stacktrace_sequence += stack + ' '
- if not stacktrace_sequence in sizes:
- sizes[stacktrace_sequence] = 0
- sizes[stacktrace_sequence] += int(words[COMMITTED])
-
-
-class PProfCommand(Command):
- def __init__(self):
- super(PProfCommand, self).__init__(
- 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
- self._parser.add_option('-c', '--component', type='string',
- dest='component',
- help='restrict to COMPONENT', metavar='COMPONENT')
-
- def do(self, sys_argv):
- options, args = self._parse_args(sys_argv, 2)
-
- dump_path = args[1]
- target_policy = args[2]
- component = options.component
-
- (bucket_set, dump) = Command.load_basic_files(dump_path, False)
- policy_set = PolicySet.load(self._parse_policy_list(target_policy))
-
- with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f:
- maps_lines = maps_f.readlines()
- PProfCommand._output(
- dump, policy_set[target_policy], bucket_set, maps_lines, component,
- sys.stdout)
-
- return 0
-
- @staticmethod
- def _output(dump, policy, bucket_set, maps_lines, component_name, out):
- """Converts the heap profile dump so it can be processed by pprof.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- maps_lines: A list of strings containing /proc/.../maps.
- component_name: A name of component for filtering.
- out: An IO object to output.
- """
- out.write('heap profile: ')
- com_committed, com_allocs = PProfCommand._accumulate(
- dump, policy, bucket_set, component_name)
-
- out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
- com_allocs, com_committed, com_allocs, com_committed))
-
- PProfCommand._output_stacktrace_lines(
- dump, policy, bucket_set, component_name, out)
-
- out.write('MAPPED_LIBRARIES:\n')
- for line in maps_lines:
- out.write(line)
-
- @staticmethod
- def _accumulate(dump, policy, bucket_set, component_name):
- """Accumulates size of committed chunks and the number of allocated chunks.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- component_name: A name of component for filtering.
-
- Returns:
- Two integers which are the accumulated size of committed regions and the
- number of allocated chunks, respectively.
- """
- com_committed = 0
- com_allocs = 0
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- if (not bucket or
- (component_name and component_name != policy.find(bucket))):
- continue
-
- com_committed += int(words[COMMITTED])
- com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
-
- return com_committed, com_allocs
-
- @staticmethod
- def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out):
- """Prints information of stacktrace lines for pprof.
-
- Args:
- dump: A Dump object.
- policy: A Policy object.
- bucket_set: A BucketSet object.
- component_name: A name of component for filtering.
- out: An IO object to output.
- """
- for line in dump.iter_stacktrace:
- words = line.split()
- bucket = bucket_set.get(int(words[BUCKET_ID]))
- if (not bucket or
- (component_name and component_name != policy.find(bucket))):
- continue
-
- out.write('%6d: %8s [%6d: %8s] @' % (
- int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
- words[COMMITTED],
- int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
- words[COMMITTED]))
- for address in bucket.stacktrace:
- out.write(' 0x%016x' % address)
- out.write('\n')
-
-
-def main():
- COMMANDS = {
- 'csv': CSVCommand,
- 'expand': ExpandCommand,
- 'json': JSONCommand,
- 'list': ListCommand,
- 'pprof': PProfCommand,
- 'stacktrace': StacktraceCommand,
- }
-
- if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
- sys.stderr.write("""Usage: %s <command> [options] [<args>]
-
-Commands:
- csv Classify memory usage in CSV
- expand Show all stacktraces contained in the specified component
- json Classify memory usage in JSON
- list Classify memory usage in simple listing format
- pprof Format the profile dump so that it can be processed by pprof
- stacktrace Convert runtime addresses to symbol names
-
-Quick Reference:
- dmprof csv [-p POLICY] <first-dump>
- dmprof expand <dump> <policy> <component> <depth>
- dmprof json [-p POLICY] <first-dump>
- dmprof list [-p POLICY] <first-dump>
- dmprof pprof [-c COMPONENT] <dump> <policy>
- dmprof stacktrace <dump>
-""" % (sys.argv[0]))
- sys.exit(1)
- action = sys.argv.pop(1)
-
- LOGGER.setLevel(logging.DEBUG)
- handler = logging.StreamHandler()
- handler.setLevel(logging.INFO)
- formatter = logging.Formatter('%(message)s')
- handler.setFormatter(formatter)
- LOGGER.addHandler(handler)
-
- try:
- errorcode = COMMANDS[action]().do(sys.argv)
- except ParsingException, e:
- errorcode = 1
- sys.stderr.write('Exit by parsing error: %s\n' % e)
-
- return errorcode
-
-
-if __name__ == '__main__':
- sys.exit(main())
+PYTHONPATH=$BASEDIR/../python/google python \
+ "$BASEDIR/dmprof.py" $ARGV
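
Note on the new wrapper: it sets PYTHONPATH to the bundled tools/python/google directory and forwards its arguments to dmprof.py next to it. Because ARGV="$@" flattens the argument list into a single string, arguments containing spaces would not survive the forwarding; the sketch below therefore assumes simple arguments. It shows what an invocation effectively expands to when run from the source root; the dump file name chrome.12345.0002.heap and the policy label l0 are illustrative placeholders (the label follows the policy.l0.json example in PolicySet.load above), not values taken from this change.

  $ tools/deep_memory_profiler/dmprof csv -p l0 chrome.12345.0002.heap
  # ...which the wrapper effectively runs as:
  $ PYTHONPATH=tools/deep_memory_profiler/../python/google \
        python tools/deep_memory_profiler/dmprof.py csv -p l0 chrome.12345.0002.heap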
