Index: tools/deep_memory_profiler/dmprof.py |
diff --git a/tools/deep_memory_profiler/dmprof.py b/tools/deep_memory_profiler/dmprof.py |
index 533cbb9ee9a0b8e7219266be160f92ca93ab412d..0cb030a32fe7f3adaa401897413f5e93401efdff 100644 |
--- a/tools/deep_memory_profiler/dmprof.py |
+++ b/tools/deep_memory_profiler/dmprof.py |
@@ -2,2988 +2,33 @@ |
# Use of this source code is governed by a BSD-style license that can be |
# found in the LICENSE file. |
-"""The deep heap profiler script for Chrome.""" |
+"""The Deep Memory Profiler analyzer script. |
+ |
+See http://dev.chromium.org/developers/deep-memory-profiler for details. |
+""" |
-import copy |
-import cStringIO |
-import datetime |
-import json |
import logging |
-import optparse |
-import os |
-import re |
-import struct |
-import subprocess |
import sys |
-import tempfile |
-import time |
-import zipfile |
- |
-try: |
- from collections import OrderedDict # pylint: disable=E0611 |
-except ImportError: |
- # TODO(dmikurube): Remove this once Python 2.7 is required. |
- BASE_PATH = os.path.dirname(os.path.abspath(__file__)) |
- SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party') |
- sys.path.insert(0, SIMPLEJSON_PATH) |
- from simplejson import OrderedDict |
- |
-from range_dict import ExclusiveRangeDict |
- |
-BASE_PATH = os.path.dirname(os.path.abspath(__file__)) |
-FIND_RUNTIME_SYMBOLS_PATH = os.path.join( |
- BASE_PATH, os.pardir, 'find_runtime_symbols') |
-sys.path.append(FIND_RUNTIME_SYMBOLS_PATH) |
-import find_runtime_symbols |
-import prepare_symbol_info |
-import proc_maps |
+from lib.exceptions import ParsingException |
+import subcommands |
-from find_runtime_symbols import FUNCTION_SYMBOLS |
-from find_runtime_symbols import SOURCEFILE_SYMBOLS |
-from find_runtime_symbols import TYPEINFO_SYMBOLS |
- |
-BUCKET_ID = 5 |
-VIRTUAL = 0 |
-COMMITTED = 1 |
-ALLOC_COUNT = 2 |
-FREE_COUNT = 3 |
-NULL_REGEX = re.compile('') |
LOGGER = logging.getLogger('dmprof') |
-POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json') |
-CHROME_SRC_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir) |
- |
-DEFAULT_SORTERS = [ |
- os.path.join(BASE_PATH, 'sorter.malloc-component.json'), |
- os.path.join(BASE_PATH, 'sorter.malloc-type.json'), |
- os.path.join(BASE_PATH, 'sorter.vm-map.json'), |
- os.path.join(BASE_PATH, 'sorter.vm-sharing.json'), |
- ] |
- |
- |
-# Heap Profile Dump versions |
- |
-# DUMP_DEEP_[1-4] are obsolete. |
-# DUMP_DEEP_2+ distinguish mmap regions and malloc chunks. |
-# DUMP_DEEP_3+ don't include allocation functions in their stack dumps. |
-# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*". |
-# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1. |
-# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3. |
-DUMP_DEEP_1 = 'DUMP_DEEP_1' |
-DUMP_DEEP_2 = 'DUMP_DEEP_2' |
-DUMP_DEEP_3 = 'DUMP_DEEP_3' |
-DUMP_DEEP_4 = 'DUMP_DEEP_4' |
- |
-DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4) |
- |
-# DUMP_DEEP_5 doesn't separate sections for malloc and mmap. |
-# malloc and mmap are identified in bucket files. |
-# DUMP_DEEP_5 should be processed by POLICY_DEEP_4. |
-DUMP_DEEP_5 = 'DUMP_DEEP_5' |
- |
-# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5. |
-DUMP_DEEP_6 = 'DUMP_DEEP_6' |
- |
-# Heap Profile Policy versions |
- |
-# POLICY_DEEP_1 DOES NOT include allocation_type columns. |
-# mmap regions are distinguished by mmap frames in the pattern column. |
-POLICY_DEEP_1 = 'POLICY_DEEP_1' |
- |
-# POLICY_DEEP_2 DOES include allocation_type columns. |
-# mmap regions are distinguished by the allocation_type column. |
-POLICY_DEEP_2 = 'POLICY_DEEP_2' |
- |
-# POLICY_DEEP_3 is in JSON format. |
-POLICY_DEEP_3 = 'POLICY_DEEP_3' |
- |
-# POLICY_DEEP_4 contains typeinfo. |
-POLICY_DEEP_4 = 'POLICY_DEEP_4' |
- |
- |
-class EmptyDumpException(Exception): |
- def __init__(self, value=''): |
- super(EmptyDumpException, self).__init__() |
- self.value = value |
- def __str__(self): |
- return repr(self.value) |
- |
- |
-class ParsingException(Exception): |
- def __init__(self, value=''): |
- super(ParsingException, self).__init__() |
- self.value = value |
- def __str__(self): |
- return repr(self.value) |
- |
- |
-class InvalidDumpException(ParsingException): |
- def __init__(self, value): |
- super(InvalidDumpException, self).__init__() |
- self.value = value |
- def __str__(self): |
- return "invalid heap profile dump: %s" % repr(self.value) |
- |
- |
-class ObsoleteDumpVersionException(ParsingException): |
- def __init__(self, value): |
- super(ObsoleteDumpVersionException, self).__init__() |
- self.value = value |
- def __str__(self): |
- return "obsolete heap profile dump version: %s" % repr(self.value) |
- |
- |
-class ListAttribute(ExclusiveRangeDict.RangeAttribute): |
- """Represents a list for an attribute in range_dict.ExclusiveRangeDict.""" |
- def __init__(self): |
- super(ListAttribute, self).__init__() |
- self._list = [] |
- |
- def __str__(self): |
- return str(self._list) |
- |
- def __repr__(self): |
- return 'ListAttribute' + str(self._list) |
- |
- def __len__(self): |
- return len(self._list) |
- |
- def __iter__(self): |
- for x in self._list: |
- yield x |
- |
- def __getitem__(self, index): |
- return self._list[index] |
- |
- def __setitem__(self, index, value): |
- if index >= len(self._list): |
- self._list.extend([None] * (index + 1 - len(self._list))) |
- self._list[index] = value |
- |
- def copy(self): |
- new_list = ListAttribute() |
- for index, item in enumerate(self._list): |
- new_list[index] = copy.deepcopy(item) |
- return new_list |
- |
- |
-class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute): |
- """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.""" |
- _DUMMY_ENTRY = proc_maps.ProcMapsEntry( |
- 0, # begin |
- 0, # end |
- '-', # readable |
- '-', # writable |
- '-', # executable |
- '-', # private |
- 0, # offset |
- '00', # major |
- '00', # minor |
- 0, # inode |
- '' # name |
- ) |
- |
- def __init__(self): |
- super(ProcMapsEntryAttribute, self).__init__() |
- self._entry = self._DUMMY_ENTRY.as_dict() |
- |
- def __str__(self): |
- return str(self._entry) |
- |
- def __repr__(self): |
- return 'ProcMapsEntryAttribute' + str(self._entry) |
- |
- def __getitem__(self, key): |
- return self._entry[key] |
- |
- def __setitem__(self, key, value): |
- if key not in self._entry: |
- raise KeyError(key) |
- self._entry[key] = value |
- |
- def copy(self): |
- new_entry = ProcMapsEntryAttribute() |
- for key, value in self._entry.iteritems(): |
- new_entry[key] = copy.deepcopy(value) |
- return new_entry |
- |
- |
-def skip_while(index, max_index, skipping_condition): |
- """Increments |index| until |skipping_condition|(|index|) is False. |
- |
- Returns: |
-    A pair of an integer indicating the line number after skipping, and a |
-    boolean value which is True if a line was found for which |
-    skipping_condition is False. |
- """ |
- while skipping_condition(index): |
- index += 1 |
- if index >= max_index: |
- return index, False |
- return index, True |
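A minimal usage sketch of skip_while, with made-up input lines:

  lines = ['# comment', '# another comment', 'payload']
  index, found = skip_while(0, len(lines),
                            lambda n: lines[n].startswith('#'))
  # index == 2 and found == True; if every line matched, found would be False.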
- |
- |
-class SymbolDataSources(object): |
- """Manages symbol data sources in a process. |
- |
- The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and |
- so on. They are collected into a directory '|prefix|.symmap' from the binary |
- files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py. |
- |
-  Binaries themselves are not required for profiling. The prepared data |
-  sources work in place of a binary even if the binary has been overwritten |
-  with another binary. |
- |
- Note that loading the symbol data sources takes a long time. They are often |
- very big. So, the 'dmprof' profiler is designed to use 'SymbolMappingCache' |
- which caches actually used symbols. |
- """ |
- def __init__(self, prefix, alternative_dirs=None): |
- self._prefix = prefix |
- self._prepared_symbol_data_sources_path = None |
- self._loaded_symbol_data_sources = None |
- self._alternative_dirs = alternative_dirs or {} |
- |
- def prepare(self): |
- """Prepares symbol data sources by extracting mapping from a binary. |
- |
- The prepared symbol data sources are stored in a directory. The directory |
- name is stored in |self._prepared_symbol_data_sources_path|. |
- |
- Returns: |
- True if succeeded. |
- """ |
- LOGGER.info('Preparing symbol mapping...') |
- self._prepared_symbol_data_sources_path, used_tempdir = ( |
- prepare_symbol_info.prepare_symbol_info( |
- self._prefix + '.maps', |
- output_dir_path=self._prefix + '.symmap', |
- alternative_dirs=self._alternative_dirs, |
- use_tempdir=True, |
- use_source_file_name=True)) |
- if self._prepared_symbol_data_sources_path: |
- LOGGER.info(' Prepared symbol mapping.') |
- if used_tempdir: |
- LOGGER.warn(' Using a temporary directory for symbol mapping.') |
- LOGGER.warn(' Delete it by yourself.') |
- LOGGER.warn(' Or, move the directory by yourself to use it later.') |
- return True |
- else: |
- LOGGER.warn(' Failed to prepare symbol mapping.') |
- return False |
- |
- def get(self): |
- """Returns the prepared symbol data sources. |
- |
- Returns: |
- The prepared symbol data sources. None if failed. |
- """ |
- if not self._prepared_symbol_data_sources_path and not self.prepare(): |
- return None |
- if not self._loaded_symbol_data_sources: |
- LOGGER.info('Loading symbol mapping...') |
- self._loaded_symbol_data_sources = ( |
- find_runtime_symbols.RuntimeSymbolsInProcess.load( |
- self._prepared_symbol_data_sources_path)) |
- return self._loaded_symbol_data_sources |
- |
- def path(self): |
- """Returns the path of the prepared symbol data sources if possible.""" |
- if not self._prepared_symbol_data_sources_path and not self.prepare(): |
- return None |
- return self._prepared_symbol_data_sources_path |
- |
- |
-class SymbolFinder(object): |
- """Finds corresponding symbols from addresses. |
- |
-  This class only 'find()'s symbols from a specified |address_list|. |
-  It is introduced to make the finder mockable. |
- """ |
- def __init__(self, symbol_type, symbol_data_sources): |
- self._symbol_type = symbol_type |
- self._symbol_data_sources = symbol_data_sources |
- |
- def find(self, address_list): |
- return find_runtime_symbols.find_runtime_symbols( |
- self._symbol_type, self._symbol_data_sources.get(), address_list) |
- |
- |
-class SymbolMappingCache(object): |
- """Caches mapping from actually used addresses to symbols. |
- |
- 'update()' updates the cache from the original symbol data sources via |
- 'SymbolFinder'. Symbols can be looked up by the method 'lookup()'. |
- """ |
- def __init__(self): |
- self._symbol_mapping_caches = { |
- FUNCTION_SYMBOLS: {}, |
- SOURCEFILE_SYMBOLS: {}, |
- TYPEINFO_SYMBOLS: {}, |
- } |
- |
- def update(self, symbol_type, bucket_set, symbol_finder, cache_f): |
- """Updates symbol mapping cache on memory and in a symbol cache file. |
- |
- It reads cached symbol mapping from a symbol cache file |cache_f| if it |
-    exists. Unresolved addresses are then resolved and added to the cache |
-    both in memory and in the symbol cache file using 'SymbolFinder'. |
- |
- A cache file is formatted as follows: |
- <Address> <Symbol> |
- <Address> <Symbol> |
- <Address> <Symbol> |
- ... |
- |
- Args: |
- symbol_type: A type of symbols to update. It should be one of |
- FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. |
- bucket_set: A BucketSet object. |
- symbol_finder: A SymbolFinder object to find symbols. |
- cache_f: A readable and writable IO object of the symbol cache file. |
- """ |
- cache_f.seek(0, os.SEEK_SET) |
- self._load(cache_f, symbol_type) |
- |
- unresolved_addresses = sorted( |
- address for address in bucket_set.iter_addresses(symbol_type) |
- if address not in self._symbol_mapping_caches[symbol_type]) |
- |
- if not unresolved_addresses: |
- LOGGER.info('No need to resolve any more addresses.') |
- return |
- |
- cache_f.seek(0, os.SEEK_END) |
- LOGGER.info('Loading %d unresolved addresses.' % |
- len(unresolved_addresses)) |
- symbol_dict = symbol_finder.find(unresolved_addresses) |
- |
- for address, symbol in symbol_dict.iteritems(): |
- stripped_symbol = symbol.strip() or '?' |
- self._symbol_mapping_caches[symbol_type][address] = stripped_symbol |
- cache_f.write('%x %s\n' % (address, stripped_symbol)) |
- |
- def lookup(self, symbol_type, address): |
- """Looks up a symbol for a given |address|. |
- |
- Args: |
-      symbol_type: A type of symbols to look up. It should be one of |
- FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS. |
- address: An integer that represents an address. |
- |
- Returns: |
- A string that represents a symbol. |
- """ |
- return self._symbol_mapping_caches[symbol_type].get(address) |
- |
- def _load(self, cache_f, symbol_type): |
- try: |
- for line in cache_f: |
- items = line.rstrip().split(None, 1) |
- if len(items) == 1: |
- items.append('??') |
- self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1] |
- LOGGER.info('Loaded %d entries from symbol cache.' % |
- len(self._symbol_mapping_caches[symbol_type])) |
- except IOError as e: |
- LOGGER.info('The symbol cache file is invalid: %s' % e) |
- |
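A sketch of the cache file format handled by _load() and lookup() above; the address and symbol are invented, and FUNCTION_SYMBOLS comes from the module's find_runtime_symbols import:

  import cStringIO
  cache = SymbolMappingCache()
  cache_f = cStringIO.StringIO('7f0000001000 MyClass::DoSomething\n'
                               '7f0000002000 ??\n')
  cache._load(cache_f, FUNCTION_SYMBOLS)
  assert cache.lookup(FUNCTION_SYMBOLS, 0x7f0000001000) == 'MyClass::DoSomething'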
- |
-class Rule(object): |
- """Represents one matching rule in a policy file.""" |
- |
- def __init__(self, |
- name, |
- allocator_type, |
- stackfunction_pattern=None, |
- stacksourcefile_pattern=None, |
- typeinfo_pattern=None, |
- mappedpathname_pattern=None, |
- mappedpermission_pattern=None, |
- sharedwith=None): |
- self._name = name |
- self._allocator_type = allocator_type |
- |
- self._stackfunction_pattern = None |
- if stackfunction_pattern: |
- self._stackfunction_pattern = re.compile( |
- stackfunction_pattern + r'\Z') |
- |
- self._stacksourcefile_pattern = None |
- if stacksourcefile_pattern: |
- self._stacksourcefile_pattern = re.compile( |
- stacksourcefile_pattern + r'\Z') |
- |
- self._typeinfo_pattern = None |
- if typeinfo_pattern: |
- self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z') |
- |
- self._mappedpathname_pattern = None |
- if mappedpathname_pattern: |
- self._mappedpathname_pattern = re.compile(mappedpathname_pattern + r'\Z') |
- |
- self._mappedpermission_pattern = None |
- if mappedpermission_pattern: |
- self._mappedpermission_pattern = re.compile( |
- mappedpermission_pattern + r'\Z') |
- |
- self._sharedwith = [] |
- if sharedwith: |
- self._sharedwith = sharedwith |
- |
- @property |
- def name(self): |
- return self._name |
- |
- @property |
- def allocator_type(self): |
- return self._allocator_type |
- |
- @property |
- def stackfunction_pattern(self): |
- return self._stackfunction_pattern |
- |
- @property |
- def stacksourcefile_pattern(self): |
- return self._stacksourcefile_pattern |
- |
- @property |
- def typeinfo_pattern(self): |
- return self._typeinfo_pattern |
- |
- @property |
- def mappedpathname_pattern(self): |
- return self._mappedpathname_pattern |
- |
- @property |
- def mappedpermission_pattern(self): |
- return self._mappedpermission_pattern |
- |
- @property |
- def sharedwith(self): |
- return self._sharedwith |
- |
- |
-class Policy(object): |
- """Represents a policy, a content of a policy file.""" |
- |
- def __init__(self, rules, version, components): |
- self._rules = rules |
- self._version = version |
- self._components = components |
- |
- @property |
- def rules(self): |
- return self._rules |
- |
- @property |
- def version(self): |
- return self._version |
- |
- @property |
- def components(self): |
- return self._components |
- |
- def find_rule(self, component_name): |
- """Finds a rule whose name is |component_name|. """ |
- for rule in self._rules: |
- if rule.name == component_name: |
- return rule |
- return None |
- |
- def find_malloc(self, bucket): |
- """Finds a matching component name which a given |bucket| belongs to. |
- |
- Args: |
- bucket: A Bucket object to be searched for. |
- |
- Returns: |
- A string representing a component name. |
- """ |
- assert not bucket or bucket.allocator_type == 'malloc' |
- |
- if not bucket: |
- return 'no-bucket' |
- if bucket.component_cache: |
- return bucket.component_cache |
- |
- stackfunction = bucket.symbolized_joined_stackfunction |
- stacksourcefile = bucket.symbolized_joined_stacksourcefile |
- typeinfo = bucket.symbolized_typeinfo |
- if typeinfo.startswith('0x'): |
- typeinfo = bucket.typeinfo_name |
- |
- for rule in self._rules: |
- if (rule.allocator_type == 'malloc' and |
- (not rule.stackfunction_pattern or |
- rule.stackfunction_pattern.match(stackfunction)) and |
- (not rule.stacksourcefile_pattern or |
- rule.stacksourcefile_pattern.match(stacksourcefile)) and |
- (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))): |
- bucket.component_cache = rule.name |
- return rule.name |
- |
- assert False |
- |
- def find_mmap(self, region, bucket_set, |
- pageframe=None, group_pfn_counts=None): |
- """Finds a matching component which a given mmap |region| belongs to. |
- |
- It uses |bucket_set| to match with backtraces. If |pageframe| is given, |
- it considers memory sharing among processes. |
- |
-    NOTE: Don't use Bucket's |component_cache| for mmap regions because they |
-    are classified by more than bucket information (e.g. mappedpathname). |
- |
- Args: |
- region: A tuple representing a memory region. |
- bucket_set: A BucketSet object to look up backtraces. |
-      pageframe: A PageFrame object representing a pageframe, possibly |
-        including a pagecount. |
-      group_pfn_counts: A dict mapping a PFN to the number of times the |
-        pageframe is mapped by the known "group (Chrome)" processes. |
- |
- Returns: |
- A string representing a component name. |
- """ |
- assert region[0] == 'hooked' |
- bucket = bucket_set.get(region[1]['bucket_id']) |
- assert not bucket or bucket.allocator_type == 'mmap' |
- |
- if not bucket: |
- return 'no-bucket', None |
- |
- stackfunction = bucket.symbolized_joined_stackfunction |
- stacksourcefile = bucket.symbolized_joined_stacksourcefile |
- sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) |
- |
- for rule in self._rules: |
- if (rule.allocator_type == 'mmap' and |
- (not rule.stackfunction_pattern or |
- rule.stackfunction_pattern.match(stackfunction)) and |
- (not rule.stacksourcefile_pattern or |
- rule.stacksourcefile_pattern.match(stacksourcefile)) and |
- (not rule.mappedpathname_pattern or |
- rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and |
- (not rule.mappedpermission_pattern or |
- rule.mappedpermission_pattern.match( |
- region[1]['vma']['readable'] + |
- region[1]['vma']['writable'] + |
- region[1]['vma']['executable'] + |
- region[1]['vma']['private'])) and |
- (not rule.sharedwith or |
- not pageframe or sharedwith in rule.sharedwith)): |
- return rule.name, bucket |
- |
- assert False |
- |
- def find_unhooked(self, region, pageframe=None, group_pfn_counts=None): |
- """Finds a matching component which a given unhooked |region| belongs to. |
- |
- If |pageframe| is given, it considers memory sharing among processes. |
- |
- Args: |
- region: A tuple representing a memory region. |
-      pageframe: A PageFrame object representing a pageframe, possibly |
-        including a pagecount. |
-      group_pfn_counts: A dict mapping a PFN to the number of times the |
-        pageframe is mapped by the known "group (Chrome)" processes. |
- |
- Returns: |
- A string representing a component name. |
- """ |
- assert region[0] == 'unhooked' |
- sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts) |
- |
- for rule in self._rules: |
- if (rule.allocator_type == 'unhooked' and |
- (not rule.mappedpathname_pattern or |
- rule.mappedpathname_pattern.match(region[1]['vma']['name'])) and |
- (not rule.mappedpermission_pattern or |
- rule.mappedpermission_pattern.match( |
- region[1]['vma']['readable'] + |
- region[1]['vma']['writable'] + |
- region[1]['vma']['executable'] + |
- region[1]['vma']['private'])) and |
- (not rule.sharedwith or |
- not pageframe or sharedwith in rule.sharedwith)): |
- return rule.name |
- |
- assert False |
- |
- @staticmethod |
- def load(filename, filetype): |
- """Loads a policy file of |filename| in a |format|. |
- |
- Args: |
- filename: A filename to be loaded. |
- filetype: A string to specify a type of the file. Only 'json' is |
- supported for now. |
- |
- Returns: |
- A loaded Policy object. |
- """ |
- with open(os.path.join(BASE_PATH, filename)) as policy_f: |
- return Policy.parse(policy_f, filetype) |
- |
- @staticmethod |
- def parse(policy_f, filetype): |
- """Parses a policy file content in a |format|. |
- |
- Args: |
- policy_f: An IO object to be loaded. |
- filetype: A string to specify a type of the file. Only 'json' is |
- supported for now. |
- |
- Returns: |
- A loaded Policy object. |
- """ |
- if filetype == 'json': |
- return Policy._parse_json(policy_f) |
- else: |
- return None |
- |
- @staticmethod |
- def _parse_json(policy_f): |
- """Parses policy file in json format. |
- |
-    A policy file contains component names and their stacktrace patterns |
-    written as regular expressions. Those patterns are matched against the |
-    symbols of each stacktrace in the order written in the policy file. |
- |
- Args: |
- policy_f: A File/IO object to read. |
- |
- Returns: |
- A loaded policy object. |
- """ |
- policy = json.load(policy_f) |
- |
- rules = [] |
- for rule in policy['rules']: |
- stackfunction = rule.get('stackfunction') or rule.get('stacktrace') |
- stacksourcefile = rule.get('stacksourcefile') |
- rules.append(Rule( |
- rule['name'], |
- rule['allocator'], # allocator_type |
- stackfunction, |
- stacksourcefile, |
- rule['typeinfo'] if 'typeinfo' in rule else None, |
- rule.get('mappedpathname'), |
- rule.get('mappedpermission'), |
- rule.get('sharedwith'))) |
- |
- return Policy(rules, policy['version'], policy['components']) |
- |
- @staticmethod |
- def _categorize_pageframe(pageframe, group_pfn_counts): |
- """Categorizes a pageframe based on its sharing status. |
- |
- Returns: |
- 'private' if |pageframe| is not shared with other processes. 'group' |
- if |pageframe| is shared only with group (Chrome-related) processes. |
- 'others' if |pageframe| is shared with non-group processes. |
- """ |
- if not pageframe: |
- return 'private' |
- |
- if pageframe.pagecount: |
- if pageframe.pagecount == 1: |
- return 'private' |
- elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1: |
- return 'group' |
- else: |
- return 'others' |
- else: |
- if pageframe.pfn in group_pfn_counts: |
- return 'group' |
- else: |
- return 'private' |
- |
- |
-class PolicySet(object): |
- """Represents a set of policies.""" |
- |
- def __init__(self, policy_directory): |
- self._policy_directory = policy_directory |
- |
- @staticmethod |
- def load(labels=None): |
- """Loads a set of policies via the "default policy directory". |
- |
- The "default policy directory" contains pairs of policies and their labels. |
- For example, a policy "policy.l0.json" is labeled "l0" in the default |
- policy directory "policies.json". |
- |
- All policies in the directory are loaded by default. Policies can be |
- limited by |labels|. |
- |
- Args: |
- labels: An array that contains policy labels to be loaded. |
- |
- Returns: |
- A PolicySet object. |
- """ |
- default_policy_directory = PolicySet._load_default_policy_directory() |
- if labels: |
- specified_policy_directory = {} |
- for label in labels: |
- if label in default_policy_directory: |
- specified_policy_directory[label] = default_policy_directory[label] |
- # TODO(dmikurube): Load an un-labeled policy file. |
- return PolicySet._load_policies(specified_policy_directory) |
- else: |
- return PolicySet._load_policies(default_policy_directory) |
- |
- def __len__(self): |
- return len(self._policy_directory) |
- |
- def __iter__(self): |
- for label in self._policy_directory: |
- yield label |
- |
- def __getitem__(self, label): |
- return self._policy_directory[label] |
- |
- @staticmethod |
- def _load_default_policy_directory(): |
- with open(POLICIES_JSON_PATH, mode='r') as policies_f: |
- default_policy_directory = json.load(policies_f) |
- return default_policy_directory |
- |
- @staticmethod |
- def _load_policies(directory): |
- LOGGER.info('Loading policy files.') |
- policies = {} |
- for label in directory: |
- LOGGER.info(' %s: %s' % (label, directory[label]['file'])) |
- loaded = Policy.load(directory[label]['file'], directory[label]['format']) |
- if loaded: |
- policies[label] = loaded |
- return PolicySet(policies) |
- |
- |
-class Bucket(object): |
- """Represents a bucket, which is a unit of memory block classification.""" |
- |
- def __init__(self, stacktrace, allocator_type, typeinfo, typeinfo_name): |
- self._stacktrace = stacktrace |
- self._allocator_type = allocator_type |
- self._typeinfo = typeinfo |
- self._typeinfo_name = typeinfo_name |
- |
- self._symbolized_stackfunction = stacktrace |
- self._symbolized_joined_stackfunction = '' |
- self._symbolized_stacksourcefile = stacktrace |
- self._symbolized_joined_stacksourcefile = '' |
- self._symbolized_typeinfo = typeinfo_name |
- |
- self.component_cache = '' |
- |
- def __str__(self): |
- result = [] |
- result.append(self._allocator_type) |
- if self._symbolized_typeinfo == 'no typeinfo': |
- result.append('tno_typeinfo') |
- else: |
- result.append('t' + self._symbolized_typeinfo) |
- result.append('n' + self._typeinfo_name) |
- result.extend(['%s(@%s)' % (function, sourcefile) |
- for function, sourcefile |
- in zip(self._symbolized_stackfunction, |
- self._symbolized_stacksourcefile)]) |
- return ' '.join(result) |
- |
- def symbolize(self, symbol_mapping_cache): |
- """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|. |
- |
- Args: |
- symbol_mapping_cache: A SymbolMappingCache object. |
- """ |
- # TODO(dmikurube): Fill explicitly with numbers if symbol not found. |
- self._symbolized_stackfunction = [ |
- symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address) |
- for address in self._stacktrace] |
- self._symbolized_joined_stackfunction = ' '.join( |
- self._symbolized_stackfunction) |
- self._symbolized_stacksourcefile = [ |
- symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address) |
- for address in self._stacktrace] |
- self._symbolized_joined_stacksourcefile = ' '.join( |
- self._symbolized_stacksourcefile) |
- if not self._typeinfo: |
- self._symbolized_typeinfo = 'no typeinfo' |
- else: |
- self._symbolized_typeinfo = symbol_mapping_cache.lookup( |
- TYPEINFO_SYMBOLS, self._typeinfo) |
- if not self._symbolized_typeinfo: |
- self._symbolized_typeinfo = 'no typeinfo' |
- |
- def clear_component_cache(self): |
- self.component_cache = '' |
- |
- @property |
- def stacktrace(self): |
- return self._stacktrace |
- |
- @property |
- def allocator_type(self): |
- return self._allocator_type |
- |
- @property |
- def typeinfo(self): |
- return self._typeinfo |
- |
- @property |
- def typeinfo_name(self): |
- return self._typeinfo_name |
- |
- @property |
- def symbolized_stackfunction(self): |
- return self._symbolized_stackfunction |
- |
- @property |
- def symbolized_joined_stackfunction(self): |
- return self._symbolized_joined_stackfunction |
- |
- @property |
- def symbolized_stacksourcefile(self): |
- return self._symbolized_stacksourcefile |
- |
- @property |
- def symbolized_joined_stacksourcefile(self): |
- return self._symbolized_joined_stacksourcefile |
- |
- @property |
- def symbolized_typeinfo(self): |
- return self._symbolized_typeinfo |
- |
- |
-class BucketSet(object): |
- """Represents a set of bucket.""" |
- def __init__(self): |
- self._buckets = {} |
- self._code_addresses = set() |
- self._typeinfo_addresses = set() |
- |
- def load(self, prefix): |
- """Loads all related bucket files. |
- |
- Args: |
- prefix: A prefix string for bucket file names. |
- """ |
- LOGGER.info('Loading bucket files.') |
- |
- n = 0 |
- skipped = 0 |
- while True: |
- path = '%s.%04d.buckets' % (prefix, n) |
- if not os.path.exists(path) or not os.stat(path).st_size: |
- if skipped > 10: |
- break |
- n += 1 |
- skipped += 1 |
- continue |
- LOGGER.info(' %s' % path) |
- with open(path, 'r') as f: |
- self._load_file(f) |
- n += 1 |
- skipped = 0 |
- |
- def _load_file(self, bucket_f): |
- for line in bucket_f: |
- words = line.split() |
- typeinfo = None |
- typeinfo_name = '' |
- stacktrace_begin = 2 |
- for index, word in enumerate(words): |
- if index < 2: |
- continue |
- if word[0] == 't': |
- typeinfo = int(word[1:], 16) |
- self._typeinfo_addresses.add(typeinfo) |
- elif word[0] == 'n': |
- typeinfo_name = word[1:] |
- else: |
- stacktrace_begin = index |
- break |
- stacktrace = [int(address, 16) for address in words[stacktrace_begin:]] |
- for frame in stacktrace: |
- self._code_addresses.add(frame) |
- self._buckets[int(words[0])] = Bucket( |
- stacktrace, words[1], typeinfo, typeinfo_name) |
- |
- def __iter__(self): |
- for bucket_id, bucket_content in self._buckets.iteritems(): |
- yield bucket_id, bucket_content |
- |
- def __getitem__(self, bucket_id): |
- return self._buckets[bucket_id] |
- |
- def get(self, bucket_id): |
- return self._buckets.get(bucket_id) |
- |
- def symbolize(self, symbol_mapping_cache): |
- for bucket_content in self._buckets.itervalues(): |
- bucket_content.symbolize(symbol_mapping_cache) |
- |
- def clear_component_cache(self): |
- for bucket_content in self._buckets.itervalues(): |
- bucket_content.clear_component_cache() |
- |
- def iter_addresses(self, symbol_type): |
- if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]: |
- for function in self._code_addresses: |
- yield function |
- else: |
- for function in self._typeinfo_addresses: |
- yield function |
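As _load_file() above suggests, a bucket file line reads "<id> <allocator> [t<typeinfo>] [n<name>] <stack addresses...>"; a sketch with invented values:

  bucket_set = BucketSet()
  bucket_set._load_file(
      ['123 malloc t7f00deadbeef nstd::string 7f0000001000 7f0000002000\n'])
  bucket = bucket_set.get(123)
  assert bucket.allocator_type == 'malloc'
  assert bucket.stacktrace == [0x7f0000001000, 0x7f0000002000]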
- |
- |
-class PageFrame(object): |
- """Represents a pageframe and maybe its shared count.""" |
- def __init__(self, pfn, size, pagecount, start_truncated, end_truncated): |
- self._pfn = pfn |
- self._size = size |
- self._pagecount = pagecount |
- self._start_truncated = start_truncated |
- self._end_truncated = end_truncated |
- |
- def __str__(self): |
- result = str() |
- if self._start_truncated: |
- result += '<' |
- result += '%06x#%d' % (self._pfn, self._pagecount) |
- if self._end_truncated: |
- result += '>' |
- return result |
- |
- def __repr__(self): |
- return str(self) |
- |
- @staticmethod |
- def parse(encoded_pfn, size): |
- start = 0 |
- end = len(encoded_pfn) |
- end_truncated = False |
- if encoded_pfn.endswith('>'): |
- end = len(encoded_pfn) - 1 |
- end_truncated = True |
- pagecount_found = encoded_pfn.find('#') |
- pagecount = None |
- if pagecount_found >= 0: |
- encoded_pagecount = 'AAA' + encoded_pfn[pagecount_found+1 : end] |
- pagecount = struct.unpack( |
- '>I', '\x00' + encoded_pagecount.decode('base64'))[0] |
- end = pagecount_found |
- start_truncated = False |
- if encoded_pfn.startswith('<'): |
- start = 1 |
- start_truncated = True |
- |
- pfn = struct.unpack( |
- '>I', '\x00' + (encoded_pfn[start:end]).decode('base64'))[0] |
- |
- return PageFrame(pfn, size, pagecount, start_truncated, end_truncated) |
- |
- @property |
- def pfn(self): |
- return self._pfn |
- |
- @property |
- def size(self): |
- return self._size |
- |
- def set_size(self, size): |
- self._size = size |
- |
- @property |
- def pagecount(self): |
- return self._pagecount |
- |
- @property |
- def start_truncated(self): |
- return self._start_truncated |
- |
- @property |
- def end_truncated(self): |
- return self._end_truncated |
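A decoding sketch for PageFrame.parse(): 'AAAB' is base64 for the three bytes 00 00 01, so the PFN decodes to 1, and '#B' carries a base64-coded pagecount of 1 (invented values):

  pf = PageFrame.parse('AAAB#B', 4096)
  assert pf.pfn == 1 and pf.pagecount == 1 and pf.size == 4096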
- |
- |
-class PFNCounts(object): |
- """Represents counts of PFNs in a process.""" |
- |
- _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') |
- |
- def __init__(self, path, modified_time): |
- matched = self._PATH_PATTERN.match(path) |
- if matched: |
- self._pid = int(matched.group(2)) |
- else: |
- self._pid = 0 |
- self._command_line = '' |
- self._pagesize = 4096 |
- self._path = path |
- self._pfn_meta = '' |
- self._pfnset = {} |
- self._reason = '' |
- self._time = modified_time |
- |
- @staticmethod |
- def load(path, log_header='Loading PFNs from a heap profile dump: '): |
- pfnset = PFNCounts(path, float(os.stat(path).st_mtime)) |
- LOGGER.info('%s%s' % (log_header, path)) |
- |
- with open(path, 'r') as pfnset_f: |
- pfnset.load_file(pfnset_f) |
- |
- return pfnset |
- |
- @property |
- def path(self): |
- return self._path |
- |
- @property |
- def pid(self): |
- return self._pid |
- |
- @property |
- def time(self): |
- return self._time |
- |
- @property |
- def reason(self): |
- return self._reason |
- |
- @property |
- def iter_pfn(self): |
- for pfn, count in self._pfnset.iteritems(): |
- yield pfn, count |
- |
- def load_file(self, pfnset_f): |
- prev_pfn_end_truncated = None |
- for line in pfnset_f: |
- line = line.strip() |
- if line.startswith('GLOBAL_STATS:') or line.startswith('STACKTRACES:'): |
- break |
- elif line.startswith('PF: '): |
- for encoded_pfn in line[3:].split(): |
- page_frame = PageFrame.parse(encoded_pfn, self._pagesize) |
- if page_frame.start_truncated and ( |
- not prev_pfn_end_truncated or |
- prev_pfn_end_truncated != page_frame.pfn): |
- LOGGER.error('Broken page frame number: %s.' % encoded_pfn) |
- self._pfnset[page_frame.pfn] = self._pfnset.get(page_frame.pfn, 0) + 1 |
- if page_frame.end_truncated: |
- prev_pfn_end_truncated = page_frame.pfn |
- else: |
- prev_pfn_end_truncated = None |
- elif line.startswith('PageSize: '): |
- self._pagesize = int(line[10:]) |
- elif line.startswith('PFN: '): |
- self._pfn_meta = line[5:] |
- elif line.startswith('PageFrame: '): |
- self._pfn_meta = line[11:] |
- elif line.startswith('Time: '): |
- self._time = float(line[6:]) |
- elif line.startswith('CommandLine: '): |
- self._command_line = line[13:] |
- elif line.startswith('Reason: '): |
- self._reason = line[8:] |
- |
- |
-class Dump(object): |
- """Represents a heap profile dump.""" |
- |
- _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$') |
- |
- _HOOK_PATTERN = re.compile( |
- r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+' |
- r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE) |
- |
- _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' |
- '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)') |
- _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / ' |
- '(?P<RESERVED>[0-9]+)') |
- |
- _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)') |
- _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)') |
- |
- _TIME_PATTERN_FORMAT = re.compile( |
- r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?') |
- _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$') |
- |
- def __init__(self, path, modified_time): |
- self._path = path |
- matched = self._PATH_PATTERN.match(path) |
- self._pid = int(matched.group(2)) |
- self._count = int(matched.group(3)) |
- self._time = modified_time |
- self._map = {} |
- self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute) |
- self._stacktrace_lines = [] |
- self._global_stats = {} # used only in apply_policy |
- |
- self._run_id = '' |
- self._pagesize = 4096 |
- self._pageframe_length = 0 |
- self._pageframe_encoding = '' |
- self._has_pagecount = False |
- |
- self._version = '' |
- self._lines = [] |
- |
- @property |
- def path(self): |
- return self._path |
- |
- @property |
- def count(self): |
- return self._count |
- |
- @property |
- def time(self): |
- return self._time |
- |
- @property |
- def iter_map(self): |
- for region in sorted(self._map.iteritems()): |
- yield region[0], region[1] |
- |
- def iter_procmaps(self): |
-    for begin, end, attr in self._procmaps.iter_range(): |
- yield begin, end, attr |
- |
- @property |
- def iter_stacktrace(self): |
- for line in self._stacktrace_lines: |
- yield line |
- |
- def global_stat(self, name): |
- return self._global_stats[name] |
- |
- @property |
- def run_id(self): |
- return self._run_id |
- |
- @property |
- def pagesize(self): |
- return self._pagesize |
- |
- @property |
- def pageframe_length(self): |
- return self._pageframe_length |
- |
- @property |
- def pageframe_encoding(self): |
- return self._pageframe_encoding |
- |
- @property |
- def has_pagecount(self): |
- return self._has_pagecount |
- |
- @staticmethod |
- def load(path, log_header='Loading a heap profile dump: '): |
- """Loads a heap profile dump. |
- |
- Args: |
- path: A file path string to load. |
- log_header: A preceding string for log messages. |
- |
- Returns: |
- A loaded Dump object. |
- |
- Raises: |
- ParsingException for invalid heap profile dumps. |
- """ |
- dump = Dump(path, os.stat(path).st_mtime) |
- with open(path, 'r') as f: |
- dump.load_file(f, log_header) |
- return dump |
- |
- def load_file(self, f, log_header): |
- self._lines = [line for line in f |
- if line and not line.startswith('#')] |
- |
- try: |
- self._version, ln = self._parse_version() |
- self._parse_meta_information() |
- if self._version == DUMP_DEEP_6: |
- self._parse_mmap_list() |
- self._parse_global_stats() |
- self._extract_stacktrace_lines(ln) |
- except EmptyDumpException: |
- LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path)) |
- except ParsingException, e: |
- LOGGER.error('%s%s ...error %s' % (log_header, self._path, e)) |
- raise |
- else: |
- LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version)) |
- |
- def _parse_version(self): |
- """Parses a version string in self._lines. |
- |
- Returns: |
- A pair of (a string representing a version of the stacktrace dump, |
- and an integer indicating a line number next to the version string). |
- |
- Raises: |
- ParsingException for invalid dump versions. |
- """ |
- version = '' |
- |
- # Skip until an identifiable line. |
- headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ') |
- if not self._lines: |
- raise EmptyDumpException('Empty heap dump file.') |
- (ln, found) = skip_while( |
- 0, len(self._lines), |
- lambda n: not self._lines[n].startswith(headers)) |
- if not found: |
- raise InvalidDumpException('No version header.') |
- |
- # Identify a version. |
- if self._lines[ln].startswith('heap profile: '): |
- version = self._lines[ln][13:].strip() |
- if version in (DUMP_DEEP_5, DUMP_DEEP_6): |
- (ln, _) = skip_while( |
- ln, len(self._lines), |
- lambda n: self._lines[n] != 'STACKTRACES:\n') |
- elif version in DUMP_DEEP_OBSOLETE: |
- raise ObsoleteDumpVersionException(version) |
- else: |
- raise InvalidDumpException('Invalid version: %s' % version) |
- elif self._lines[ln] == 'STACKTRACES:\n': |
- raise ObsoleteDumpVersionException(DUMP_DEEP_1) |
- elif self._lines[ln] == 'MMAP_STACKTRACES:\n': |
- raise ObsoleteDumpVersionException(DUMP_DEEP_2) |
- |
- return (version, ln) |
- |
- def _parse_global_stats(self): |
- """Parses lines in self._lines as global stats.""" |
- (ln, _) = skip_while( |
- 0, len(self._lines), |
- lambda n: self._lines[n] != 'GLOBAL_STATS:\n') |
- |
- global_stat_names = [ |
- 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack', |
- 'other', 'nonprofiled-absent', 'nonprofiled-anonymous', |
- 'nonprofiled-file-exec', 'nonprofiled-file-nonexec', |
- 'nonprofiled-stack', 'nonprofiled-other', |
- 'profiled-mmap', 'profiled-malloc'] |
- |
- for prefix in global_stat_names: |
- (ln, _) = skip_while( |
- ln, len(self._lines), |
- lambda n: self._lines[n].split()[0] != prefix) |
- words = self._lines[ln].split() |
- self._global_stats[prefix + '_virtual'] = int(words[-2]) |
- self._global_stats[prefix + '_committed'] = int(words[-1]) |
- |
- def _parse_meta_information(self): |
- """Parses lines in self._lines for meta information.""" |
- (ln, found) = skip_while( |
- 0, len(self._lines), |
- lambda n: self._lines[n] != 'META:\n') |
- if not found: |
- return |
- ln += 1 |
- |
- while True: |
- if self._lines[ln].startswith('Time:'): |
- matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln]) |
- matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln]) |
- if matched_format: |
- self._time = time.mktime(datetime.datetime.strptime( |
- matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple()) |
- if matched_format.group(2): |
- self._time += float(matched_format.group(2)[1:]) / 1000.0 |
- elif matched_seconds: |
- self._time = float(matched_seconds.group(1)) |
- elif self._lines[ln].startswith('Reason:'): |
- pass # Nothing to do for 'Reason:' |
- elif self._lines[ln].startswith('PageSize: '): |
- self._pagesize = int(self._lines[ln][10:]) |
- elif self._lines[ln].startswith('CommandLine:'): |
- pass |
- elif (self._lines[ln].startswith('PageFrame: ') or |
- self._lines[ln].startswith('PFN: ')): |
- if self._lines[ln].startswith('PageFrame: '): |
- words = self._lines[ln][11:].split(',') |
- else: |
- words = self._lines[ln][5:].split(',') |
- for word in words: |
- if word == '24': |
- self._pageframe_length = 24 |
- elif word == 'Base64': |
- self._pageframe_encoding = 'base64' |
- elif word == 'PageCount': |
- self._has_pagecount = True |
- elif self._lines[ln].startswith('RunID: '): |
- self._run_id = self._lines[ln][7:].strip() |
- elif (self._lines[ln].startswith('MMAP_LIST:') or |
- self._lines[ln].startswith('GLOBAL_STATS:')): |
-        # The META section ends at "MMAP_LIST:" or "GLOBAL_STATS:". |
- break |
- else: |
- pass |
- ln += 1 |
- |
- def _parse_mmap_list(self): |
- """Parses lines in self._lines as a mmap list.""" |
- (ln, found) = skip_while( |
- 0, len(self._lines), |
- lambda n: self._lines[n] != 'MMAP_LIST:\n') |
- if not found: |
- return {} |
- |
- ln += 1 |
- self._map = {} |
- current_vma = {} |
- pageframe_list = [] |
- while True: |
- entry = proc_maps.ProcMaps.parse_line(self._lines[ln]) |
- if entry: |
- current_vma = {} |
- for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end): |
- for key, value in entry.as_dict().iteritems(): |
- attr[key] = value |
- current_vma[key] = value |
- ln += 1 |
- continue |
- |
- if self._lines[ln].startswith(' PF: '): |
- for pageframe in self._lines[ln][5:].split(): |
- pageframe_list.append(PageFrame.parse(pageframe, self._pagesize)) |
- ln += 1 |
- continue |
- |
- matched = self._HOOK_PATTERN.match(self._lines[ln]) |
- if not matched: |
- break |
- # 2: starting address |
- # 5: end address |
- # 7: hooked or unhooked |
- # 8: additional information |
- if matched.group(7) == 'hooked': |
- submatched = self._HOOKED_PATTERN.match(matched.group(8)) |
- if not submatched: |
- submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8)) |
- elif matched.group(7) == 'unhooked': |
- submatched = self._UNHOOKED_PATTERN.match(matched.group(8)) |
- if not submatched: |
- submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8)) |
- else: |
- assert matched.group(7) in ['hooked', 'unhooked'] |
- |
- submatched_dict = submatched.groupdict() |
- region_info = { 'vma': current_vma } |
- if submatched_dict.get('TYPE'): |
- region_info['type'] = submatched_dict['TYPE'].strip() |
- if submatched_dict.get('COMMITTED'): |
- region_info['committed'] = int(submatched_dict['COMMITTED']) |
- if submatched_dict.get('RESERVED'): |
- region_info['reserved'] = int(submatched_dict['RESERVED']) |
- if submatched_dict.get('BUCKETID'): |
- region_info['bucket_id'] = int(submatched_dict['BUCKETID']) |
- |
- if matched.group(1) == '(': |
- start = current_vma['begin'] |
- else: |
- start = int(matched.group(2), 16) |
- if matched.group(4) == '(': |
- end = current_vma['end'] |
- else: |
- end = int(matched.group(5), 16) |
- |
- if pageframe_list and pageframe_list[0].start_truncated: |
- pageframe_list[0].set_size( |
- pageframe_list[0].size - start % self._pagesize) |
- if pageframe_list and pageframe_list[-1].end_truncated: |
- pageframe_list[-1].set_size( |
- pageframe_list[-1].size - (self._pagesize - end % self._pagesize)) |
- region_info['pageframe'] = pageframe_list |
- pageframe_list = [] |
- |
- self._map[(start, end)] = (matched.group(7), region_info) |
- ln += 1 |
- |
- def _extract_stacktrace_lines(self, line_number): |
- """Extracts the position of stacktrace lines. |
- |
-    Valid stacktrace lines are stored in self._stacktrace_lines. |
- |
- Args: |
- line_number: A line number to start parsing in lines. |
- |
- Raises: |
- ParsingException for invalid dump versions. |
- """ |
- if self._version in (DUMP_DEEP_5, DUMP_DEEP_6): |
- (line_number, _) = skip_while( |
- line_number, len(self._lines), |
- lambda n: not self._lines[n].split()[0].isdigit()) |
- stacktrace_start = line_number |
- (line_number, _) = skip_while( |
- line_number, len(self._lines), |
- lambda n: self._check_stacktrace_line(self._lines[n])) |
- self._stacktrace_lines = self._lines[stacktrace_start:line_number] |
- |
- elif self._version in DUMP_DEEP_OBSOLETE: |
- raise ObsoleteDumpVersionException(self._version) |
- |
- else: |
- raise InvalidDumpException('Invalid version: %s' % self._version) |
- |
- @staticmethod |
- def _check_stacktrace_line(stacktrace_line): |
- """Checks if a given stacktrace_line is valid as stacktrace. |
- |
- Args: |
- stacktrace_line: A string to be checked. |
- |
- Returns: |
- True if the given stacktrace_line is valid. |
- """ |
- words = stacktrace_line.split() |
- if len(words) < BUCKET_ID + 1: |
- return False |
- if words[BUCKET_ID - 1] != '@': |
- return False |
- return True |
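Judging from the index constants near the top of this file (VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, BUCKET_ID) and the check above, a DUMP_DEEP_5/6 stacktrace line reads "<virtual> <committed> <allocs> <frees> @ <bucket id>"; a made-up example:

  line = ' 1048576   524288    10     2 @ 123'
  assert Dump._check_stacktrace_line(line)
  words = line.split()
  assert int(words[BUCKET_ID]) == 123 and int(words[COMMITTED]) == 524288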
- |
- |
-class DumpList(object): |
- """Represents a sequence of heap profile dumps.""" |
- |
- def __init__(self, dump_list): |
- self._dump_list = dump_list |
- |
- @staticmethod |
- def load(path_list): |
- LOGGER.info('Loading heap dump profiles.') |
- dump_list = [] |
- for path in path_list: |
- dump_list.append(Dump.load(path, ' ')) |
- return DumpList(dump_list) |
- |
- def __len__(self): |
- return len(self._dump_list) |
- |
- def __iter__(self): |
- for dump in self._dump_list: |
- yield dump |
- |
- def __getitem__(self, index): |
- return self._dump_list[index] |
- |
- |
-class Unit(object): |
- """Represents a minimum unit of memory usage categorization. |
- |
-  It is meant to be subclassed for different spaces such as the entire |
-  virtual memory and the malloc arena. Such spaces are called "worlds" in |
-  dmprof. (For example, the "vm" world and the "malloc" world.) |
- """ |
- def __init__(self, unit_id, size): |
- self._unit_id = unit_id |
- self._size = size |
- |
- @property |
- def unit_id(self): |
- return self._unit_id |
- |
- @property |
- def size(self): |
- return self._size |
- |
- |
-class VMUnit(Unit): |
- """Represents a Unit for a memory region on virtual memory.""" |
- def __init__(self, unit_id, committed, reserved, mmap, region, |
- pageframe=None, group_pfn_counts=None): |
- super(VMUnit, self).__init__(unit_id, committed) |
- self._reserved = reserved |
- self._mmap = mmap |
- self._region = region |
- self._pageframe = pageframe |
- self._group_pfn_counts = group_pfn_counts |
- |
- @property |
- def committed(self): |
- return self._size |
- |
- @property |
- def reserved(self): |
- return self._reserved |
- |
- @property |
- def mmap(self): |
- return self._mmap |
- |
- @property |
- def region(self): |
- return self._region |
- |
- @property |
- def pageframe(self): |
- return self._pageframe |
- |
- @property |
- def group_pfn_counts(self): |
- return self._group_pfn_counts |
- |
- |
-class MMapUnit(VMUnit): |
- """Represents a Unit for a mmap'ed region.""" |
- def __init__(self, unit_id, committed, reserved, region, bucket_set, |
- pageframe=None, group_pfn_counts=None): |
- super(MMapUnit, self).__init__(unit_id, committed, reserved, True, |
- region, pageframe, group_pfn_counts) |
- self._bucket_set = bucket_set |
- |
- def __repr__(self): |
- return str(self.region) |
- |
- @property |
- def bucket_set(self): |
- return self._bucket_set |
- |
- |
-class UnhookedUnit(VMUnit): |
- """Represents a Unit for a non-mmap'ed memory region on virtual memory.""" |
- def __init__(self, unit_id, committed, reserved, region, |
- pageframe=None, group_pfn_counts=None): |
- super(UnhookedUnit, self).__init__(unit_id, committed, reserved, False, |
- region, pageframe, group_pfn_counts) |
- |
- def __repr__(self): |
- return str(self.region) |
- |
- |
-class MallocUnit(Unit): |
- """Represents a Unit for a malloc'ed memory block.""" |
- def __init__(self, unit_id, size, alloc_count, free_count, bucket): |
- super(MallocUnit, self).__init__(unit_id, size) |
- self._bucket = bucket |
- self._alloc_count = alloc_count |
- self._free_count = free_count |
- |
- def __repr__(self): |
- return str(self.bucket) |
- |
- @property |
- def bucket(self): |
- return self._bucket |
- |
- @property |
- def alloc_count(self): |
- return self._alloc_count |
- |
- @property |
- def free_count(self): |
- return self._free_count |
- |
- |
-class UnitSet(object): |
- """Represents an iterable set of Units.""" |
- def __init__(self, world): |
- self._units = {} |
- self._world = world |
- |
- def __repr__(self): |
- return str(self._units) |
- |
- def __iter__(self): |
- for unit_id in sorted(self._units): |
- yield self._units[unit_id] |
- |
- def append(self, unit, overwrite=False): |
- if not overwrite and unit.unit_id in self._units: |
- LOGGER.error('The unit id=%s already exists.' % str(unit.unit_id)) |
- self._units[unit.unit_id] = unit |
- |
- |
-class AbstractRule(object): |
- """An abstract class for rules to be matched with units.""" |
- def __init__(self, dct): |
- self._name = dct['name'] |
- self._hidden = dct.get('hidden', False) |
- self._subworlds = dct.get('subworlds', []) |
- |
- def match(self, unit): |
- raise NotImplementedError() |
- |
- @property |
- def name(self): |
- return self._name |
- |
- @property |
- def hidden(self): |
- return self._hidden |
- |
- def iter_subworld(self): |
- for subworld in self._subworlds: |
- yield subworld |
- |
- |
-class VMRule(AbstractRule): |
- """Represents a Rule to match with virtual memory regions.""" |
- def __init__(self, dct): |
- super(VMRule, self).__init__(dct) |
- self._backtrace_function = dct.get('backtrace_function', None) |
- if self._backtrace_function: |
- self._backtrace_function = re.compile(self._backtrace_function) |
- self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) |
- if self._backtrace_sourcefile: |
- self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) |
- self._mmap = dct.get('mmap', None) |
- self._sharedwith = dct.get('sharedwith', []) |
- self._mapped_pathname = dct.get('mapped_pathname', None) |
- if self._mapped_pathname: |
- self._mapped_pathname = re.compile(self._mapped_pathname) |
- self._mapped_permission = dct.get('mapped_permission', None) |
- if self._mapped_permission: |
- self._mapped_permission = re.compile(self._mapped_permission) |
- |
- def __repr__(self): |
- result = cStringIO.StringIO() |
- result.write('{"%s"=>' % self._name) |
- attributes = [] |
- attributes.append('mmap: %s' % self._mmap) |
- if self._backtrace_function: |
- attributes.append('backtrace_function: "%s"' % |
- self._backtrace_function.pattern) |
- if self._sharedwith: |
- attributes.append('sharedwith: "%s"' % self._sharedwith) |
- if self._mapped_pathname: |
- attributes.append('mapped_pathname: "%s"' % self._mapped_pathname.pattern) |
- if self._mapped_permission: |
- attributes.append('mapped_permission: "%s"' % |
- self._mapped_permission.pattern) |
- result.write('%s}' % ', '.join(attributes)) |
- return result.getvalue() |
- |
- def match(self, unit): |
- if unit.mmap: |
- assert unit.region[0] == 'hooked' |
- bucket = unit.bucket_set.get(unit.region[1]['bucket_id']) |
- assert bucket |
- assert bucket.allocator_type == 'mmap' |
- |
- stackfunction = bucket.symbolized_joined_stackfunction |
- stacksourcefile = bucket.symbolized_joined_stacksourcefile |
- |
- # TODO(dmikurube): Support shared memory. |
- sharedwith = None |
- |
- if self._mmap == False: # (self._mmap == None) should go through. |
- return False |
- if (self._backtrace_function and |
- not self._backtrace_function.match(stackfunction)): |
- return False |
- if (self._backtrace_sourcefile and |
- not self._backtrace_sourcefile.match(stacksourcefile)): |
- return False |
- if (self._mapped_pathname and |
- not self._mapped_pathname.match(unit.region[1]['vma']['name'])): |
- return False |
- if (self._mapped_permission and |
- not self._mapped_permission.match( |
- unit.region[1]['vma']['readable'] + |
- unit.region[1]['vma']['writable'] + |
- unit.region[1]['vma']['executable'] + |
- unit.region[1]['vma']['private'])): |
- return False |
- if (self._sharedwith and |
- unit.pageframe and sharedwith not in self._sharedwith): |
- return False |
- |
- return True |
- |
- else: |
- assert unit.region[0] == 'unhooked' |
- |
- # TODO(dmikurube): Support shared memory. |
- sharedwith = None |
- |
- if self._mmap == True: # (self._mmap == None) should go through. |
- return False |
- if (self._mapped_pathname and |
- not self._mapped_pathname.match(unit.region[1]['vma']['name'])): |
- return False |
- if (self._mapped_permission and |
- not self._mapped_permission.match( |
- unit.region[1]['vma']['readable'] + |
- unit.region[1]['vma']['writable'] + |
- unit.region[1]['vma']['executable'] + |
- unit.region[1]['vma']['private'])): |
- return False |
- if (self._sharedwith and |
- unit.pageframe and sharedwith not in self._sharedwith): |
- return False |
- |
- return True |
- |
- |
-class MallocRule(AbstractRule): |
- """Represents a Rule to match with malloc'ed blocks.""" |
- def __init__(self, dct): |
- super(MallocRule, self).__init__(dct) |
- self._backtrace_function = dct.get('backtrace_function', None) |
- if self._backtrace_function: |
- self._backtrace_function = re.compile(self._backtrace_function) |
- self._backtrace_sourcefile = dct.get('backtrace_sourcefile', None) |
- if self._backtrace_sourcefile: |
- self._backtrace_sourcefile = re.compile(self._backtrace_sourcefile) |
- self._typeinfo = dct.get('typeinfo', None) |
- if self._typeinfo: |
- self._typeinfo = re.compile(self._typeinfo) |
- |
- def __repr__(self): |
- result = cStringIO.StringIO() |
- result.write('{"%s"=>' % self._name) |
- attributes = [] |
- if self._backtrace_function: |
- attributes.append('backtrace_function: "%s"' % self._backtrace_function) |
- if self._typeinfo: |
- attributes.append('typeinfo: "%s"' % self._typeinfo) |
- result.write('%s}' % ', '.join(attributes)) |
- return result.getvalue() |
- |
- def match(self, unit): |
- assert unit.bucket.allocator_type == 'malloc' |
- |
- stackfunction = unit.bucket.symbolized_joined_stackfunction |
- stacksourcefile = unit.bucket.symbolized_joined_stacksourcefile |
- typeinfo = unit.bucket.symbolized_typeinfo |
- if typeinfo.startswith('0x'): |
- typeinfo = unit.bucket.typeinfo_name |
- |
- return ((not self._backtrace_function or |
- self._backtrace_function.match(stackfunction)) and |
- (not self._backtrace_sourcefile or |
- self._backtrace_sourcefile.match(stacksourcefile)) and |
- (not self._typeinfo or self._typeinfo.match(typeinfo))) |
- |
- |
-class NoBucketMallocRule(MallocRule): |
- """Represents a Rule that small ignorable units match with.""" |
- def __init__(self): |
- super(NoBucketMallocRule, self).__init__({'name': 'tc-no-bucket'}) |
- self._no_bucket = True |
- |
- @property |
- def no_bucket(self): |
- return self._no_bucket |
- |
- |
-class AbstractSorter(object): |
- """An abstract class for classifying Units with a set of Rules.""" |
- def __init__(self, dct): |
- self._type = 'sorter' |
- self._version = dct['version'] |
- self._world = dct['world'] |
- self._name = dct['name'] |
- self._order = dct['order'] |
- |
- self._rules = [] |
- for rule in dct['rules']: |
- if dct['world'] == 'vm': |
- self._rules.append(VMRule(rule)) |
- elif dct['world'] == 'malloc': |
- self._rules.append(MallocRule(rule)) |
- else: |
- LOGGER.error('Unknown sorter world type') |
- |
- def __repr__(self): |
- result = cStringIO.StringIO() |
- result.write('world=%s' % self._world) |
- result.write('order=%s' % self._order) |
- result.write('rules:') |
- for rule in self._rules: |
- result.write(' %s' % rule) |
- return result.getvalue() |
- |
- @staticmethod |
- def load(filename): |
- with open(filename) as sorter_f: |
- sorter_dict = json.load(sorter_f) |
- if sorter_dict['world'] == 'vm': |
- return VMSorter(sorter_dict) |
- elif sorter_dict['world'] == 'malloc': |
- return MallocSorter(sorter_dict) |
- else: |
- LOGGER.error('Unknown sorter world type') |
- return None |
- |
- @property |
- def world(self): |
- return self._world |
- |
- @property |
- def name(self): |
- return self._name |
- |
- def find(self, unit): |
- raise NotImplementedError() |
- |
- def find_rule(self, name): |
- """Finds a rule whose name is |name|. """ |
- for rule in self._rules: |
- if rule.name == name: |
- return rule |
- return None |
- |
- |
-class VMSorter(AbstractSorter): |
- """Represents a Sorter for memory regions on virtual memory.""" |
- def __init__(self, dct): |
- assert dct['world'] == 'vm' |
- super(VMSorter, self).__init__(dct) |
- |
- def find(self, unit): |
- for rule in self._rules: |
- if rule.match(unit): |
- return rule |
- assert False |
- |
- |
-class MallocSorter(AbstractSorter): |
- """Represents a Sorter for malloc'ed blocks.""" |
- def __init__(self, dct): |
- assert dct['world'] == 'malloc' |
- super(MallocSorter, self).__init__(dct) |
- self._no_bucket_rule = NoBucketMallocRule() |
- |
- def find(self, unit): |
- if not unit.bucket: |
- return self._no_bucket_rule |
- assert unit.bucket.allocator_type == 'malloc' |
- |
- if unit.bucket.component_cache: |
- return unit.bucket.component_cache |
- |
- for rule in self._rules: |
- if rule.match(unit): |
- unit.bucket.component_cache = rule |
- return rule |
- assert False |
- |
- |
-class SorterSet(object): |
- """Represents an iterable set of Sorters.""" |
- def __init__(self, additional=None, default=None): |
- if not additional: |
- additional = [] |
- if not default: |
- default = DEFAULT_SORTERS |
- self._sorters = {} |
- for filename in default + additional: |
- sorter = AbstractSorter.load(filename) |
- if sorter.world not in self._sorters: |
- self._sorters[sorter.world] = [] |
- self._sorters[sorter.world].append(sorter) |
- |
- def __repr__(self): |
- result = cStringIO.StringIO() |
-    result.write(str(self._sorters)) |
- return result.getvalue() |
- |
- def __iter__(self): |
- for sorters in self._sorters.itervalues(): |
- for sorter in sorters: |
- yield sorter |
- |
- def iter_world(self, world): |
- for sorter in self._sorters.get(world, []): |
- yield sorter |
- |
- |
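# An illustrative sketch of the JSON shape the sorter classes above expect, |
# based only on the keys read in AbstractSorter.__init__ and |
# MallocRule.__init__.  The version string, rule names and patterns are |
# hypothetical, not taken from the real sorter.*.json files. |
import json |
 |
EXAMPLE_MALLOC_SORTER = json.loads(""" |
{ |
  "version": "SORTER_EXAMPLE", |
  "world": "malloc", |
  "name": "malloc-example", |
  "order": {}, |
  "rules": [ |
    {"name": "tc-std-string", "backtrace_function": ".*std::basic_string.*"}, |
    {"name": "tc-other", "backtrace_function": ".*"} |
  ] |
} |
""") |
 |
# AbstractSorter.load() dispatches on the "world" field (here, to a |
# MallocSorter); each rule's regular expressions are optional. |
assert EXAMPLE_MALLOC_SORTER['world'] == 'malloc' |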
-class Command(object): |
- """Subclasses are a subcommand for this executable. |
- |
- See COMMANDS in main(). |
- """ |
- _DEVICE_LIB_BASEDIRS = ['/data/data/', '/data/app-lib/', '/data/local/tmp'] |
- |
- def __init__(self, usage): |
- self._parser = optparse.OptionParser(usage) |
- |
- @staticmethod |
- def load_basic_files( |
- dump_path, multiple, no_dump=False, alternative_dirs=None): |
- prefix = Command._find_prefix(dump_path) |
-    # If the target process appears to have been running on Android, convert |
-    # paths on the Android device to their estimated counterparts on the host. |
-    # Use --alternative-dirs to specify the mapping manually. |
- if not alternative_dirs: |
- alternative_dirs = Command._estimate_alternative_dirs(prefix) |
- if alternative_dirs: |
- for device, host in alternative_dirs.iteritems(): |
- LOGGER.info('Assuming %s on device as %s on host' % (device, host)) |
- symbol_data_sources = SymbolDataSources(prefix, alternative_dirs) |
- symbol_data_sources.prepare() |
- bucket_set = BucketSet() |
- bucket_set.load(prefix) |
- if not no_dump: |
- if multiple: |
- dump_list = DumpList.load(Command._find_all_dumps(dump_path)) |
- else: |
- dump = Dump.load(dump_path) |
- symbol_mapping_cache = SymbolMappingCache() |
- with open(prefix + '.cache.function', 'a+') as cache_f: |
- symbol_mapping_cache.update( |
- FUNCTION_SYMBOLS, bucket_set, |
- SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f) |
- with open(prefix + '.cache.typeinfo', 'a+') as cache_f: |
- symbol_mapping_cache.update( |
- TYPEINFO_SYMBOLS, bucket_set, |
- SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f) |
- with open(prefix + '.cache.sourcefile', 'a+') as cache_f: |
- symbol_mapping_cache.update( |
- SOURCEFILE_SYMBOLS, bucket_set, |
- SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f) |
- bucket_set.symbolize(symbol_mapping_cache) |
- if no_dump: |
- return bucket_set |
- elif multiple: |
- return (bucket_set, dump_list) |
- else: |
- return (bucket_set, dump) |
- |
- @staticmethod |
- def _find_prefix(path): |
-    return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path) |
- |
- @staticmethod |
- def _estimate_alternative_dirs(prefix): |
- """Estimates a path in host from a corresponding path in target device. |
- |
- For Android, dmprof.py should find symbol information from binaries in |
- the host instead of the Android device because dmprof.py doesn't run on |
- the Android device. This method estimates a path in the host |
- corresponding to a path in the Android device. |
- |
- Returns: |
- A dict that maps a path in the Android device to a path in the host. |
- If a file in Command._DEVICE_LIB_BASEDIRS is found in /proc/maps, it |
- assumes the process was running on Android and maps the path to |
- "out/Debug/lib" in the Chromium directory. An empty dict is returned |
- unless Android. |
- """ |
- device_lib_path_candidates = set() |
- |
- with open(prefix + '.maps') as maps_f: |
- maps = proc_maps.ProcMaps.load(maps_f) |
- for entry in maps: |
- name = entry.as_dict()['name'] |
- if any([base_dir in name for base_dir in Command._DEVICE_LIB_BASEDIRS]): |
- device_lib_path_candidates.add(os.path.dirname(name)) |
- |
- if len(device_lib_path_candidates) == 1: |
- return {device_lib_path_candidates.pop(): os.path.join( |
- CHROME_SRC_PATH, 'out', 'Debug', 'lib')} |
- else: |
- return {} |
- |
- @staticmethod |
- def _find_all_dumps(dump_path): |
- prefix = Command._find_prefix(dump_path) |
- dump_path_list = [dump_path] |
- |
-    n = int(dump_path[-9:-5]) |
- n += 1 |
- skipped = 0 |
- while True: |
- p = '%s.%04d.heap' % (prefix, n) |
- if os.path.exists(p) and os.stat(p).st_size: |
- dump_path_list.append(p) |
- else: |
- if skipped > 10: |
- break |
- skipped += 1 |
- n += 1 |
- |
- return dump_path_list |
- |
- @staticmethod |
- def _find_all_buckets(dump_path): |
- prefix = Command._find_prefix(dump_path) |
- bucket_path_list = [] |
- |
- n = 0 |
- while True: |
- path = '%s.%04d.buckets' % (prefix, n) |
- if not os.path.exists(path): |
- if n > 10: |
- break |
- n += 1 |
- continue |
- bucket_path_list.append(path) |
- n += 1 |
- |
- return bucket_path_list |
- |
- def _parse_args(self, sys_argv, required): |
- options, args = self._parser.parse_args(sys_argv) |
- if len(args) < required + 1: |
- self._parser.error('needs %d argument(s).\n' % required) |
- return None |
- return (options, args) |
- |
- @staticmethod |
- def _parse_policy_list(options_policy): |
- if options_policy: |
- return options_policy.split(',') |
- else: |
- return None |
- |
- |
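# A standalone sketch of the dump-file naming convention that |
# Command._find_prefix and Command._find_all_dumps rely on: dumps are named |
# <prefix>.NNNN.heap with a zero-padded, four-digit sequence number.  The |
# file name below is hypothetical. |
import re |
 |
dump_path = 'dmprof.12345.0002.heap' |
prefix = re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', dump_path) |
assert prefix == 'dmprof.12345' |
 |
# The sequence number is the four digits just before '.heap'; later dumps |
# are probed as <prefix>.0003.heap, <prefix>.0004.heap, ..., giving up |
# after about ten missing files. |
sequence = int(dump_path[-9:-5]) |
assert sequence == 2 |
assert '%s.%04d.heap' % (prefix, sequence + 1) == 'dmprof.12345.0003.heap' |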
-class BucketsCommand(Command): |
- def __init__(self): |
- super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>') |
- |
- def do(self, sys_argv, out=sys.stdout): |
- _, args = self._parse_args(sys_argv, 1) |
- dump_path = args[1] |
- bucket_set = Command.load_basic_files(dump_path, True, True) |
- |
- BucketsCommand._output(bucket_set, out) |
- return 0 |
- |
- @staticmethod |
- def _output(bucket_set, out): |
- """Prints all buckets with resolving symbols. |
- |
- Args: |
- bucket_set: A BucketSet object. |
- out: An IO object to output. |
- """ |
- for bucket_id, bucket in sorted(bucket_set): |
- out.write('%d: %s\n' % (bucket_id, bucket)) |
- |
- |
-class StacktraceCommand(Command): |
- def __init__(self): |
- super(StacktraceCommand, self).__init__( |
- 'Usage: %prog stacktrace <dump>') |
- |
- def do(self, sys_argv): |
- _, args = self._parse_args(sys_argv, 1) |
- dump_path = args[1] |
- (bucket_set, dump) = Command.load_basic_files(dump_path, False) |
- |
- StacktraceCommand._output(dump, bucket_set, sys.stdout) |
- return 0 |
- |
- @staticmethod |
- def _output(dump, bucket_set, out): |
- """Outputs a given stacktrace. |
- |
- Args: |
- bucket_set: A BucketSet object. |
- out: A file object to output. |
- """ |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket: |
- continue |
- for i in range(0, BUCKET_ID - 1): |
- out.write(words[i] + ' ') |
- for frame in bucket.symbolized_stackfunction: |
- out.write(frame + ' ') |
- out.write('\n') |
- |
- |
-class PolicyCommands(Command): |
- def __init__(self, command): |
- super(PolicyCommands, self).__init__( |
- 'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' % |
- command) |
- self._parser.add_option('-p', '--policy', type='string', dest='policy', |
- help='profile with POLICY', metavar='POLICY') |
- self._parser.add_option('--alternative-dirs', dest='alternative_dirs', |
- metavar='/path/on/target@/path/on/host[:...]', |
- help='Read files in /path/on/host/ instead of ' |
- 'files in /path/on/target/.') |
- |
- def _set_up(self, sys_argv): |
- options, args = self._parse_args(sys_argv, 1) |
- dump_path = args[1] |
- shared_first_dump_paths = args[2:] |
- alternative_dirs_dict = {} |
- if options.alternative_dirs: |
- for alternative_dir_pair in options.alternative_dirs.split(':'): |
- target_path, host_path = alternative_dir_pair.split('@', 1) |
- alternative_dirs_dict[target_path] = host_path |
- (bucket_set, dumps) = Command.load_basic_files( |
- dump_path, True, alternative_dirs=alternative_dirs_dict) |
- |
- pfn_counts_dict = {} |
- for shared_first_dump_path in shared_first_dump_paths: |
- shared_dumps = Command._find_all_dumps(shared_first_dump_path) |
- for shared_dump in shared_dumps: |
- pfn_counts = PFNCounts.load(shared_dump) |
- if pfn_counts.pid not in pfn_counts_dict: |
- pfn_counts_dict[pfn_counts.pid] = [] |
- pfn_counts_dict[pfn_counts.pid].append(pfn_counts) |
- |
- policy_set = PolicySet.load(Command._parse_policy_list(options.policy)) |
- return policy_set, dumps, pfn_counts_dict, bucket_set |
- |
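# A minimal sketch of how the --alternative-dirs value is parsed in |
# PolicyCommands._set_up above (CatCommand.do parses it the same way): |
# pairs are separated by ':' and each pair is '<path on target>@<path on |
# host>'.  The paths below are hypothetical. |
flag = '/data/app-lib/com.example-1@/home/user/chrome/src/out/Debug/lib' |
 |
alternative_dirs = {} |
for pair in flag.split(':'): |
  target_path, host_path = pair.split('@', 1) |
  alternative_dirs[target_path] = host_path |
 |
assert alternative_dirs == { |
    '/data/app-lib/com.example-1': '/home/user/chrome/src/out/Debug/lib'} |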
- @staticmethod |
- def _apply_policy(dump, pfn_counts_dict, policy, bucket_set, first_dump_time): |
- """Aggregates the total memory size of each component. |
- |
-    Iterates through all stacktraces and attributes each of them to one of the |
-    components based on the policy. It is important to apply the policy rules |
-    in the right order. |
- |
- Args: |
- dump: A Dump object. |
- pfn_counts_dict: A dict mapping a pid to a list of PFNCounts. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- first_dump_time: An integer representing time when the first dump is |
- dumped. |
- |
- Returns: |
- A dict mapping components and their corresponding sizes. |
- """ |
- LOGGER.info(' %s' % dump.path) |
- all_pfn_dict = {} |
- if pfn_counts_dict: |
- LOGGER.info(' shared with...') |
- for pid, pfnset_list in pfn_counts_dict.iteritems(): |
- closest_pfnset_index = None |
- closest_pfnset_difference = 1024.0 |
- for index, pfnset in enumerate(pfnset_list): |
- time_difference = pfnset.time - dump.time |
- if time_difference >= 3.0: |
- break |
- elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or |
- (0.0 <= time_difference and time_difference < 3.0)): |
- closest_pfnset_index = index |
- closest_pfnset_difference = time_difference |
- elif time_difference < 0.0 and pfnset.reason == 'Exiting': |
- closest_pfnset_index = None |
- break |
-        if closest_pfnset_index is not None: |
- for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn: |
- all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count |
- LOGGER.info(' %s (time difference = %f)' % |
- (pfnset_list[closest_pfnset_index].path, |
- closest_pfnset_difference)) |
- else: |
- LOGGER.info(' (no match with pid:%d)' % pid) |
- |
- sizes = dict((c, 0) for c in policy.components) |
- |
- PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes) |
- verify_global_stats = PolicyCommands._accumulate_maps( |
- dump, all_pfn_dict, policy, bucket_set, sizes) |
- |
- # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed. |
- # http://crbug.com/245603. |
- for verify_key, verify_value in verify_global_stats.iteritems(): |
- dump_value = dump.global_stat('%s_committed' % verify_key) |
- if dump_value != verify_value: |
- LOGGER.warn('%25s: %12d != %d (%d)' % ( |
- verify_key, dump_value, verify_value, dump_value - verify_value)) |
- |
- sizes['mmap-no-log'] = ( |
- dump.global_stat('profiled-mmap_committed') - |
- sizes['mmap-total-log']) |
- sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed') |
- sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual') |
- |
- sizes['tc-no-log'] = ( |
- dump.global_stat('profiled-malloc_committed') - |
- sizes['tc-total-log']) |
- sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed') |
- sizes['tc-unused'] = ( |
- sizes['mmap-tcmalloc'] - |
- dump.global_stat('profiled-malloc_committed')) |
- if sizes['tc-unused'] < 0: |
- LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' % |
- sizes['tc-unused']) |
- sizes['tc-unused'] = 0 |
- sizes['tc-total'] = sizes['mmap-tcmalloc'] |
- |
- # TODO(dmikurube): global_stat will be deprecated. |
- # See http://crbug.com/245603. |
- for key, value in { |
- 'total': 'total_committed', |
- 'filemapped': 'file_committed', |
- 'absent': 'absent_committed', |
- 'file-exec': 'file-exec_committed', |
- 'file-nonexec': 'file-nonexec_committed', |
- 'anonymous': 'anonymous_committed', |
- 'stack': 'stack_committed', |
- 'other': 'other_committed', |
- 'unhooked-absent': 'nonprofiled-absent_committed', |
- 'total-vm': 'total_virtual', |
- 'filemapped-vm': 'file_virtual', |
- 'anonymous-vm': 'anonymous_virtual', |
- 'other-vm': 'other_virtual' }.iteritems(): |
- if key in sizes: |
- sizes[key] = dump.global_stat(value) |
- |
- if 'mustbezero' in sizes: |
- removed_list = ( |
- 'profiled-mmap_committed', |
- 'nonprofiled-absent_committed', |
- 'nonprofiled-anonymous_committed', |
- 'nonprofiled-file-exec_committed', |
- 'nonprofiled-file-nonexec_committed', |
- 'nonprofiled-stack_committed', |
- 'nonprofiled-other_committed') |
- sizes['mustbezero'] = ( |
- dump.global_stat('total_committed') - |
- sum(dump.global_stat(removed) for removed in removed_list)) |
- if 'total-exclude-profiler' in sizes: |
- sizes['total-exclude-profiler'] = ( |
- dump.global_stat('total_committed') - |
- (sizes['mmap-profiler'] + sizes['mmap-type-profiler'])) |
- if 'hour' in sizes: |
- sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0 |
- if 'minute' in sizes: |
- sizes['minute'] = (dump.time - first_dump_time) / 60.0 |
- if 'second' in sizes: |
- sizes['second'] = dump.time - first_dump_time |
- |
- return sizes |
- |
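# A simplified, standalone restatement of the shared-snapshot matching rule |
# in _apply_policy above, using plain (time, reason) tuples instead of |
# PFNCounts objects; the function name is mine.  It picks the last snapshot |
# that is either earlier than the dump (unless it was taken while 'Exiting') |
# or within 3 seconds after it. |
def _match_shared_snapshot(snapshots, dump_time): |
  """Returns the index of the matching snapshot, or None.""" |
  closest_index = None |
  for index, (snapshot_time, reason) in enumerate(snapshots): |
    difference = snapshot_time - dump_time |
    if difference >= 3.0: |
      break |
    elif (difference < 0.0 and reason != 'Exiting') or difference < 3.0: |
      closest_index = index |
    elif difference < 0.0 and reason == 'Exiting': |
      return None |
  return closest_index |
 |
assert _match_shared_snapshot([(8.0, ''), (11.0, '')], 10.0) == 1 |
assert _match_shared_snapshot([(14.0, '')], 10.0) is None |
assert _match_shared_snapshot([(8.0, 'Exiting')], 10.0) is None |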
- @staticmethod |
- def _accumulate_malloc(dump, policy, bucket_set, sizes): |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket or bucket.allocator_type == 'malloc': |
- component_match = policy.find_malloc(bucket) |
- elif bucket.allocator_type == 'mmap': |
- continue |
- else: |
- assert False |
- sizes[component_match] += int(words[COMMITTED]) |
- |
- assert not component_match.startswith('mmap-') |
- if component_match.startswith('tc-'): |
- sizes['tc-total-log'] += int(words[COMMITTED]) |
- else: |
- sizes['other-total-log'] += int(words[COMMITTED]) |
- |
- @staticmethod |
- def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes): |
- # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed. |
- # http://crbug.com/245603. |
- global_stats = { |
- 'total': 0, |
- 'file-exec': 0, |
- 'file-nonexec': 0, |
- 'anonymous': 0, |
- 'stack': 0, |
- 'other': 0, |
- 'nonprofiled-file-exec': 0, |
- 'nonprofiled-file-nonexec': 0, |
- 'nonprofiled-anonymous': 0, |
- 'nonprofiled-stack': 0, |
- 'nonprofiled-other': 0, |
- 'profiled-mmap': 0, |
- } |
- |
- for key, value in dump.iter_map: |
- # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed. |
- # It's temporary verification code for transition described in |
- # http://crbug.com/245603. |
- committed = 0 |
- if 'committed' in value[1]: |
- committed = value[1]['committed'] |
- global_stats['total'] += committed |
- key = 'other' |
- name = value[1]['vma']['name'] |
- if name.startswith('/'): |
- if value[1]['vma']['executable'] == 'x': |
- key = 'file-exec' |
- else: |
- key = 'file-nonexec' |
- elif name == '[stack]': |
- key = 'stack' |
- elif name == '': |
- key = 'anonymous' |
- global_stats[key] += committed |
- if value[0] == 'unhooked': |
- global_stats['nonprofiled-' + key] += committed |
- if value[0] == 'hooked': |
- global_stats['profiled-mmap'] += committed |
- |
- if value[0] == 'unhooked': |
- if pfn_dict and dump.pageframe_length: |
- for pageframe in value[1]['pageframe']: |
- component_match = policy.find_unhooked(value, pageframe, pfn_dict) |
- sizes[component_match] += pageframe.size |
- else: |
- component_match = policy.find_unhooked(value) |
- sizes[component_match] += int(value[1]['committed']) |
- elif value[0] == 'hooked': |
- if pfn_dict and dump.pageframe_length: |
- for pageframe in value[1]['pageframe']: |
- component_match, _ = policy.find_mmap( |
- value, bucket_set, pageframe, pfn_dict) |
- sizes[component_match] += pageframe.size |
- assert not component_match.startswith('tc-') |
- if component_match.startswith('mmap-'): |
- sizes['mmap-total-log'] += pageframe.size |
- else: |
- sizes['other-total-log'] += pageframe.size |
- else: |
- component_match, _ = policy.find_mmap(value, bucket_set) |
- sizes[component_match] += int(value[1]['committed']) |
- if component_match.startswith('mmap-'): |
- sizes['mmap-total-log'] += int(value[1]['committed']) |
- else: |
- sizes['other-total-log'] += int(value[1]['committed']) |
- else: |
- LOGGER.error('Unrecognized mapping status: %s' % value[0]) |
- |
- return global_stats |
- |
- |
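# A small sketch of the region classification used in _accumulate_maps |
# above: the vma name decides between file-exec, file-nonexec, stack, |
# anonymous and other.  The 'vma' dicts below are abbreviated, hypothetical |
# examples of what dump.iter_map yields. |
def _classify_vma(vma): |
  if vma['name'].startswith('/'): |
    return 'file-exec' if vma['executable'] == 'x' else 'file-nonexec' |
  elif vma['name'] == '[stack]': |
    return 'stack' |
  elif vma['name'] == '': |
    return 'anonymous' |
  return 'other' |
 |
assert _classify_vma({'name': '/usr/lib/libexample.so', |
                      'executable': 'x'}) == 'file-exec' |
assert _classify_vma({'name': '[stack]', 'executable': '-'}) == 'stack' |
assert _classify_vma({'name': '', 'executable': '-'}) == 'anonymous' |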
-class CSVCommand(PolicyCommands): |
- def __init__(self): |
- super(CSVCommand, self).__init__('csv') |
- |
- def do(self, sys_argv): |
- policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) |
- return CSVCommand._output( |
- policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) |
- |
- @staticmethod |
- def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): |
- max_components = 0 |
- for label in policy_set: |
- max_components = max(max_components, len(policy_set[label].components)) |
- |
- for label in sorted(policy_set): |
- components = policy_set[label].components |
- if len(policy_set) > 1: |
- out.write('%s%s\n' % (label, ',' * (max_components - 1))) |
- out.write('%s%s\n' % ( |
- ','.join(components), ',' * (max_components - len(components)))) |
- |
- LOGGER.info('Applying a policy %s to...' % label) |
- for dump in dumps: |
- component_sizes = PolicyCommands._apply_policy( |
- dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) |
- s = [] |
- for c in components: |
- if c in ('hour', 'minute', 'second'): |
- s.append('%05.5f' % (component_sizes[c])) |
- else: |
- s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0)) |
- out.write('%s%s\n' % ( |
- ','.join(s), ',' * (max_components - len(components)))) |
- |
- bucket_set.clear_component_cache() |
- |
- return 0 |
- |
- |
-class JSONCommand(PolicyCommands): |
- def __init__(self): |
- super(JSONCommand, self).__init__('json') |
- |
- def do(self, sys_argv): |
- policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) |
- return JSONCommand._output( |
- policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) |
- |
- @staticmethod |
- def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): |
- json_base = { |
- 'version': 'JSON_DEEP_2', |
- 'policies': {}, |
- } |
- |
- for label in sorted(policy_set): |
- json_base['policies'][label] = { |
- 'legends': policy_set[label].components, |
- 'snapshots': [], |
- } |
- |
- LOGGER.info('Applying a policy %s to...' % label) |
- for dump in dumps: |
- component_sizes = PolicyCommands._apply_policy( |
- dump, pfn_counts_dict, policy_set[label], bucket_set, dumps[0].time) |
- component_sizes['dump_path'] = dump.path |
- component_sizes['dump_time'] = datetime.datetime.fromtimestamp( |
- dump.time).strftime('%Y-%m-%d %H:%M:%S') |
- json_base['policies'][label]['snapshots'].append(component_sizes) |
- |
- bucket_set.clear_component_cache() |
- |
- json.dump(json_base, out, indent=2, sort_keys=True) |
- |
- return 0 |
- |
- |
-class ListCommand(PolicyCommands): |
- def __init__(self): |
- super(ListCommand, self).__init__('list') |
- |
- def do(self, sys_argv): |
- policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv) |
- return ListCommand._output( |
- policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout) |
- |
- @staticmethod |
- def _output(policy_set, dumps, pfn_counts_dict, bucket_set, out): |
- for label in sorted(policy_set): |
- LOGGER.info('Applying a policy %s to...' % label) |
- for dump in dumps: |
- component_sizes = PolicyCommands._apply_policy( |
- dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time) |
- out.write('%s for %s:\n' % (label, dump.path)) |
- for c in policy_set[label].components: |
- if c in ['hour', 'minute', 'second']: |
- out.write('%40s %12.3f\n' % (c, component_sizes[c])) |
- else: |
- out.write('%40s %12d\n' % (c, component_sizes[c])) |
- |
- bucket_set.clear_component_cache() |
- |
- return 0 |
- |
- |
-class MapCommand(Command): |
- def __init__(self): |
- super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>') |
- |
- def do(self, sys_argv, out=sys.stdout): |
- _, args = self._parse_args(sys_argv, 2) |
- dump_path = args[1] |
- target_policy = args[2] |
- (bucket_set, dumps) = Command.load_basic_files(dump_path, True) |
- policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) |
- |
- MapCommand._output(dumps, bucket_set, policy_set[target_policy], out) |
- return 0 |
- |
- @staticmethod |
- def _output(dumps, bucket_set, policy, out): |
- """Prints all stacktraces in a given component of given depth. |
- |
- Args: |
- dumps: A list of Dump objects. |
- bucket_set: A BucketSet object. |
- policy: A Policy object. |
- out: An IO object to output. |
- """ |
- max_dump_count = 0 |
- range_dict = ExclusiveRangeDict(ListAttribute) |
- for dump in dumps: |
- max_dump_count = max(max_dump_count, dump.count) |
- for key, value in dump.iter_map: |
- for begin, end, attr in range_dict.iter_range(key[0], key[1]): |
- attr[dump.count] = value |
- |
- max_dump_count_digit = len(str(max_dump_count)) |
- for begin, end, attr in range_dict.iter_range(): |
- out.write('%x-%x\n' % (begin, end)) |
- if len(attr) < max_dump_count: |
- attr[max_dump_count] = None |
- for index, value in enumerate(attr[1:]): |
- out.write(' #%0*d: ' % (max_dump_count_digit, index + 1)) |
- if not value: |
- out.write('None\n') |
- elif value[0] == 'hooked': |
- component_match, _ = policy.find_mmap(value, bucket_set) |
- out.write('%s @ %d\n' % (component_match, value[1]['bucket_id'])) |
- else: |
- component_match = policy.find_unhooked(value) |
- region_info = value[1] |
- size = region_info['committed'] |
- out.write('%s [%d bytes] %s%s%s%s %s\n' % ( |
- component_match, size, value[1]['vma']['readable'], |
- value[1]['vma']['writable'], value[1]['vma']['executable'], |
- value[1]['vma']['private'], value[1]['vma']['name'])) |
- |
- |
-class ExpandCommand(Command): |
- def __init__(self): |
- super(ExpandCommand, self).__init__( |
- 'Usage: %prog expand <dump> <policy> <component> <depth>') |
- |
- def do(self, sys_argv): |
- _, args = self._parse_args(sys_argv, 4) |
- dump_path = args[1] |
- target_policy = args[2] |
- component_name = args[3] |
- depth = args[4] |
- (bucket_set, dump) = Command.load_basic_files(dump_path, False) |
- policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) |
- |
- ExpandCommand._output(dump, policy_set[target_policy], bucket_set, |
- component_name, int(depth), sys.stdout) |
- return 0 |
- |
- @staticmethod |
- def _output(dump, policy, bucket_set, component_name, depth, out): |
- """Prints all stacktraces in a given component of given depth. |
- |
- Args: |
- dump: A Dump object. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- component_name: A name of component for filtering. |
- depth: An integer representing depth to be printed. |
- out: An IO object to output. |
- """ |
- sizes = {} |
- |
- ExpandCommand._accumulate( |
- dump, policy, bucket_set, component_name, depth, sizes) |
- |
- sorted_sizes_list = sorted( |
- sizes.iteritems(), key=(lambda x: x[1]), reverse=True) |
- total = 0 |
- # TODO(dmikurube): Better formatting. |
- for size_pair in sorted_sizes_list: |
- out.write('%10d %s\n' % (size_pair[1], size_pair[0])) |
- total += size_pair[1] |
- LOGGER.info('total: %d\n' % total) |
- |
- @staticmethod |
- def _add_size(precedence, bucket, depth, committed, sizes): |
- stacktrace_sequence = precedence |
- for function, sourcefile in zip( |
- bucket.symbolized_stackfunction[ |
- 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)], |
- bucket.symbolized_stacksourcefile[ |
- 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]): |
- stacktrace_sequence += '%s(@%s) ' % (function, sourcefile) |
-    if stacktrace_sequence not in sizes: |
- sizes[stacktrace_sequence] = 0 |
- sizes[stacktrace_sequence] += committed |
- |
- @staticmethod |
- def _accumulate(dump, policy, bucket_set, component_name, depth, sizes): |
- rule = policy.find_rule(component_name) |
- if not rule: |
- pass |
- elif rule.allocator_type == 'malloc': |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket or bucket.allocator_type == 'malloc': |
- component_match = policy.find_malloc(bucket) |
- elif bucket.allocator_type == 'mmap': |
- continue |
- else: |
- assert False |
- if component_match == component_name: |
- precedence = '' |
- precedence += '(alloc=%d) ' % int(words[ALLOC_COUNT]) |
- precedence += '(free=%d) ' % int(words[FREE_COUNT]) |
- if bucket.typeinfo: |
- precedence += '(type=%s) ' % bucket.symbolized_typeinfo |
- precedence += '(type.name=%s) ' % bucket.typeinfo_name |
- ExpandCommand._add_size(precedence, bucket, depth, |
- int(words[COMMITTED]), sizes) |
- elif rule.allocator_type == 'mmap': |
- for _, region in dump.iter_map: |
- if region[0] != 'hooked': |
- continue |
- component_match, bucket = policy.find_mmap(region, bucket_set) |
- if component_match == component_name: |
- ExpandCommand._add_size('', bucket, depth, |
- region[1]['committed'], sizes) |
- |
- |
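# A sketch of the aggregation key ExpandCommand._add_size builds: the first |
# depth+1 symbolized frames are joined as 'function(@sourcefile) ' and the |
# committed sizes are accumulated per key.  The helper name and the frame |
# lists are hypothetical. |
def _stack_key(functions, sourcefiles, depth, precedence=''): |
  key = precedence |
  for function, sourcefile in zip(functions[:1 + depth], |
                                  sourcefiles[:1 + depth]): |
    key += '%s(@%s) ' % (function, sourcefile) |
  return key |
 |
key = _stack_key(['malloc', 'Foo::Bar', 'main'], |
                 ['alloc.cc', 'foo.cc', 'main.cc'], |
                 depth=1) |
assert key == 'malloc(@alloc.cc) Foo::Bar(@foo.cc) ' |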
-class PProfCommand(Command): |
- def __init__(self): |
- super(PProfCommand, self).__init__( |
- 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>') |
- self._parser.add_option('-c', '--component', type='string', |
- dest='component', |
- help='restrict to COMPONENT', metavar='COMPONENT') |
- |
- def do(self, sys_argv): |
- options, args = self._parse_args(sys_argv, 2) |
- |
- dump_path = args[1] |
- target_policy = args[2] |
- component = options.component |
- |
- (bucket_set, dump) = Command.load_basic_files(dump_path, False) |
- policy_set = PolicySet.load(Command._parse_policy_list(target_policy)) |
- |
- with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f: |
- maps_lines = maps_f.readlines() |
- PProfCommand._output( |
- dump, policy_set[target_policy], bucket_set, maps_lines, component, |
- sys.stdout) |
- |
- return 0 |
- |
- @staticmethod |
- def _output(dump, policy, bucket_set, maps_lines, component_name, out): |
- """Converts the heap profile dump so it can be processed by pprof. |
- |
- Args: |
- dump: A Dump object. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- maps_lines: A list of strings containing /proc/.../maps. |
- component_name: A name of component for filtering. |
- out: An IO object to output. |
- """ |
- out.write('heap profile: ') |
- com_committed, com_allocs = PProfCommand._accumulate( |
- dump, policy, bucket_set, component_name) |
- |
- out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
- com_allocs, com_committed, com_allocs, com_committed)) |
- |
- PProfCommand._output_stacktrace_lines( |
- dump, policy, bucket_set, component_name, out) |
- |
- out.write('MAPPED_LIBRARIES:\n') |
- for line in maps_lines: |
- out.write(line) |
- |
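# A sketch of the pprof text format PProfCommand._output writes, using the |
# same format strings as above with hypothetical numbers: a header line with |
# the component totals, one line per region or stack with its counts and raw |
# stack addresses, and finally the verbatim /proc/.../maps lines. |
import sys |
 |
out = sys.stdout |
com_allocs, com_committed = 42, 123456 |
out.write('heap profile: ') |
out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % ( |
    com_allocs, com_committed, com_allocs, com_committed)) |
out.write('%6d: %8s [%6d: %8s] @' % (1, 4096, 1, 4096)) |
for address in (0x7f0000001000, 0x7f0000002000): |
  out.write(' 0x%016x' % address) |
out.write('\n') |
out.write('MAPPED_LIBRARIES:\n') |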
- @staticmethod |
- def _accumulate(dump, policy, bucket_set, component_name): |
- """Accumulates size of committed chunks and the number of allocated chunks. |
- |
- Args: |
- dump: A Dump object. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- component_name: A name of component for filtering. |
- |
- Returns: |
- Two integers which are the accumulated size of committed regions and the |
- number of allocated chunks, respectively. |
- """ |
- com_committed = 0 |
- com_allocs = 0 |
- |
- for _, region in dump.iter_map: |
- if region[0] != 'hooked': |
- continue |
- component_match, bucket = policy.find_mmap(region, bucket_set) |
- |
- if (component_name and component_name != component_match) or ( |
- region[1]['committed'] == 0): |
- continue |
- |
- com_committed += region[1]['committed'] |
- com_allocs += 1 |
- |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket or bucket.allocator_type == 'malloc': |
- component_match = policy.find_malloc(bucket) |
- elif bucket.allocator_type == 'mmap': |
- continue |
- else: |
- assert False |
- if (not bucket or |
- (component_name and component_name != component_match)): |
- continue |
- |
- com_committed += int(words[COMMITTED]) |
- com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]) |
- |
- return com_committed, com_allocs |
- |
- @staticmethod |
- def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out): |
- """Prints information of stacktrace lines for pprof. |
- |
- Args: |
- dump: A Dump object. |
- policy: A Policy object. |
- bucket_set: A BucketSet object. |
- component_name: A name of component for filtering. |
- out: An IO object to output. |
- """ |
- for _, region in dump.iter_map: |
- if region[0] != 'hooked': |
- continue |
- component_match, bucket = policy.find_mmap(region, bucket_set) |
- |
- if (component_name and component_name != component_match) or ( |
- region[1]['committed'] == 0): |
- continue |
- |
- out.write(' 1: %8s [ 1: %8s] @' % ( |
- region[1]['committed'], region[1]['committed'])) |
- for address in bucket.stacktrace: |
- out.write(' 0x%016x' % address) |
- out.write('\n') |
- |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if not bucket or bucket.allocator_type == 'malloc': |
- component_match = policy.find_malloc(bucket) |
- elif bucket.allocator_type == 'mmap': |
- continue |
- else: |
- assert False |
- if (not bucket or |
- (component_name and component_name != component_match)): |
- continue |
- |
- out.write('%6d: %8s [%6d: %8s] @' % ( |
- int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
- words[COMMITTED], |
- int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]), |
- words[COMMITTED])) |
- for address in bucket.stacktrace: |
- out.write(' 0x%016x' % address) |
- out.write('\n') |
- |
- |
-class UploadCommand(Command): |
- def __init__(self): |
- super(UploadCommand, self).__init__( |
- 'Usage: %prog upload [--gsutil path/to/gsutil] ' |
- '<first-dump> <destination-gs-path>') |
- self._parser.add_option('--gsutil', default='gsutil', |
- help='path to GSUTIL', metavar='GSUTIL') |
- |
- def do(self, sys_argv): |
- options, args = self._parse_args(sys_argv, 2) |
- dump_path = args[1] |
- gs_path = args[2] |
- |
- dump_files = Command._find_all_dumps(dump_path) |
- bucket_files = Command._find_all_buckets(dump_path) |
- prefix = Command._find_prefix(dump_path) |
- symbol_data_sources = SymbolDataSources(prefix) |
- symbol_data_sources.prepare() |
- symbol_path = symbol_data_sources.path() |
- |
- handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof') |
- os.close(handle_zip) |
- |
- try: |
- file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED) |
- for filename in dump_files: |
- file_zip.write(filename, os.path.basename(os.path.abspath(filename))) |
- for filename in bucket_files: |
- file_zip.write(filename, os.path.basename(os.path.abspath(filename))) |
- |
- symbol_basename = os.path.basename(os.path.abspath(symbol_path)) |
- for filename in os.listdir(symbol_path): |
- if not filename.startswith('.'): |
- file_zip.write(os.path.join(symbol_path, filename), |
- os.path.join(symbol_basename, os.path.basename( |
- os.path.abspath(filename)))) |
- file_zip.close() |
- |
- returncode = UploadCommand._run_gsutil( |
- options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path) |
- finally: |
- os.remove(filename_zip) |
- |
- return returncode |
- |
- @staticmethod |
- def _run_gsutil(gsutil, *args): |
- """Run gsutil as a subprocess. |
- |
- Args: |
- *args: Arguments to pass to gsutil. The first argument should be an |
- operation such as ls, cp or cat. |
- Returns: |
- The return code from the process. |
- """ |
- command = [gsutil] + list(args) |
- LOGGER.info("Running: %s", command) |
- |
- try: |
- return subprocess.call(command) |
- except OSError, e: |
-      LOGGER.error('Failed to run gsutil: %s', e) |
- |
- |
-class CatCommand(Command): |
- def __init__(self): |
- super(CatCommand, self).__init__('Usage: %prog cat <first-dump>') |
- self._parser.add_option('--alternative-dirs', dest='alternative_dirs', |
- metavar='/path/on/target@/path/on/host[:...]', |
- help='Read files in /path/on/host/ instead of ' |
- 'files in /path/on/target/.') |
- self._parser.add_option('--indent', dest='indent', action='store_true', |
- help='Indent the output.') |
- |
- def do(self, sys_argv): |
- options, args = self._parse_args(sys_argv, 1) |
- dump_path = args[1] |
- # TODO(dmikurube): Support shared memory. |
- alternative_dirs_dict = {} |
- if options.alternative_dirs: |
- for alternative_dir_pair in options.alternative_dirs.split(':'): |
- target_path, host_path = alternative_dir_pair.split('@', 1) |
- alternative_dirs_dict[target_path] = host_path |
- (bucket_set, dumps) = Command.load_basic_files( |
- dump_path, True, alternative_dirs=alternative_dirs_dict) |
- |
- json_root = OrderedDict() |
- json_root['version'] = 1 |
- json_root['run_id'] = None |
- for dump in dumps: |
- if json_root['run_id'] and json_root['run_id'] != dump.run_id: |
- LOGGER.error('Inconsistent heap profile dumps.') |
- json_root['run_id'] = '' |
- break |
- json_root['run_id'] = dump.run_id |
- json_root['snapshots'] = [] |
- |
- # Load all sorters. |
- sorters = SorterSet() |
- |
- for dump in dumps: |
- json_root['snapshots'].append( |
- self._fill_snapshot(dump, bucket_set, sorters)) |
- |
- if options.indent: |
- json.dump(json_root, sys.stdout, indent=2) |
- else: |
- json.dump(json_root, sys.stdout) |
- print '' |
- |
- @staticmethod |
- def _fill_snapshot(dump, bucket_set, sorters): |
- root = OrderedDict() |
- root['time'] = dump.time |
- root['worlds'] = OrderedDict() |
- root['worlds']['vm'] = CatCommand._fill_world( |
- dump, bucket_set, sorters, 'vm') |
- root['worlds']['malloc'] = CatCommand._fill_world( |
- dump, bucket_set, sorters, 'malloc') |
- return root |
- |
- @staticmethod |
- def _fill_world(dump, bucket_set, sorters, world): |
- root = OrderedDict() |
- |
- root['name'] = 'world' |
- if world == 'vm': |
- root['unit_fields'] = ['committed', 'reserved'] |
- elif world == 'malloc': |
- root['unit_fields'] = ['size', 'alloc_count', 'free_count'] |
- |
- # Make { vm | malloc } units with their sizes. |
- root['units'] = OrderedDict() |
- unit_set = UnitSet(world) |
- if world == 'vm': |
- for unit in CatCommand._iterate_vm_unit(dump, None, bucket_set): |
- unit_set.append(unit) |
- for unit in unit_set: |
- root['units'][unit.unit_id] = [unit.committed, unit.reserved] |
- elif world == 'malloc': |
- for unit in CatCommand._iterate_malloc_unit(dump, bucket_set): |
- unit_set.append(unit) |
- for unit in unit_set: |
- root['units'][unit.unit_id] = [ |
- unit.size, unit.alloc_count, unit.free_count] |
- |
- # Iterate for { vm | malloc } sorters. |
- root['breakdown'] = OrderedDict() |
- for sorter in sorters.iter_world(world): |
- breakdown = OrderedDict() |
- for unit in unit_set: |
- found = sorter.find(unit) |
- if found.name not in breakdown: |
- category = OrderedDict() |
- category['name'] = found.name |
- category['color'] = 'random' |
- subworlds = {} |
- for subworld in found.iter_subworld(): |
- subworlds[subworld] = False |
- if subworlds: |
- category['subworlds'] = subworlds |
- if found.hidden: |
- category['hidden'] = True |
- category['units'] = [] |
- breakdown[found.name] = category |
- breakdown[found.name]['units'].append(unit.unit_id) |
- root['breakdown'][sorter.name] = breakdown |
- |
- return root |
- |
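# An abbreviated sketch of the structure 'dmprof cat' builds in |
# CatCommand.do, _fill_snapshot and _fill_world before json.dump().  All |
# concrete values (run_id, times, unit ids, sizes, sorter and category |
# names) are hypothetical. |
EXAMPLE_CAT_OUTPUT = { |
    'version': 1, |
    'run_id': 'example-run', |
    'snapshots': [{ |
        'time': 1370000000.0, |
        'worlds': { |
            'vm': { |
                'name': 'world', |
                'unit_fields': ['committed', 'reserved'], |
                'units': {1: [4096, 8192]}, |
                'breakdown': { |
                    'example-vm-sorter': { |
                        'example-category': { |
                            'name': 'example-category', |
                            'color': 'random', |
                            'units': [1], |
                        }, |
                    }, |
                }, |
            }, |
            'malloc': { |
                'name': 'world', |
                'unit_fields': ['size', 'alloc_count', 'free_count'], |
                'units': {123: [2048, 10, 4]}, |
                'breakdown': {}, |
            }, |
        }, |
    }], |
} |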
- @staticmethod |
- def _iterate_vm_unit(dump, pfn_dict, bucket_set): |
- unit_id = 0 |
- for _, region in dump.iter_map: |
- unit_id += 1 |
- if region[0] == 'unhooked': |
- if pfn_dict and dump.pageframe_length: |
- for pageframe in region[1]['pageframe']: |
- yield UnhookedUnit(unit_id, pageframe.size, pageframe.size, |
- region, pageframe, pfn_dict) |
- else: |
- yield UnhookedUnit(unit_id, |
- int(region[1]['committed']), |
- int(region[1]['reserved']), |
- region) |
- elif region[0] == 'hooked': |
- if pfn_dict and dump.pageframe_length: |
- for pageframe in region[1]['pageframe']: |
- yield MMapUnit(unit_id, |
- pageframe.size, |
- pageframe.size, |
- region, bucket_set, pageframe, pfn_dict) |
- else: |
- yield MMapUnit(unit_id, |
- int(region[1]['committed']), |
- int(region[1]['reserved']), |
- region, |
- bucket_set) |
- else: |
- LOGGER.error('Unrecognized mapping status: %s' % region[0]) |
- |
- @staticmethod |
- def _iterate_malloc_unit(dump, bucket_set): |
- for line in dump.iter_stacktrace: |
- words = line.split() |
- bucket = bucket_set.get(int(words[BUCKET_ID])) |
- if bucket and bucket.allocator_type == 'malloc': |
- yield MallocUnit(int(words[BUCKET_ID]), |
- int(words[COMMITTED]), |
- int(words[ALLOC_COUNT]), |
- int(words[FREE_COUNT]), |
- bucket) |
- elif not bucket: |
-        # Buckets that are not found are all assumed to be malloc buckets. |
- yield MallocUnit(int(words[BUCKET_ID]), |
- int(words[COMMITTED]), |
- int(words[ALLOC_COUNT]), |
- int(words[FREE_COUNT]), |
- None) |
def main(): |
COMMANDS = { |
- 'buckets': BucketsCommand, |
- 'cat': CatCommand, |
- 'csv': CSVCommand, |
- 'expand': ExpandCommand, |
- 'json': JSONCommand, |
- 'list': ListCommand, |
- 'map': MapCommand, |
- 'pprof': PProfCommand, |
- 'stacktrace': StacktraceCommand, |
- 'upload': UploadCommand, |
+ 'buckets': subcommands.BucketsCommand, |
+ 'cat': subcommands.CatCommand, |
+ 'csv': subcommands.CSVCommand, |
+ 'expand': subcommands.ExpandCommand, |
+ 'json': subcommands.JSONCommand, |
+ 'list': subcommands.ListCommand, |
+ 'map': subcommands.MapCommand, |
+ 'pprof': subcommands.PProfCommand, |
+ 'stacktrace': subcommands.StacktraceCommand, |
+ 'upload': subcommands.UploadCommand, |
} |
if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS): |