# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.bucket import BUCKET_ID
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import proc_maps


LOGGER = logging.getLogger('dmprof')


# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
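
# For reference, a DUMP_DEEP_6 dump is expected to look roughly like the
# following (an illustrative sketch; the values and the exact section order
# are hypothetical, but each header is matched by the parsers below):
#
#   heap profile: DUMP_DEEP_6
#   META:
#   Time: 1363273766
#   PageSize: 4096
#   MMAP_LIST:
#   ...
#   GLOBAL_STATS:
#   ...
#   STACKTRACES:
#   ...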


class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
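  # For example, a file named 'chrome.12345.0002.heap' (a hypothetical name)
  # yields pid 12345 and dump count 2 in __init__ below.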

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+)')

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
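
  # The patterns above match MMAP_LIST region lines. A parenthesized address
  # means the boundary is truncated to the enclosing VMA. Hypothetical
  # examples of lines that _HOOK_PATTERN accepts:
  #    (7f0000000000)-(7f0000008000) hooked 4096 / 8192 @ 123
  #     7f0000008000 - 7f000000a000  unhooked 8192 / 8192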

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
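  # Both forms are handled in _parse_meta_information, e.g. (hypothetical
  # timestamps): 'Time: 2013/03/14 15:09:26.987' or 'Time: 1363273766'.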

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
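    # The ExclusiveRangeDict below keeps /proc/maps-derived attributes per
    # address range; _parse_mmap_list() fills it in for DUMP_DEEP_6 dumps.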
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    # Note: iterates self._procmaps (the ExclusiveRangeDict); self._map is a
    # plain dict and has no iter_range().
    for begin, end, attr in self._procmaps.iter_range():
      yield begin, end, attr

  @property
  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded Dump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and an integer indicating a line number next to the version string).

    Raises:
        ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']
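
    # Each stat line is expected to end with two integers: virtual and
    # committed sizes. E.g. a (hypothetical) line 'total 102400 51200'
    # yields self._global_stats['total_virtual'] == 102400 and
    # self._global_stats['total_committed'] == 51200.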

    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # Meta information ends at 'MMAP_LIST:' or 'GLOBAL_STATS:'.
        break
      else:
        pass
      ln += 1

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return {}

    ln += 1
    self._map = {}
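    # self._map maps (start, end) address pairs to a pair of
    # ('hooked' or 'unhooked', region_info); see the bottom of the loop below.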
    current_vma = {}
    pageframe_list = []
    while True:
      entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith(' PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: end address
      # 7: hooked or unhooked
      # 8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = {'vma': current_vma}
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True


class DumpList(object):
  """Represents a sequence of heap profile dumps."""

  def __init__(self, dump_list):
    self._dump_list = dump_list

  @staticmethod
  def load(path_list):
    LOGGER.info('Loading heap dump profiles.')
    dump_list = []
    for path in path_list:
      dump_list.append(Dump.load(path, ' '))
    return DumpList(dump_list)

  def __len__(self):
    return len(self._dump_list)

  def __iter__(self):
    for dump in self._dump_list:
      yield dump

  def __getitem__(self, index):
    return self._dump_list[index]


class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
  )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
| 476 """Increments |index| until |skipping_condition|(|index|) is False. |
| 477 |
| 478 Returns: |
| 479 A pair of an integer indicating a line number after skipped, and a |
| 480 boolean value which is True if found a line which skipping_condition |
| 481 is False for. |
| 482 """ |
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True