Chromium Code Reviews

Side by Side Diff: tools/deep_memory_profiler/lib/dump.py

Issue 19346002: Refactor dmprof: Split dmprof.py into modules. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 5 months ago
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.bucket import BUCKET_ID
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import proc_maps


LOGGER = logging.getLogger('dmprof')


# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds an mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
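
# A minimal sketch of how a DUMP_DEEP_6 file is laid out, inferred from the
# parsers in this module (the section names are what the code looks for; the
# values are illustrative, not taken from a real dump):
#
#   heap profile: DUMP_DEEP_6
#   META:
#   Time: 1370000000
#   PageSize: 4096
#   MMAP_LIST:
#   <"/proc/maps"-style lines, " PF: " lines and hooked/unhooked regions>
#   GLOBAL_STATS:
#   <"<name> <virtual> <committed>" lines>
#   STACKTRACES:
#   <stacktrace lines; see _check_stacktrace_line below>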


class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
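  # For example, a hypothetical dump path 'chrome.3573.0003.heap' would
  # yield pid 3573 (group 2) and dump count 3 (group 3).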

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+)')
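  # Lines these patterns are meant to match look roughly like the following
  # (spacing approximated, values made up; see _parse_mmap_list below):
  #   '  7f0000000000 -  7f0000001000   hooked 4096 / 8192 @ 123'
  #   '  7f0000001000 -  7f0000002000   unhooked 4096 / 4096'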

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    # Note: This iterates self._procmaps (the ExclusiveRangeDict built from
    # /proc/maps entries); self._map is a plain dict and has no iter_range().
    for begin, end, attr in self._procmaps.iter_range():
      yield begin, end, attr

  @property
  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
      path: A file path string to load.
      log_header: A preceding string for log messages.

    Returns:
      A loaded Dump object.

    Raises:
      ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump
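  # A hypothetical call (the file name is made up for illustration):
  #   dump = Dump.load('chrome.3573.0003.heap')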

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException, e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
      A pair of (a string representing a version of the stacktrace dump,
      and an integer indicating a line number next to the version string).

    Raises:
      ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']

    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])
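  # A GLOBAL_STATS entry read above looks roughly like
  #   'total 1228800 1126400'
  # (made-up numbers); only the last two columns, virtual and committed,
  # are used.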

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # Stop parsing meta information when "MMAP_LIST:" or "GLOBAL_STATS:"
        # is found.
        break
      else:
        pass
      ln += 1
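  # The META lines handled above look like the following (values are
  # illustrative):
  #   'Time: 1370000000'  or  'Time: 2013/06/01 12:34:56.789'
  #   'PageSize: 4096'
  #   'PageFrame: 24,Base64,PageCount'
  #   'RunID: abc123'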

  def _parse_mmap_list(self):
    """Parses lines in self._lines as an mmap list."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return {}

    ln += 1
    self._map = {}
    current_vma = {}
    pageframe_list = []
    while True:
      entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith(' PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: end address
      # 7: hooked or unhooked
      # 8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = {'vma': current_vma}
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1
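  # An MMAP_LIST section interleaves "/proc/maps"-style lines, optional
  # ' PF: ' pageframe lines, and hooked/unhooked region lines, roughly
  # (all values made up):
  #   '7f0000000000-7f0000002000 rw-p 00000000 00:00 0'
  #   '  7f0000000000 -  7f0000001000   hooked 4096 / 8192 @ 12'
  #   '  7f0000001000 -  7f0000002000   unhooked 4096 / 4096'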

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
      line_number: A line number to start parsing in lines.

    Raises:
      ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given line is a valid stacktrace line.

    Args:
      stacktrace_line: A string to be checked.

    Returns:
      True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True
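  # For example, assuming BUCKET_ID == 4 (its actual value is defined in
  # lib/bucket.py), a line like '1024 1024 1 @ 123' passes the check: it has
  # BUCKET_ID + 1 words, with '@' at index BUCKET_ID - 1.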


class DumpList(object):
  """Represents a sequence of heap profile dumps."""

  def __init__(self, dump_list):
    self._dump_list = dump_list

  @staticmethod
  def load(path_list):
    LOGGER.info('Loading heap dump profiles.')
    dump_list = []
    for path in path_list:
      dump_list.append(Dump.load(path, ' '))
    return DumpList(dump_list)

  def __len__(self):
    return len(self._dump_list)

  def __iter__(self):
    for dump in self._dump_list:
      yield dump

  def __getitem__(self, index):
    return self._dump_list[index]


class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
  )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) becomes False.

  Returns:
    A pair of an integer indicating the line number after the skipped lines,
    and a boolean which is True if a line was found for which
    skipping_condition is False.
  """
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True
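
# A small illustration of skip_while (made-up lines):
#   lines = ['A\n', 'B\n', 'GLOBAL_STATS:\n']
#   skip_while(0, len(lines), lambda n: lines[n] != 'GLOBAL_STATS:\n')
#   # -> (2, True): line 2 is the first for which the condition is False.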