Chromium Code Reviews

Side by Side Diff: tools/deep_memory_profiler/dmprof

Issue 11417048: Retry: Add a first test for tools/deep_memory_profiler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 1 month ago
NEW: tools/deep_memory_profiler/dmprof (a bash wrapper that re-directs to dmprof.py)

1 #!/usr/bin/env bash
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 # Re-direct the arguments to dmprof.py.
7
8 BASEDIR=`dirname $0`
9 ARGV="$@"
10
11 PYTHONPATH=$BASEDIR/../python/google python \
12 "$BASEDIR/dmprof.py" $ARGV

OLD: tools/deep_memory_profiler/dmprof (the original Python script, continued below)

1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """The deep heap profiler script for Chrome."""
7
8 from datetime import datetime
9 import json
10 import logging
11 import optparse
12 import os
13 import re
14 import shutil
15 import subprocess
16 import sys
17 import tempfile
18
19 BASE_PATH = os.path.dirname(os.path.abspath(__file__))
20 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
21 BASE_PATH, os.pardir, 'find_runtime_symbols')
22 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
23
24 from find_runtime_symbols import find_runtime_symbols_list
25 from find_runtime_symbols import find_runtime_typeinfo_symbols_list
26 from find_runtime_symbols import RuntimeSymbolsInProcess
27 from prepare_symbol_info import prepare_symbol_info
28
29 BUCKET_ID = 5
30 VIRTUAL = 0
31 COMMITTED = 1
32 ALLOC_COUNT = 2
33 FREE_COUNT = 3
34 NULL_REGEX = re.compile('')
35
36 LOGGER = logging.getLogger('dmprof')
37 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')
38 FUNCTION_ADDRESS = 'function'
39 TYPEINFO_ADDRESS = 'typeinfo'
40
41
42 # Heap Profile Dump versions
43
44 # DUMP_DEEP_[1-4] are obsolete.
45 # DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
46 # DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
47 # DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
48 # DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
49 # DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
50 DUMP_DEEP_1 = 'DUMP_DEEP_1'
51 DUMP_DEEP_2 = 'DUMP_DEEP_2'
52 DUMP_DEEP_3 = 'DUMP_DEEP_3'
53 DUMP_DEEP_4 = 'DUMP_DEEP_4'
54
55 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
56
57 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
58 # malloc and mmap are identified in bucket files.
59 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
60 DUMP_DEEP_5 = 'DUMP_DEEP_5'
61
62
63 # Heap Profile Policy versions
64
65 # POLICY_DEEP_1 DOES NOT include allocation_type columns.
66 # mmap regions are distinguished with mmap frames in the pattern column.
67 POLICY_DEEP_1 = 'POLICY_DEEP_1'
68
69 # POLICY_DEEP_2 DOES include allocation_type columns.
70 # mmap regions are distinguished with the allocation_type column.
71 POLICY_DEEP_2 = 'POLICY_DEEP_2'
72
73 # POLICY_DEEP_3 is in JSON format.
74 POLICY_DEEP_3 = 'POLICY_DEEP_3'
75
76 # POLICY_DEEP_4 contains typeinfo.
77 POLICY_DEEP_4 = 'POLICY_DEEP_4'
78
79
80 class EmptyDumpException(Exception):
81 def __init__(self, value):
82 self.value = value
83 def __str__(self):
84 return repr(self.value)
85
86
87 class ParsingException(Exception):
88 def __init__(self, value):
89 self.value = value
90 def __str__(self):
91 return repr(self.value)
92
93
94 class InvalidDumpException(ParsingException):
95 def __init__(self, value):
96 self.value = value
97 def __str__(self):
98 return "invalid heap profile dump: %s" % repr(self.value)
99
100
101 class ObsoleteDumpVersionException(ParsingException):
102 def __init__(self, value):
103 self.value = value
104 def __str__(self):
105 return "obsolete heap profile dump version: %s" % repr(self.value)
106
107
108 def skip_while(index, max_index, skipping_condition):
109 """Increments |index| until |skipping_condition|(|index|) is False.
110
111 Returns:
112 A pair of (an integer index of the first line after the skipped ones,
113 and a boolean which is True if a line for which |skipping_condition|
114 is False was found before reaching |max_index|).
115 """
116 while skipping_condition(index):
117 index += 1
118 if index >= max_index:
119 return index, False
120 return index, True
121
122
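A minimal usage sketch of skip_while; the lines and the resulting values in the trailing comment are invented for illustration:

  lines = ['# comment\n', '# comment\n', 'data\n']
  index, found = skip_while(0, len(lines),
                            lambda n: lines[n].startswith('#'))
  # index == 2 (the first non-matching line), found == True
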
123 class SymbolMapping(object):
124 """Manages all symbol information on process memory mapping.
125
126 The symbol information consists of all symbols in the binary files obtained
127 by find_runtime_symbols/prepare_symbol_info.py which uses /proc/<pid>/maps,
128 nm and so on. It is minimum requisite information to run dmprof.
129
130 The information is prepared in a directory "|prefix|.symmap" by prepare().
131 The directory is more portable than the Chromium binaries themselves;
132 users can save it and re-run the analysis later with the saved directory.
133
134 Note that loading the symbol information takes a long time since it is
135 generally very large -- it covers all symbols in the binaries, not only
136 the functions and types actually used. Used symbols can be cached in the "SymbolCache" class.
137 """
138 def __init__(self, prefix):
139 self._prefix = prefix
140 self._prepared_symbol_mapping_path = None
141 self._loaded_symbol_mapping = None
142
143 def prepare(self):
144 """Extracts symbol mapping from binaries and prepares it to use.
145
146 The symbol mapping is stored in a directory whose name is stored in
147 |self._prepared_symbol_mapping_path|.
148
149 Returns:
150 True if succeeded.
151 """
152 LOGGER.info('Preparing symbol mapping...')
153 self._prepared_symbol_mapping_path, used_tempdir = prepare_symbol_info(
154 self._prefix + '.maps', self._prefix + '.symmap', True)
155 if self._prepared_symbol_mapping_path:
156 LOGGER.info(' Prepared symbol mapping.')
157 if used_tempdir:
158 LOGGER.warn(' Using a temporary directory for symbol mapping.')
159 LOGGER.warn(' Delete it by yourself.')
160 LOGGER.warn(' Or, move the directory by yourself to use it later.')
161 return True
162 else:
163 LOGGER.warn(' Failed to prepare symbol mapping.')
164 return False
165
166 def get(self):
167 """Returns symbol mapping.
168
169 Returns:
170 Loaded symbol mapping. None if failed.
171 """
172 if not self._prepared_symbol_mapping_path and not self.prepare():
173 return None
174 if not self._loaded_symbol_mapping:
175 LOGGER.info('Loading symbol mapping...')
176 self._loaded_symbol_mapping = RuntimeSymbolsInProcess.load(
177 self._prepared_symbol_mapping_path)
178 return self._loaded_symbol_mapping
179
180
181 class SymbolCache(object):
182 """Manages cache of used symbol mapping.
183
184 The original symbol mapping is provided by "SymbolMapping" (built with
185 tools such as maps, nm and readelf), and "SymbolCache" just caches how
186 dmprof interprets each address, to speed up repeated analyses of the
187 same binary and profile dumps. Handling the full symbol mapping in
188 "SymbolMapping" takes a long time; "SymbolCache" keeps only the used mappings, in memory and in files.
189 """
190 def __init__(self, prefix):
191 self._prefix = prefix
192 self._symbol_cache_paths = {
193 FUNCTION_ADDRESS: prefix + '.funcsym',
194 TYPEINFO_ADDRESS: prefix + '.typesym',
195 }
196 self._find_runtime_symbols_functions = {
197 FUNCTION_ADDRESS: find_runtime_symbols_list,
198 TYPEINFO_ADDRESS: find_runtime_typeinfo_symbols_list,
199 }
200 self._symbol_caches = {
201 FUNCTION_ADDRESS: {},
202 TYPEINFO_ADDRESS: {},
203 }
204
205 def update(self, address_type, bucket_set, symbol_mapping):
206 """Updates symbol mapping on memory and in a ".*sym" cache file.
207
208 It reads cached symbol mapping from a ".*sym" file if it exists. Then,
209 it looks up unresolved addresses from a given "SymbolMapping". Finally,
210 both symbol mappings on memory and in the ".*sym" cache file are updated.
211
212 Symbol files are formatted as follows:
213 <Address> <Symbol>
214 <Address> <Symbol>
215 <Address> <Symbol>
216 ...
217
218 Args:
219 address_type: A type of addresses to update. It should be one of
220 FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
221 bucket_set: A BucketSet object.
222 symbol_mapping: A SymbolMapping object.
223 """
224 self._load(address_type)
225
226 unresolved_addresses = sorted(
227 address for address in bucket_set.iter_addresses(address_type)
228 if address not in self._symbol_caches[address_type])
229
230 if not unresolved_addresses:
231 LOGGER.info('No need to resolve any more addresses.')
232 return
233
234 symbol_cache_path = self._symbol_cache_paths[address_type]
235 with open(symbol_cache_path, mode='a+') as symbol_f:
236 LOGGER.info('Resolving %d unresolved addresses.' %
237 len(unresolved_addresses))
238 symbol_list = self._find_runtime_symbols_functions[address_type](
239 symbol_mapping.get(), unresolved_addresses)
240
241 for address, symbol in zip(unresolved_addresses, symbol_list):
242 stripped_symbol = symbol.strip() or '??'
243 self._symbol_caches[address_type][address] = stripped_symbol
244 symbol_f.write('%x %s\n' % (address, stripped_symbol))
245
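A sketch of a ".funcsym" cache file in the "<Address> <Symbol>" format described above; the addresses and symbols are invented:

  7f6b1c2d3e40 v8::internal::Heap::AllocateRaw
  7f6b1c2d4f80 WTF::fastMalloc
  7f6b1c2d5aa0 ??

Each line maps a hexadecimal address to the resolved symbol ('??' when no symbol was found), matching what update() writes and _load() reads back.
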
246 def lookup(self, address_type, address):
247 """Looks up a symbol for a given |address|.
248
249 Args:
250 address_type: A type of addresses to lookup. It should be one of
251 FUNCTION_ADDRESS or TYPEINFO_ADDRESS.
252 address: An integer that represents an address.
253
254 Returns:
255 A string that represents a symbol.
256 """
257 return self._symbol_caches[address_type].get(address)
258
259 def _load(self, address_type):
260 symbol_cache_path = self._symbol_cache_paths[address_type]
261 try:
262 with open(symbol_cache_path, mode='r') as symbol_f:
263 for line in symbol_f:
264 items = line.rstrip().split(None, 1)
265 if len(items) == 1:
266 items.append('??')
267 self._symbol_caches[address_type][int(items[0], 16)] = items[1]
268 LOGGER.info('Loaded %d entries from symbol cache.' %
269 len(self._symbol_caches[address_type]))
270 except IOError:
271 LOGGER.info('No valid symbol cache file is found.')
272
273
274 class Rule(object):
275 """Represents one matching rule in a policy file."""
276
277 def __init__(self, name, mmap, stacktrace_pattern, typeinfo_pattern=None):
278 self._name = name
279 self._mmap = mmap
280 self._stacktrace_pattern = re.compile(stacktrace_pattern + r'\Z')
281 if typeinfo_pattern:
282 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')
283 else:
284 self._typeinfo_pattern = None
285
286 @property
287 def name(self):
288 return self._name
289
290 @property
291 def mmap(self):
292 return self._mmap
293
294 @property
295 def stacktrace_pattern(self):
296 return self._stacktrace_pattern
297
298 @property
299 def typeinfo_pattern(self):
300 return self._typeinfo_pattern
301
302
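A small sketch of how Rule anchors its patterns: appending r'\Z' in the constructor means a pattern must match the whole joined stacktrace, not just a prefix. The rule and stacktraces here are invented:

  rule = Rule('tc-webkit', False, '.*WTF::fastMalloc')
  rule.stacktrace_pattern.match('MemoryCache::add WTF::fastMalloc')  # matches
  rule.stacktrace_pattern.match('WTF::fastMalloc BucketSet::load')   # None; r'\Z' rejects trailing frames
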
303 class Policy(object):
304 """Represents a policy, a content of a policy file."""
305
306 def __init__(self, rules, version, components):
307 self._rules = rules
308 self._version = version
309 self._components = components
310
311 @property
312 def rules(self):
313 return self._rules
314
315 @property
316 def version(self):
317 return self._version
318
319 @property
320 def components(self):
321 return self._components
322
323 def find(self, bucket):
324 """Finds a matching component name which a given |bucket| belongs to.
325
326 Args:
327 bucket: A Bucket object to be searched for.
328
329 Returns:
330 A string representing a component name.
331 """
332 if not bucket:
333 return 'no-bucket'
334 if bucket.component_cache:
335 return bucket.component_cache
336
337 stacktrace = bucket.symbolized_joined_stacktrace
338 typeinfo = bucket.symbolized_typeinfo
339 if typeinfo.startswith('0x'):
340 typeinfo = bucket.typeinfo_name
341
342 for rule in self._rules:
343 if (bucket.mmap == rule.mmap and
344 rule.stacktrace_pattern.match(stacktrace) and
345 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
346 bucket.component_cache = rule.name
347 return rule.name
348
349 assert False, 'No rule matched; a policy must end with catch-all rules.'
350
351 @staticmethod
352 def load(filename, format):
353 """Loads a policy file of |filename| in a |format|.
354
355 Args:
356 filename: A filename to be loaded.
357 format: A string to specify a format of the file. Only 'json' is
358 supported for now.
359
360 Returns:
361 A loaded Policy object.
362 """
363 with open(os.path.join(BASE_PATH, filename)) as policy_f:
364 return Policy.parse(policy_f, format)
365
366 @staticmethod
367 def parse(policy_f, format):
368 """Parses a policy file content in a |format|.
369
370 Args:
371 policy_f: An IO object to be loaded.
372 format: A string to specify a format of the file. Only 'json' is
373 supported for now.
374
375 Returns:
376 A loaded Policy object.
377 """
378 if format == 'json':
379 return Policy._parse_json(policy_f)
380 else:
381 return None
382
383 @staticmethod
384 def _parse_json(policy_f):
385 """Parses policy file in json format.
386
387 A policy file contains component names and their stacktrace patterns
388 written as regular expressions. The patterns are matched against the
389 symbols of each stacktrace, in the order they appear in the policy file.
390
391 Args:
392 policy_f: A File/IO object to read.
393
394 Returns:
395 A loaded policy object.
396 """
397 policy = json.load(policy_f)
398
399 rules = []
400 for rule in policy['rules']:
401 rules.append(Rule(
402 rule['name'],
403 rule['allocator'] == 'mmap',
404 rule['stacktrace'],
405 rule['typeinfo'] if 'typeinfo' in rule else None))
406 return Policy(rules, policy['version'], policy['components'])
407
408
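A minimal sketch of a policy file in the JSON format that _parse_json expects; the component names and patterns are invented, but the keys follow the parsing code above ('typeinfo' is optional per rule):

  {
    "version": "POLICY_DEEP_3",
    "components": ["mmap-v8", "mmap-other", "tc-webkit", "tc-other"],
    "rules": [
      {"name": "mmap-v8", "allocator": "mmap", "stacktrace": ".*v8::.*"},
      {"name": "mmap-other", "allocator": "mmap", "stacktrace": ".*"},
      {"name": "tc-webkit", "allocator": "malloc", "stacktrace": ".*WTF::.*"},
      {"name": "tc-other", "allocator": "malloc", "stacktrace": ".*"}
    ]
  }

Since Policy.find() asserts that some rule matches every bucket, a policy is expected to end with catch-all rules like the ".*" ones above; a real policy also carries the bookkeeping components that _apply_policy expects, such as 'mmap-total-log' and 'tc-total-log'.
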
409 class PolicySet(object):
410 """Represents a set of policies."""
411
412 def __init__(self, policy_directory):
413 self._policy_directory = policy_directory
414
415 @staticmethod
416 def load(labels=None):
417 """Loads a set of policies via the "default policy directory".
418
419 The "default policy directory" contains pairs of policies and their labels.
420 For example, a policy "policy.l0.json" is labeled "l0" in the default
421 policy directory "policies.json".
422
423 All policies in the directory are loaded by default. Policies can be
424 limited by |labels|.
425
426 Args:
427 labels: An array that contains policy labels to be loaded.
428
429 Returns:
430 A PolicySet object.
431 """
432 default_policy_directory = PolicySet._load_default_policy_directory()
433 if labels:
434 specified_policy_directory = {}
435 for label in labels:
436 if label in default_policy_directory:
437 specified_policy_directory[label] = default_policy_directory[label]
438 # TODO(dmikurube): Load an un-labeled policy file.
439 return PolicySet._load_policies(specified_policy_directory)
440 else:
441 return PolicySet._load_policies(default_policy_directory)
442
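A sketch of the "default policy directory" file (policies.json) that load() reads; the labels and file names are invented, but the per-label 'file' and 'format' keys match _load_policies below:

  {
    "l0": {"file": "policy.l0.json", "format": "json"},
    "l1": {"file": "policy.l1.json", "format": "json"}
  }
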
443 def __len__(self):
444 return len(self._policy_directory)
445
446 def __iter__(self):
447 for label in self._policy_directory:
448 yield label
449
450 def __getitem__(self, label):
451 return self._policy_directory[label]
452
453 @staticmethod
454 def _load_default_policy_directory():
455 with open(POLICIES_JSON_PATH, mode='r') as policies_f:
456 default_policy_directory = json.load(policies_f)
457 return default_policy_directory
458
459 @staticmethod
460 def _load_policies(directory):
461 LOGGER.info('Loading policy files.')
462 policies = {}
463 for label in directory:
464 LOGGER.info(' %s: %s' % (label, directory[label]['file']))
465 loaded = Policy.load(directory[label]['file'], directory[label]['format'])
466 if loaded:
467 policies[label] = loaded
468 return PolicySet(policies)
469
470
471 class Bucket(object):
472 """Represents a bucket, which is a unit of memory block classification."""
473
474 def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name):
475 self._stacktrace = stacktrace
476 self._mmap = mmap
477 self._typeinfo = typeinfo
478 self._typeinfo_name = typeinfo_name
479
480 self._symbolized_stacktrace = stacktrace
481 self._symbolized_joined_stacktrace = ''
482 self._symbolized_typeinfo = typeinfo_name
483
484 self.component_cache = ''
485
486 def symbolize(self, symbol_cache):
487 """Makes a symbolized stacktrace and typeinfo with |symbol_cache|.
488
489 Args:
490 symbol_cache: A SymbolCache object.
491 """
492 # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
493 self._symbolized_stacktrace = [
494 symbol_cache.lookup(FUNCTION_ADDRESS, address)
495 for address in self._stacktrace]
496 self._symbolized_joined_stacktrace = ' '.join(self._symbolized_stacktrace)
497 if not self._typeinfo:
498 self._symbolized_typeinfo = 'no typeinfo'
499 else:
500 self._symbolized_typeinfo = symbol_cache.lookup(
501 TYPEINFO_ADDRESS, self._typeinfo)
502 if not self._symbolized_typeinfo:
503 self._symbolized_typeinfo = 'no typeinfo'
504
505 def clear_component_cache(self):
506 self.component_cache = ''
507
508 @property
509 def stacktrace(self):
510 return self._stacktrace
511
512 @property
513 def mmap(self):
514 return self._mmap
515
516 @property
517 def typeinfo(self):
518 return self._typeinfo
519
520 @property
521 def typeinfo_name(self):
522 return self._typeinfo_name
523
524 @property
525 def symbolized_stacktrace(self):
526 return self._symbolized_stacktrace
527
528 @property
529 def symbolized_joined_stacktrace(self):
530 return self._symbolized_joined_stacktrace
531
532 @property
533 def symbolized_typeinfo(self):
534 return self._symbolized_typeinfo
535
536
537 class BucketSet(object):
538 """Represents a set of bucket."""
539 def __init__(self):
540 self._buckets = {}
541 self._addresses = {
542 FUNCTION_ADDRESS: set(),
543 TYPEINFO_ADDRESS: set(),
544 }
545
546 @staticmethod
547 def load(prefix):
548 """Loads all related bucket files.
549
550 Args:
551 prefix: A prefix string for bucket file names.
552
553 Returns:
554 A loaded BucketSet object.
555 """
556 LOGGER.info('Loading bucket files.')
557 bucket_set = BucketSet()
558
559 n = 0
560 while True:
561 path = '%s.%04d.buckets' % (prefix, n)
562 if not os.path.exists(path):
563 if n > 10:
564 break
565 n += 1
566 continue
567 LOGGER.info(' %s' % path)
568 with open(path, 'r') as f:
569 bucket_set._load_file(f)
570 n += 1
571
572 return bucket_set
573
574 def _load_file(self, bucket_f):
575 for line in bucket_f:
576 words = line.split()
577 typeinfo = None
578 typeinfo_name = ''
579 stacktrace_begin = 2
580 for index, word in enumerate(words):
581 if index < 2:
582 continue
583 if word[0] == 't':
584 typeinfo = int(word[1:], 16)
585 self._addresses[TYPEINFO_ADDRESS].add(typeinfo)
586 elif word[0] == 'n':
587 typeinfo_name = word[1:]
588 else:
589 stacktrace_begin = index
590 break
591 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]]
592 for frame in stacktrace:
593 self._addresses[FUNCTION_ADDRESS].add(frame)
594 self._buckets[int(words[0])] = Bucket(
595 stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name)
596
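A sketch of bucket file lines in the format _load_file parses; the IDs and addresses are invented. Word 0 is the bucket ID, word 1 is the allocator ('mmap' or 'malloc'), optional words prefixed 't' (typeinfo address) and 'n' (type name) follow, and the remaining words are the stacktrace addresses:

  123 malloc t0x7f6b00001234 0x7f6b1c2d3e40 0x7f6b1c2d4f80
  124 mmap 0x7f6b1c2d5aa0 0x7f6b1c2d3e40
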
597 def __iter__(self):
598 for bucket_id, bucket_content in self._buckets.iteritems():
599 yield bucket_id, bucket_content
600
601 def __getitem__(self, bucket_id):
602 return self._buckets[bucket_id]
603
604 def get(self, bucket_id):
605 return self._buckets.get(bucket_id)
606
607 def symbolize(self, symbol_cache):
608 for bucket_content in self._buckets.itervalues():
609 bucket_content.symbolize(symbol_cache)
610
611 def clear_component_cache(self):
612 for bucket_content in self._buckets.itervalues():
613 bucket_content.clear_component_cache()
614
615 def iter_addresses(self, address_type):
616 for function in self._addresses[address_type]:
617 yield function
618
619
620 class Dump(object):
621 """Represents a heap profile dump."""
622
623 def __init__(self):
624 self._path = ''
625 self._time = None
626 self._stacktrace_lines = []
627 self._global_stats = {} # used only in apply_policy
628
629 self._version = ''
630 self._lines = []
631
632 @property
633 def path(self):
634 return self._path
635
636 @property
637 def time(self):
638 return self._time
639
640 @property
641 def iter_stacktrace(self):
642 for line in self._stacktrace_lines:
643 yield line
644
645 def global_stat(self, name):
646 return self._global_stats[name]
647
648 @staticmethod
649 def load(path, log_header='Loading a heap profile dump: '):
650 """Loads a heap profile dump.
651
652 Args:
653 path: A file path string to load.
654 log_header: A preceding string for log messages.
655
656 Returns:
657 A loaded Dump object.
658
659 Raises:
660 ParsingException for invalid heap profile dumps.
661 """
662 dump = Dump()
663 dump._path = path
664 dump._time = os.stat(dump._path).st_mtime
665 dump._version = ''
666
667 dump._lines = [line for line in open(dump._path, 'r')
668 if line and not line.startswith('#')]
669
670 try:
671 dump._version, ln = dump._parse_version()
672 dump._parse_global_stats()
673 dump._extract_stacktrace_lines(ln)
674 except EmptyDumpException:
675 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, path))
676 except ParsingException, e:
677 LOGGER.error('%s%s ...error %s' % (log_header, path, e))
678 raise
679 else:
680 LOGGER.info('%s%s (version: %s)' % (log_header, path, dump._version))
681
682 return dump
683
684 def _parse_version(self):
685 """Parses a version string in self._lines.
686
687 Returns:
688 A pair of (a string representing a version of the stacktrace dump,
689 and an integer indicating a line number next to the version string).
690
691 Raises:
692 ParsingException for invalid dump versions.
693 """
694 version = ''
695
696 # Skip until an identifiable line.
697 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
698 if not self._lines:
699 raise EmptyDumpException('Empty heap dump file.')
700 (ln, found) = skip_while(
701 0, len(self._lines),
702 lambda n: not self._lines[n].startswith(headers))
703 if not found:
704 raise InvalidDumpException('No version header.')
705
706 # Identify a version.
707 if self._lines[ln].startswith('heap profile: '):
708 version = self._lines[ln][len('heap profile:'):].strip()
709 if version == DUMP_DEEP_5:
710 (ln, _) = skip_while(
711 ln, len(self._lines),
712 lambda n: self._lines[n] != 'STACKTRACES:\n')
713 elif version in DUMP_DEEP_OBSOLETE:
714 raise ObsoleteDumpVersionException(version)
715 else:
716 raise InvalidDumpException('Invalid version: %s' % version)
717 elif self._lines[ln] == 'STACKTRACES:\n':
718 raise ObsoleteDumpVersionException(DUMP_DEEP_1)
719 elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
720 raise ObsoleteDumpVersionException(DUMP_DEEP_2)
721
722 return (version, ln)
723
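A sketch of the head of a DUMP_DEEP_5 dump as the parsers here expect it; all numbers are invented and most GLOBAL_STATS rows are elided:

  heap profile: DUMP_DEEP_5
  GLOBAL_STATS:
       total      423690240  229445632
       file-exec   93458432   58720256
       ...
  STACKTRACES:
     virtual  committed   alloc    free @ bucket
     1048576     524288      10       2 @ 123

Each GLOBAL_STATS row ends with the virtual and committed sizes, and each stacktrace row has '@' at word BUCKET_ID - 1 followed by the bucket ID.
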
724 def _parse_global_stats(self):
725 """Parses lines in self._lines as global stats."""
726 (ln, _) = skip_while(
727 0, len(self._lines),
728 lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
729
730 global_stat_names = [
731 'total', 'file-exec', 'file-nonexec', 'anonymous', 'stack', 'other',
732 'nonprofiled-absent', 'nonprofiled-anonymous',
733 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
734 'nonprofiled-stack', 'nonprofiled-other',
735 'profiled-mmap', 'profiled-malloc']
736
737 for prefix in global_stat_names:
738 (ln, _) = skip_while(
739 ln, len(self._lines),
740 lambda n: self._lines[n].split()[0] != prefix)
741 words = self._lines[ln].split()
742 self._global_stats[prefix + '_virtual'] = int(words[-2])
743 self._global_stats[prefix + '_committed'] = int(words[-1])
744
745 def _extract_stacktrace_lines(self, line_number):
746 """Extracts the position of stacktrace lines.
747
748 Valid stacktrace lines are stored into self._stacktrace_lines.
749
750 Args:
751 line_number: A line number to start parsing in lines.
752
753 Raises:
754 ParsingException for invalid dump versions.
755 """
756 if self._version == DUMP_DEEP_5:
757 (line_number, _) = skip_while(
758 line_number, len(self._lines),
759 lambda n: not self._lines[n].split()[0].isdigit())
760 stacktrace_start = line_number
761 (line_number, _) = skip_while(
762 line_number, len(self._lines),
763 lambda n: self._check_stacktrace_line(self._lines[n]))
764 self._stacktrace_lines = self._lines[stacktrace_start:line_number]
765
766 elif self._version in DUMP_DEEP_OBSOLETE:
767 raise ObsoleteDumpVersionException(self._version)
768
769 else:
770 raise InvalidDumpException('Invalid version: %s' % self._version)
771
772 @staticmethod
773 def _check_stacktrace_line(stacktrace_line):
774 """Checks if a given stacktrace_line is valid as stacktrace.
775
776 Args:
777 stacktrace_line: A string to be checked.
778
779 Returns:
780 True if the given stacktrace_line is valid.
781 """
782 words = stacktrace_line.split()
783 if len(words) < BUCKET_ID + 1:
784 return False
785 if words[BUCKET_ID - 1] != '@':
786 return False
787 return True
788
789
790 class DumpList(object):
791 """Represents a sequence of heap profile dumps."""
792
793 def __init__(self, dump_list):
794 self._dump_list = dump_list
795
796 @staticmethod
797 def load(path_list):
798 LOGGER.info('Loading heap dump profiles.')
799 dump_list = []
800 for path in path_list:
801 dump_list.append(Dump.load(path, ' '))
802 return DumpList(dump_list)
803
804 def __len__(self):
805 return len(self._dump_list)
806
807 def __iter__(self):
808 for dump in self._dump_list:
809 yield dump
810
811 def __getitem__(self, index):
812 return self._dump_list[index]
813
814
815 class Command(object):
816 """Subclasses are a subcommand for this executable.
817
818 See COMMANDS in main().
819 """
820 def __init__(self, usage):
821 self._parser = optparse.OptionParser(usage)
822
823 @staticmethod
824 def load_basic_files(dump_path, multiple):
825 prefix = Command._find_prefix(dump_path)
826 symbol_mapping = SymbolMapping(prefix)
827 symbol_mapping.prepare()
828 bucket_set = BucketSet.load(prefix)
829 if multiple:
830 dump_list = DumpList.load(Command._find_all_dumps(dump_path))
831 else:
832 dump = Dump.load(dump_path)
833 symbol_cache = SymbolCache(prefix)
834 symbol_cache.update(FUNCTION_ADDRESS, bucket_set, symbol_mapping)
835 symbol_cache.update(TYPEINFO_ADDRESS, bucket_set, symbol_mapping)
836 bucket_set.symbolize(symbol_cache)
837 if multiple:
838 return (bucket_set, dump_list)
839 else:
840 return (bucket_set, dump)
841
842 @staticmethod
843 def _find_prefix(path):
844 return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path)
845
846 @staticmethod
847 def _find_all_dumps(dump_path):
848 prefix = Command._find_prefix(dump_path)
849 dump_path_list = [dump_path]
850
851 n = int(dump_path[-9:-5])  # the NNNN in <prefix>.NNNN.heap
852 n += 1
853 while True:
854 p = '%s.%04d.heap' % (prefix, n)
855 if os.path.exists(p):
856 dump_path_list.append(p)
857 else:
858 break
859 n += 1
860
861 return dump_path_list
862
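A sketch of the dump naming convention these helpers assume; given a hypothetical dump_path 'chrome.1234.0002.heap', _find_prefix returns 'chrome.1234' and _find_all_dumps collects consecutive dumps until a number is missing:

  chrome.1234.0002.heap   <- dump_path; n starts from 0002
  chrome.1234.0003.heap
  chrome.1234.0004.heap   <- collection stops here if 0005 does not exist
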
863 def _parse_args(self, sys_argv, required):
864 options, args = self._parser.parse_args(sys_argv)
865 if len(args) != required + 1:
866 self._parser.error('needs %d argument(s).\n' % required)
867 return None
868 return (options, args)
869
870 def _parse_policy_list(self, options_policy):
871 if options_policy:
872 return options_policy.split(',')
873 else:
874 return None
875
876
877 class StacktraceCommand(Command):
878 def __init__(self):
879 super(StacktraceCommand, self).__init__(
880 'Usage: %prog stacktrace <dump>')
881
882 def do(self, sys_argv):
883 options, args = self._parse_args(sys_argv, 1)
884 dump_path = args[1]
885 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
886
887 StacktraceCommand._output(dump, bucket_set, sys.stdout)
888 return 0
889
890 @staticmethod
891 def _output(dump, bucket_set, out):
892 """Outputs a given stacktrace.
893
894 Args:
895 bucket_set: A BucketSet object.
896 out: A file object to output.
897 """
898 for line in dump.iter_stacktrace:
899 words = line.split()
900 bucket = bucket_set.get(int(words[BUCKET_ID]))
901 if not bucket:
902 continue
903 for i in range(0, BUCKET_ID - 1):
904 out.write(words[i] + ' ')
905 for frame in bucket.symbolized_stacktrace:
906 out.write(frame + ' ')
907 out.write('\n')
908
909
910 class PolicyCommands(Command):
911 def __init__(self, command):
912 super(PolicyCommands, self).__init__(
913 'Usage: %%prog %s [-p POLICY] <first-dump>' % command)
914 self._parser.add_option('-p', '--policy', type='string', dest='policy',
915 help='profile with POLICY', metavar='POLICY')
916
917 def _set_up(self, sys_argv):
918 options, args = self._parse_args(sys_argv, 1)
919 dump_path = args[1]
920 (bucket_set, dumps) = Command.load_basic_files(dump_path, True)
921
922 policy_set = PolicySet.load(self._parse_policy_list(options.policy))
923 return policy_set, dumps, bucket_set
924
925 def _apply_policy(self, dump, policy, bucket_set, first_dump_time):
926 """Aggregates the total memory size of each component.
927
928 Iterates through all stacktraces and attributes each to a component based
929 on the policy. Rules are matched in order, so the rule order matters.
930
931 Args:
932 dump: A Dump object.
933 policy: A Policy object.
934 bucket_set: A BucketSet object.
935 first_dump_time: An integer representing the time when the first dump
936 was taken.
937
938 Returns:
939 A dict mapping component names to their sizes.
940 """
941 LOGGER.info(' %s' % dump.path)
942 sizes = dict((c, 0) for c in policy.components)
943
944 PolicyCommands._accumulate(dump, policy, bucket_set, sizes)
945
946 sizes['mmap-no-log'] = (
947 dump.global_stat('profiled-mmap_committed') -
948 sizes['mmap-total-log'])
949 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
950 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')
951
952 sizes['tc-no-log'] = (
953 dump.global_stat('profiled-malloc_committed') -
954 sizes['tc-total-log'])
955 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
956 sizes['tc-unused'] = (
957 sizes['mmap-tcmalloc'] -
958 dump.global_stat('profiled-malloc_committed'))
959 sizes['tc-total'] = sizes['mmap-tcmalloc']
960
961 for key, value in {
962 'total': 'total_committed',
963 'filemapped': 'file_committed',
964 'file-exec': 'file-exec_committed',
965 'file-nonexec': 'file-nonexec_committed',
966 'anonymous': 'anonymous_committed',
967 'stack': 'stack_committed',
968 'other': 'other_committed',
969 'unhooked-absent': 'nonprofiled-absent_committed',
970 'unhooked-anonymous': 'nonprofiled-anonymous_committed',
971 'unhooked-file-exec': 'nonprofiled-file-exec_committed',
972 'unhooked-file-nonexec': 'nonprofiled-file-nonexec_committed',
973 'unhooked-stack': 'nonprofiled-stack_committed',
974 'unhooked-other': 'nonprofiled-other_committed',
975 'total-vm': 'total_virtual',
976 'filemapped-vm': 'file_virtual',
977 'anonymous-vm': 'anonymous_virtual',
978 'other-vm': 'other_virtual' }.iteritems():
979 if key in sizes:
980 sizes[key] = dump.global_stat(value)
981
982 if 'mustbezero' in sizes:
983 removed_list = (
984 'profiled-mmap_committed',
985 'nonprofiled-absent_committed',
986 'nonprofiled-anonymous_committed',
987 'nonprofiled-file-exec_committed',
988 'nonprofiled-file-nonexec_committed',
989 'nonprofiled-stack_committed',
990 'nonprofiled-other_committed')
991 sizes['mustbezero'] = (
992 dump.global_stat('total_committed') -
993 sum(dump.global_stat(removed) for removed in removed_list))
994 if 'total-exclude-profiler' in sizes:
995 sizes['total-exclude-profiler'] = (
996 dump.global_stat('total_committed') -
997 (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
998 if 'hour' in sizes:
999 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
1000 if 'minute' in sizes:
1001 sizes['minute'] = (dump.time - first_dump_time) / 60.0
1002 if 'second' in sizes:
1003 sizes['second'] = dump.time - first_dump_time
1004
1005 return sizes
1006
1007 @staticmethod
1008 def _accumulate(dump, policy, bucket_set, sizes):
1009 for line in dump.iter_stacktrace:
1010 words = line.split()
1011 bucket = bucket_set.get(int(words[BUCKET_ID]))
1012 component_match = policy.find(bucket)
1013 sizes[component_match] += int(words[COMMITTED])
1014
1015 if component_match.startswith('tc-'):
1016 sizes['tc-total-log'] += int(words[COMMITTED])
1017 elif component_match.startswith('mmap-'):
1018 sizes['mmap-total-log'] += int(words[COMMITTED])
1019 else:
1020 sizes['other-total-log'] += int(words[COMMITTED])
1021
1022
1023 class CSVCommand(PolicyCommands):
1024 def __init__(self):
1025 super(CSVCommand, self).__init__('csv')
1026
1027 def do(self, sys_argv):
1028 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1029 return self._output(policy_set, dumps, bucket_set, sys.stdout)
1030
1031 def _output(self, policy_set, dumps, bucket_set, out):
1032 max_components = 0
1033 for label in policy_set:
1034 max_components = max(max_components, len(policy_set[label].components))
1035
1036 for label in sorted(policy_set):
1037 components = policy_set[label].components
1038 if len(policy_set) > 1:
1039 out.write('%s%s\n' % (label, ',' * (max_components - 1)))
1040 out.write('%s%s\n' % (
1041 ','.join(components), ',' * (max_components - len(components))))
1042
1043 LOGGER.info('Applying a policy %s to...' % label)
1044 for dump in dumps:
1045 component_sizes = self._apply_policy(
1046 dump, policy_set[label], bucket_set, dumps[0].time)
1047 s = []
1048 for c in components:
1049 if c in ('hour', 'minute', 'second'):
1050 s.append('%05.5f' % (component_sizes[c]))
1051 else:
1052 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
1053 out.write('%s%s\n' % (
1054 ','.join(s), ',' * (max_components - len(components))))
1055
1056 bucket_set.clear_component_cache()
1057
1058 return 0
1059
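A sketch of the CSV that _output produces, assuming a single loaded policy (so no label row) with three components and two dumps; the sizes are invented, printed in MB except for hour/minute/second columns:

  mmap-v8,tc-webkit,tc-other
  12.50000,3.25000,0.75000
  13.00000,3.50000,0.80000
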
1060
1061 class JSONCommand(PolicyCommands):
1062 def __init__(self):
1063 super(JSONCommand, self).__init__('json')
1064
1065 def do(self, sys_argv):
1066 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1067 return self._output(policy_set, dumps, bucket_set, sys.stdout)
1068
1069 def _output(self, policy_set, dumps, bucket_set, out):
1070 json_base = {
1071 'version': 'JSON_DEEP_2',
1072 'policies': {},
1073 }
1074
1075 for label in sorted(policy_set):
1076 json_base['policies'][label] = {
1077 'legends': policy_set[label].components,
1078 'snapshots': [],
1079 }
1080
1081 LOGGER.info('Applying a policy %s to...' % label)
1082 for dump in dumps:
1083 component_sizes = self._apply_policy(
1084 dump, policy_set[label], bucket_set, dumps[0].time)
1085 component_sizes['dump_path'] = dump.path
1086 component_sizes['dump_time'] = datetime.fromtimestamp(
1087 dump.time).strftime('%Y-%m-%d %H:%M:%S')
1088 json_base['policies'][label]['snapshots'].append(component_sizes)
1089
1090 bucket_set.clear_component_cache()
1091
1092 json.dump(json_base, out, indent=2, sort_keys=True)
1093
1094 return 0
1095
1096
1097 class ListCommand(PolicyCommands):
1098 def __init__(self):
1099 super(ListCommand, self).__init__('list')
1100
1101 def do(self, sys_argv):
1102 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1103 return self._output(policy_set, dumps, bucket_set, sys.stdout)
1104
1105 def _output(self, policy_set, dumps, bucket_set, out):
1106 for label in sorted(policy_set):
1107 LOGGER.info('Applying a policy %s to...' % label)
1108 for dump in dumps:
1109 component_sizes = self._apply_policy(
1110 dump, policy_set[label], bucket_set, dump.time)
1111 out.write('%s for %s:\n' % (label, dump.path))
1112 for c in policy_set[label].components:
1113 if c in ['hour', 'minute', 'second']:
1114 out.write('%40s %12.3f\n' % (c, component_sizes[c]))
1115 else:
1116 out.write('%40s %12d\n' % (c, component_sizes[c]))
1117
1118 bucket_set.clear_component_cache()
1119
1120 return 0
1121
1122
1123 class ExpandCommand(Command):
1124 def __init__(self):
1125 super(ExpandCommand, self).__init__(
1126 'Usage: %prog expand <dump> <policy> <component> <depth>')
1127
1128 def do(self, sys_argv):
1129 options, args = self._parse_args(sys_argv, 4)
1130 dump_path = args[1]
1131 target_policy = args[2]
1132 component_name = args[3]
1133 depth = args[4]
1134 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
1135 policy_set = PolicySet.load(self._parse_policy_list(target_policy))
1136
1137 self._output(dump, policy_set[target_policy], bucket_set,
1138 component_name, int(depth), sys.stdout)
1139 return 0
1140
1141 def _output(self, dump, policy, bucket_set, component_name, depth, out):
1142 """Prints all stacktraces in a given component of given depth.
1143
1144 Args:
1145 dump: A Dump object.
1146 policy: A Policy object.
1147 bucket_set: A BucketSet object.
1148 component_name: A name of component for filtering.
1149 depth: An integer representing depth to be printed.
1150 out: An IO object to output.
1151 """
1152 sizes = {}
1153
1154 ExpandCommand._accumulate(
1155 dump, policy, bucket_set, component_name, depth, sizes)
1156
1157 sorted_sizes_list = sorted(
1158 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
1159 total = 0
1160 for size_pair in sorted_sizes_list:
1161 out.write('%10d %s\n' % (size_pair[1], size_pair[0]))
1162 total += size_pair[1]
1163 LOGGER.info('total: %d\n' % total)
1164
1165 @staticmethod
1166 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes):
1167 for line in dump.iter_stacktrace:
1168 words = line.split()
1169 bucket = bucket_set.get(int(words[BUCKET_ID]))
1170 component_match = policy.find(bucket)
1171 if component_match == component_name:
1172 stacktrace_sequence = ''
1173 if bucket.typeinfo:
1174 stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo
1175 stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name
1176 for stack in bucket.symbolized_stacktrace[
1177 0 : min(len(bucket.symbolized_stacktrace), 1 + depth)]:
1178 stacktrace_sequence += stack + ' '
1179 if stacktrace_sequence not in sizes:
1180 sizes[stacktrace_sequence] = 0
1181 sizes[stacktrace_sequence] += int(words[COMMITTED])
1182
1183
1184 class PProfCommand(Command):
1185 def __init__(self):
1186 super(PProfCommand, self).__init__(
1187 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
1188 self._parser.add_option('-c', '--component', type='string',
1189 dest='component',
1190 help='restrict to COMPONENT', metavar='COMPONENT')
1191
1192 def do(self, sys_argv):
1193 options, args = self._parse_args(sys_argv, 2)
1194
1195 dump_path = args[1]
1196 target_policy = args[2]
1197 component = options.component
1198
1199 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
1200 policy_set = PolicySet.load(self._parse_policy_list(target_policy))
1201
1202 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f:
1203 maps_lines = maps_f.readlines()
1204 PProfCommand._output(
1205 dump, policy_set[target_policy], bucket_set, maps_lines, component,
1206 sys.stdout)
1207
1208 return 0
1209
1210 @staticmethod
1211 def _output(dump, policy, bucket_set, maps_lines, component_name, out):
1212 """Converts the heap profile dump so it can be processed by pprof.
1213
1214 Args:
1215 dump: A Dump object.
1216 policy: A Policy object.
1217 bucket_set: A BucketSet object.
1218 maps_lines: A list of strings containing /proc/.../maps.
1219 component_name: A name of component for filtering.
1220 out: An IO object to output.
1221 """
1222 out.write('heap profile: ')
1223 com_committed, com_allocs = PProfCommand._accumulate(
1224 dump, policy, bucket_set, component_name)
1225
1226 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
1227 com_allocs, com_committed, com_allocs, com_committed))
1228
1229 PProfCommand._output_stacktrace_lines(
1230 dump, policy, bucket_set, component_name, out)
1231
1232 out.write('MAPPED_LIBRARIES:\n')
1233 for line in maps_lines:
1234 out.write(line)
1235
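A sketch of the pprof-compatible text that _output produces; the counts, sizes, addresses and mapped library are invented:

  heap profile:      8:  5242880 [     8:  5242880] @ heapprofile
       3:  1048576 [     3:  1048576] @ 0x00007f6b1c2d3e40 0x00007f6b1c2d4f80
  MAPPED_LIBRARIES:
  7f6b1c000000-7f6b1d000000 r-xp 00000000 08:01 1234  /opt/chrome/chrome
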
1236 @staticmethod
1237 def _accumulate(dump, policy, bucket_set, component_name):
1238 """Accumulates size of committed chunks and the number of allocated chunks.
1239
1240 Args:
1241 dump: A Dump object.
1242 policy: A Policy object.
1243 bucket_set: A BucketSet object.
1244 component_name: A name of component for filtering.
1245
1246 Returns:
1247 Two integers which are the accumulated size of committed regions and the
1248 number of allocated chunks, respectively.
1249 """
1250 com_committed = 0
1251 com_allocs = 0
1252 for line in dump.iter_stacktrace:
1253 words = line.split()
1254 bucket = bucket_set.get(int(words[BUCKET_ID]))
1255 if (not bucket or
1256 (component_name and component_name != policy.find(bucket))):
1257 continue
1258
1259 com_committed += int(words[COMMITTED])
1260 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
1261
1262 return com_committed, com_allocs
1263
1264 @staticmethod
1265 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out):
1266 """Prints information of stacktrace lines for pprof.
1267
1268 Args:
1269 dump: A Dump object.
1270 policy: A Policy object.
1271 bucket_set: A BucketSet object.
1272 component_name: A name of component for filtering.
1273 out: An IO object to output.
1274 """
1275 for line in dump.iter_stacktrace:
1276 words = line.split()
1277 bucket = bucket_set.get(int(words[BUCKET_ID]))
1278 if (not bucket or
1279 (component_name and component_name != policy.find(bucket))):
1280 continue
1281
1282 out.write('%6d: %8s [%6d: %8s] @' % (
1283 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
1284 words[COMMITTED],
1285 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
1286 words[COMMITTED]))
1287 for address in bucket.stacktrace:
1288 out.write(' 0x%016x' % address)
1289 out.write('\n')
1290
1291
1292 def main():
1293 COMMANDS = {
1294 'csv': CSVCommand,
1295 'expand': ExpandCommand,
1296 'json': JSONCommand,
1297 'list': ListCommand,
1298 'pprof': PProfCommand,
1299 'stacktrace': StacktraceCommand,
1300 }
1301
1302 if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS:
1303 sys.stderr.write("""Usage: %s <command> [options] [<args>]
1304
1305 Commands:
1306 csv Classify memory usage in CSV
1307 expand Show all stacktraces contained in the specified component
1308 json Classify memory usage in JSON
1309 list Classify memory usage in simple listing format
1310 pprof Format the profile dump so that it can be processed by pprof
1311 stacktrace Convert runtime addresses to symbol names
1312
1313 Quick Reference:
1314 dmprof csv [-p POLICY] <first-dump>
1315 dmprof expand <dump> <policy> <component> <depth>
1316 dmprof json [-p POLICY] <first-dump>
1317 dmprof list [-p POLICY] <first-dump>
1318 dmprof pprof [-c COMPONENT] <dump> <policy>
1319 dmprof stacktrace <dump>
1320 """ % (sys.argv[0]))
1321 sys.exit(1)
1322 action = sys.argv.pop(1)
1323
1324 LOGGER.setLevel(logging.DEBUG)
1325 handler = logging.StreamHandler()
1326 handler.setLevel(logging.INFO)
1327 formatter = logging.Formatter('%(message)s')
1328 handler.setFormatter(formatter)
1329 LOGGER.addHandler(handler)
1330
1331 try:
1332 errorcode = COMMANDS[action]().do(sys.argv)
1333 except ParsingException, e:
1334 errorcode = 1
1335 sys.stderr.write('Exit by parsing error: %s\n' % e)
1336
1337 return errorcode
1338
1339
1340 if __name__ == '__main__':
1341 sys.exit(main())
