Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Unified Diff: tools/find_runtime_symbols/static_symbols.py

Issue 10825075: Classify memory usage by allocated type in Deep Memory Profiler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebased Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/find_runtime_symbols/proc_maps.py ('k') | tools/find_runtime_symbols/util.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/find_runtime_symbols/static_symbols.py
diff --git a/tools/find_runtime_symbols/static_symbols.py b/tools/find_runtime_symbols/static_symbols.py
index 0141202153738491d655c359e13e50a65d69f3da..eb943e18680afdec9c623fccb94065fe45545df3 100644
--- a/tools/find_runtime_symbols/static_symbols.py
+++ b/tools/find_runtime_symbols/static_symbols.py
@@ -3,18 +3,18 @@
# found in the LICENSE file.
import bisect
-import json
import os
import re
import sys
-from parse_proc_maps import parse_proc_maps
-from util import executable_condition
-
_ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?')
_TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')
_LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')
+_READELF_SECTION_HEADER_PATTER = re.compile(
+ '^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
+ '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
+ '([0-9]+)\s+([0-9]+)\s+([0-9]+)')
class ParsingException(Exception):
@@ -22,64 +22,42 @@ class ParsingException(Exception):
return repr(self.args[0])
-class StaticSymbols(object):
- """Represents static symbol information."""
-
- def __init__(self, maps, procedure_boundaries):
- self.maps = maps
- self.procedure_boundaries = procedure_boundaries
-
- # TODO(dmikurube): It will be deprecated.
- @staticmethod
- def _load_nm(prepared_data_dir, maps_filename, nm_json_filename):
- with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:
- maps = parse_proc_maps(f)
- with open(os.path.join(prepared_data_dir, nm_json_filename), mode='r') as f:
- nm_files = json.load(f)
-
- symbol_tables = {}
- for entry in maps.iter(executable_condition):
- if nm_files.has_key(entry.name):
- if nm_files[entry.name]['format'] == 'bsd':
- with open(os.path.join(prepared_data_dir,
- nm_files[entry.name]['file']), mode='r') as f:
- symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(
- f, nm_files[entry.name]['mangled'])
-
- return StaticSymbols(maps, symbol_tables)
-
- @staticmethod
- def _load_files(prepared_data_dir, maps_filename, files_filename):
- with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:
- maps = parse_proc_maps(f)
- with open(os.path.join(prepared_data_dir, files_filename), mode='r') as f:
- files = json.load(f)
-
- symbol_tables = {}
- for entry in maps.iter(executable_condition):
- if entry.name in files:
- if 'nm' in files[entry.name]:
- nm_entry = files[entry.name]['nm']
- if nm_entry['format'] == 'bsd':
- with open(os.path.join(prepared_data_dir, nm_entry['file']),
- mode='r') as f:
- symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(
- f, nm_entry['mangled'])
- if 'readelf-e' in files:
- readelf_entry = files[entry.name]['readelf-e']
- # TODO(dmikurube) Implement it.
-
- return StaticSymbols(maps, symbol_tables)
-
- @staticmethod
- def load(prepared_data_dir):
- if os.path.exists(os.path.join(prepared_data_dir, 'nm.json')):
- return StaticSymbols._load_nm(prepared_data_dir, 'maps', 'nm.json')
- else:
- return StaticSymbols._load_files(prepared_data_dir, 'maps', 'files.json')
-
-
-class ProcedureBoundary(object):
+class AddressMapping(object):
+ def __init__(self):
+ self._symbol_map = {}
+
+ def append(self, start, entry):
+ self._symbol_map[start] = entry
+
+ def find(self, address):
+ return self._symbol_map.get(address)
+
+
+class RangeAddressMapping(AddressMapping):
+ def __init__(self):
+ AddressMapping.__init__(self)
+ self._sorted_start_list = []
+ self._is_sorted = True
+
+ def append(self, start, entry):
+ if self._sorted_start_list:
+ if self._sorted_start_list[-1] > start:
+ self._is_sorted = False
+ elif self._sorted_start_list[-1] == start:
+ return
+ self._sorted_start_list.append(start)
+ self._symbol_map[start] = entry
+
+ def find(self, address):
+ if not self._is_sorted:
+ self._sorted_start_list.sort()
+ self._is_sorted = True
+ found_index = bisect.bisect_left(self._sorted_start_list, address)
+ found_start_address = self._sorted_start_list[found_index - 1]
+ return self._symbol_map[found_start_address]
+
+
+class Procedure(object):
"""A class for a procedure symbol and an address range for the symbol."""
def __init__(self, start, end, name):
@@ -87,113 +65,199 @@ class ProcedureBoundary(object):
self.end = end
self.name = name
+ def __eq__(self, other):
+ return (self.start == other.start and
+ self.end == other.end and
+ self.name == other.name)
-class ProcedureBoundaryTable(object):
- """A class of a set of ProcedureBoundary."""
+ def __ne__(self, other):
+ return not self.__eq__(other)
- def __init__(self):
- self.sorted_value_list = []
- self.dictionary = {}
- self.sorted = True
-
- def append(self, entry):
- if self.sorted_value_list:
- if self.sorted_value_list[-1] > entry.start:
- self.sorted = False
- elif self.sorted_value_list[-1] == entry.start:
- return
- self.sorted_value_list.append(entry.start)
- self.dictionary[entry.start] = entry
-
- def find_procedure(self, address):
- if not self.sorted:
- self.sorted_value_list.sort()
- self.sorted = True
- found_index = bisect.bisect_left(self.sorted_value_list, address)
- found_start_address = self.sorted_value_list[found_index - 1]
- return self.dictionary[found_start_address]
-
-
-def _get_short_function_name(function):
- while True:
- function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
- if not number:
- break
- while True:
- function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
- if not number:
- break
- return _LEADING_TYPE_PATTERN.sub('\g<1>', function)
-
-
-def _parse_nm_bsd_line(line):
- if line[8] == ' ':
- return line[0:8], line[9], line[11:]
- elif line[16] == ' ':
- return line[0:16], line[17], line[19:]
- raise ParsingException('Invalid nm output.')
-
-
-def _get_static_symbols_from_nm_bsd(f, mangled=False):
- """Gets procedure boundaries from a result of nm -n --format bsd.
-
- Args:
- f: A file object containing a result of nm. It must be sorted and
- in BSD-style. (Use "[eu-]nm -n --format bsd")
-
- Returns:
- A result ProcedureBoundaryTable object.
- """
- symbol_table = ProcedureBoundaryTable()
-
- last_start = 0
- routine = ''
-
- for line in f:
- sym_value, sym_type, sym_name = _parse_nm_bsd_line(line)
-
- if sym_value[0] == ' ':
- continue
-
- start_val = int(sym_value, 16)
-
- # It's possible for two symbols to share the same address, if
- # one is a zero-length variable (like __start_google_malloc) or
- # one symbol is a weak alias to another (like __libc_malloc).
- # In such cases, we want to ignore all values except for the
- # actual symbol, which in nm-speak has type "T". The logic
- # below does this, though it's a bit tricky: what happens when
- # we have a series of lines with the same address, is the first
- # one gets queued up to be processed. However, it won't
- # *actually* be processed until later, when we read a line with
- # a different address. That means that as long as we're reading
- # lines with the same address, we have a chance to replace that
- # item in the queue, which we do whenever we see a 'T' entry --
- # that is, a line with type 'T'. If we never see a 'T' entry,
- # we'll just go ahead and process the first entry (which never
- # got touched in the queue), and ignore the others.
- if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
- # We are the 'T' symbol at this address, replace previous symbol.
- routine = sym_name
- continue
- elif start_val == last_start:
- # We're not the 'T' symbol at this address, so ignore us.
- continue
+ def __str__(self):
+ return '%x-%x: %s' % (self.start, self.end, self.name)
- # Tag this routine with the starting address in case the image
- # has multiple occurrences of this routine. We use a syntax
- # that resembles template paramters that are automatically
- # stripped out by ShortFunctionName()
- sym_name += "<%016x>" % start_val
- if not mangled:
- routine = _get_short_function_name(routine)
- symbol_table.append(ProcedureBoundary(last_start, start_val, routine))
+class ElfSection(object):
+ """A class for an elf section header."""
- last_start = start_val
- routine = sym_name
+ def __init__(
+ self, number, name, stype, address, offset, size, es, flg, lk, inf, al):
+ self.number = number
+ self.name = name
+ self.stype = stype
+ self.address = address
+ self.offset = offset
+ self.size = size
+ self.es = es
+ self.flg = flg
+ self.lk = lk
+ self.inf = inf
+ self.al = al
+
+ def __eq__(self, other):
+ return (self.number == other.number and
+ self.name == other.name and
+ self.stype == other.stype and
+ self.address == other.address and
+ self.offset == other.offset and
+ self.size == other.size and
+ self.es == other.es and
+ self.flg == other.flg and
+ self.lk == other.lk and
+ self.inf == other.inf and
+ self.al == other.al)
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
- if not mangled:
- routine = _get_short_function_name(routine)
- symbol_table.append(ProcedureBoundary(last_start, last_start, routine))
- return symbol_table
+ def __str__(self):
+ return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)
+
+
+class StaticSymbolsInFile(object):
+ """Represents static symbol information in a binary file."""
+
+ def __init__(self, my_name):
+ self.my_name = my_name
+ self._elf_sections = []
+ self._procedures = RangeAddressMapping()
+ self._typeinfos = AddressMapping()
+
+ def _append_elf_section(self, elf_section):
+ self._elf_sections.append(elf_section)
+
+ def _append_procedure(self, start, procedure):
+ self._procedures.append(start, procedure)
+
+ def _append_typeinfo(self, start, typeinfo):
+ self._typeinfos.append(start, typeinfo)
+
+ def _find_symbol_by_runtime_address(self, address, vma, target):
+ if not (vma.begin <= address < vma.end):
+ return None
+
+ if vma.name != self.my_name:
+ return None
+
+ file_offset = address - (vma.begin - vma.offset)
+ elf_address = None
+ for section in self._elf_sections:
+ if section.offset <= file_offset < (section.offset + section.size):
+ elf_address = section.address + file_offset - section.offset
+ if not elf_address:
+ return None
+
+ return target.find(elf_address)
+
+ def find_procedure_by_runtime_address(self, address, vma):
+ return self._find_symbol_by_runtime_address(address, vma, self._procedures)
+
+ def find_typeinfo_by_runtime_address(self, address, vma):
+ return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)
+
+ def load_readelf_ew(self, f):
+ found_header = False
+ for line in f:
+ if line.rstrip() == 'Section Headers:':
+ found_header = True
+ break
+ if not found_header:
+ return None
+
+ for line in f:
+ line = line.rstrip()
+ matched = _READELF_SECTION_HEADER_PATTER.match(line)
+ if matched:
+ self._append_elf_section(ElfSection(
+ int(matched.group(1), 10), # number
+ matched.group(2), # name
+ matched.group(3), # stype
+ int(matched.group(4), 16), # address
+ int(matched.group(5), 16), # offset
+ int(matched.group(6), 16), # size
+ matched.group(7), # es
+ matched.group(8), # flg
+ matched.group(9), # lk
+ matched.group(10), # inf
+ matched.group(11) # al
+ ))
+ else:
+ if line in ('Key to Flags:', 'Program Headers:'):
+ break
+
+ def _parse_nm_bsd_line(self, line):
+ if line[8] == ' ':
+ return line[0:8], line[9], line[11:]
+ elif line[16] == ' ':
+ return line[0:16], line[17], line[19:]
+ raise ParsingException('Invalid nm output.')
+
+ def _get_short_function_name(self, function):
+ while True:
+ function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
+ if not number:
+ break
+ while True:
+ function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
+ if not number:
+ break
+ return _LEADING_TYPE_PATTERN.sub('\g<1>', function)
+
+ def load_nm_bsd(self, f, mangled=False):
+ last_start = 0
+ routine = ''
+
+ for line in f:
+ line = line.rstrip()
+ sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)
+
+ if sym_value[0] == ' ':
+ continue
+
+ start_val = int(sym_value, 16)
+
+ if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
+ (not mangled and sym_name.startswith('typeinfo'))):
+ self._append_typeinfo(start_val, sym_name)
+
+ # It's possible for two symbols to share the same address, if
+ # one is a zero-length variable (like __start_google_malloc) or
+ # one symbol is a weak alias to another (like __libc_malloc).
+ # In such cases, we want to ignore all values except for the
+ # actual symbol, which in nm-speak has type "T". The logic
+ # below does this, though it's a bit tricky: what happens when
+ # we have a series of lines with the same address, is the first
+ # one gets queued up to be processed. However, it won't
+ # *actually* be processed until later, when we read a line with
+ # a different address. That means that as long as we're reading
+ # lines with the same address, we have a chance to replace that
+ # item in the queue, which we do whenever we see a 'T' entry --
+ # that is, a line with type 'T'. If we never see a 'T' entry,
+ # we'll just go ahead and process the first entry (which never
+ # got touched in the queue), and ignore the others.
+ if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
+ # We are the 'T' symbol at this address, replace previous symbol.
+ routine = sym_name
+ continue
+ elif start_val == last_start:
+ # We're not the 'T' symbol at this address, so ignore us.
+ continue
+
+ # Tag this routine with the starting address in case the image
+ # has multiple occurrences of this routine. We use a syntax
+ # that resembles template paramters that are automatically
+ # stripped out by ShortFunctionName()
+ sym_name += "<%016x>" % start_val
+
+ if not mangled:
+ routine = self._get_short_function_name(routine)
+ self._append_procedure(
+ last_start, Procedure(last_start, start_val, routine))
+
+ last_start = start_val
+ routine = sym_name
+
+ if not mangled:
+ routine = self._get_short_function_name(routine)
+ self._append_procedure(
+ last_start, Procedure(last_start, last_start, routine))
« no previous file with comments | « tools/find_runtime_symbols/proc_maps.py ('k') | tools/find_runtime_symbols/util.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698