tools/find_runtime_symbols/static_symbols.py - Issue 10825075: Classify memory usage by allocated type in Deep Memory Profiler.

Side by Side Diff: tools/find_runtime_symbols/static_symbols.py

Issue 10825075: Classify memory usage by allocated type in Deep Memory Profiler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: rebased Created 8 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import bisect	5 import bisect

6 import json

7 import os	6 import os

8 import re	7 import re

9 import sys	8 import sys

10	9

11 from parse_proc_maps import parse_proc_maps

12 from util import executable_condition

13

14	10

15 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]\)(\sconst)?')	11 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]\)(\sconst)?')

16 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')	12 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')

17 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')	13 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')

	14 _READELF_SECTION_HEADER_PATTER = re.compile(

	15 '^\s\[\s(Nr\|\d+)\]\s+(\|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'

	16 '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'

	17 '([0-9]+)\s+([0-9]+)\s+([0-9]+)')

18	18

19	19

20 class ParsingException(Exception):	20 class ParsingException(Exception):

21 def __str__(self):	21 def __str__(self):

22 return repr(self.args[0])	22 return repr(self.args[0])

23	23

24	24

25 class StaticSymbols(object):	25 class AddressMapping(object):

26 """Represents static symbol information."""	26 def __init__(self):

	27 self._symbol_map = {}

27	28

28 def __init__(self, maps, procedure_boundaries):	29 def append(self, start, entry):

29 self.maps = maps	30 self._symbol_map[start] = entry

30 self.procedure_boundaries = procedure_boundaries

31	31

32 # TODO(dmikurube): It will be deprecated.	32 def find(self, address):

33 @staticmethod	33 return self._symbol_map.get(address)

34 def _load_nm(prepared_data_dir, maps_filename, nm_json_filename):

35 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:

36 maps = parse_proc_maps(f)

37 with open(os.path.join(prepared_data_dir, nm_json_filename), mode='r') as f:

38 nm_files = json.load(f)

39

40 symbol_tables = {}

41 for entry in maps.iter(executable_condition):

42 if nm_files.has_key(entry.name):

43 if nm_files[entry.name]['format'] == 'bsd':

44 with open(os.path.join(prepared_data_dir,

45 nm_files[entry.name]['file']), mode='r') as f:

46 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(

47 f, nm_files[entry.name]['mangled'])

48

49 return StaticSymbols(maps, symbol_tables)

50

51 @staticmethod

52 def _load_files(prepared_data_dir, maps_filename, files_filename):

53 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:

54 maps = parse_proc_maps(f)

55 with open(os.path.join(prepared_data_dir, files_filename), mode='r') as f:

56 files = json.load(f)

57

58 symbol_tables = {}

59 for entry in maps.iter(executable_condition):

60 if entry.name in files:

61 if 'nm' in files[entry.name]:

62 nm_entry = files[entry.name]['nm']

63 if nm_entry['format'] == 'bsd':

64 with open(os.path.join(prepared_data_dir, nm_entry['file']),

65 mode='r') as f:

66 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(

67 f, nm_entry['mangled'])

68 if 'readelf-e' in files:

69 readelf_entry = files[entry.name]['readelf-e']

70 # TODO(dmikurube) Implement it.

71

72 return StaticSymbols(maps, symbol_tables)

73

74 @staticmethod

75 def load(prepared_data_dir):

76 if os.path.exists(os.path.join(prepared_data_dir, 'nm.json')):

77 return StaticSymbols._load_nm(prepared_data_dir, 'maps', 'nm.json')

78 else:

79 return StaticSymbols._load_files(prepared_data_dir, 'maps', 'files.json')

80	34

81	35

82 class ProcedureBoundary(object):	36 class RangeAddressMapping(AddressMapping):

	37 def __init__(self):

	38 AddressMapping.__init__(self)

	39 self._sorted_start_list = []

	40 self._is_sorted = True

	41

	42 def append(self, start, entry):

	43 if self._sorted_start_list:

	44 if self._sorted_start_list[-1] > start:

	45 self._is_sorted = False

	46 elif self._sorted_start_list[-1] == start:

	47 return

	48 self._sorted_start_list.append(start)

	49 self._symbol_map[start] = entry

	50

	51 def find(self, address):

	52 if not self._is_sorted:

	53 self._sorted_start_list.sort()

	54 self._is_sorted = True

	55 found_index = bisect.bisect_left(self._sorted_start_list, address)

	56 found_start_address = self._sorted_start_list[found_index - 1]

	57 return self._symbol_map[found_start_address]

	58

	59

	60 class Procedure(object):

83 """A class for a procedure symbol and an address range for the symbol."""	61 """A class for a procedure symbol and an address range for the symbol."""

84	62

85 def __init__(self, start, end, name):	63 def __init__(self, start, end, name):

86 self.start = start	64 self.start = start

87 self.end = end	65 self.end = end

88 self.name = name	66 self.name = name

89	67

	68 def __eq__(self, other):

	69 return (self.start == other.start and

	70 self.end == other.end and

	71 self.name == other.name)

90	72

91 class ProcedureBoundaryTable(object):	73 def __ne__(self, other):

92 """A class of a set of ProcedureBoundary."""	74 return not self.__eq__(other)

93	75

94 def __init__(self):	76 def __str__(self):

95 self.sorted_value_list = []	77 return '%x-%x: %s' % (self.start, self.end, self.name)

96 self.dictionary = {}

97 self.sorted = True

98

99 def append(self, entry):

100 if self.sorted_value_list:

101 if self.sorted_value_list[-1] > entry.start:

102 self.sorted = False

103 elif self.sorted_value_list[-1] == entry.start:

104 return

105 self.sorted_value_list.append(entry.start)

106 self.dictionary[entry.start] = entry

107

108 def find_procedure(self, address):

109 if not self.sorted:

110 self.sorted_value_list.sort()

111 self.sorted = True

112 found_index = bisect.bisect_left(self.sorted_value_list, address)

113 found_start_address = self.sorted_value_list[found_index - 1]

114 return self.dictionary[found_start_address]

115	78

116	79

117 def _get_short_function_name(function):	80 class ElfSection(object):

118 while True:	81 """A class for an elf section header."""

119 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)	82

120 if not number:	83 def __init__(

121 break	84 self, number, name, stype, address, offset, size, es, flg, lk, inf, al):

122 while True:	85 self.number = number

123 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)	86 self.name = name

124 if not number:	87 self.stype = stype

125 break	88 self.address = address

126 return _LEADING_TYPE_PATTERN.sub('\g<1>', function)	89 self.offset = offset

	90 self.size = size

	91 self.es = es

	92 self.flg = flg

	93 self.lk = lk

	94 self.inf = inf

	95 self.al = al

	96

	97 def __eq__(self, other):

	98 return (self.number == other.number and

	99 self.name == other.name and

	100 self.stype == other.stype and

	101 self.address == other.address and

	102 self.offset == other.offset and

	103 self.size == other.size and

	104 self.es == other.es and

	105 self.flg == other.flg and

	106 self.lk == other.lk and

	107 self.inf == other.inf and

	108 self.al == other.al)

	109

	110 def __ne__(self, other):

	111 return not self.__eq__(other)

	112

	113 def __str__(self):

	114 return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)

127	115

128	116

129 def _parse_nm_bsd_line(line):	117 class StaticSymbolsInFile(object):

130 if line[8] == ' ':	118 """Represents static symbol information in a binary file."""

131 return line[0:8], line[9], line[11:]

132 elif line[16] == ' ':

133 return line[0:16], line[17], line[19:]

134 raise ParsingException('Invalid nm output.')

135	119

	120 def __init__(self, my_name):

	121 self.my_name = my_name

	122 self._elf_sections = []

	123 self._procedures = RangeAddressMapping()

	124 self._typeinfos = AddressMapping()

136	125

137 def _get_static_symbols_from_nm_bsd(f, mangled=False):	126 def _append_elf_section(self, elf_section):

138 """Gets procedure boundaries from a result of nm -n --format bsd.	127 self._elf_sections.append(elf_section)

139	128

140 Args:	129 def _append_procedure(self, start, procedure):

141 f: A file object containing a result of nm. It must be sorted and	130 self._procedures.append(start, procedure)

142 in BSD-style. (Use "[eu-]nm -n --format bsd")

143	131

144 Returns:	132 def _append_typeinfo(self, start, typeinfo):

145 A result ProcedureBoundaryTable object.	133 self._typeinfos.append(start, typeinfo)

146 """

147 symbol_table = ProcedureBoundaryTable()

148	134

149 last_start = 0	135 def _find_symbol_by_runtime_address(self, address, vma, target):

150 routine = ''	136 if not (vma.begin <= address < vma.end):

	137 return None

151	138

152 for line in f:	139 if vma.name != self.my_name:

153 sym_value, sym_type, sym_name = _parse_nm_bsd_line(line)	140 return None

154	141

155 if sym_value[0] == ' ':	142 file_offset = address - (vma.begin - vma.offset)

156 continue	143 elf_address = None

	144 for section in self._elf_sections:

	145 if section.offset <= file_offset < (section.offset + section.size):

	146 elf_address = section.address + file_offset - section.offset

	147 if not elf_address:

	148 return None

157	149

158 start_val = int(sym_value, 16)	150 return target.find(elf_address)

159	151

160 # It's possible for two symbols to share the same address, if	152 def find_procedure_by_runtime_address(self, address, vma):

161 # one is a zero-length variable (like __start_google_malloc) or	153 return self._find_symbol_by_runtime_address(address, vma, self._procedures)

162 # one symbol is a weak alias to another (like __libc_malloc).	154

163 # In such cases, we want to ignore all values except for the	155 def find_typeinfo_by_runtime_address(self, address, vma):

164 # actual symbol, which in nm-speak has type "T". The logic	156 return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)

165 # below does this, though it's a bit tricky: what happens when	157

166 # we have a series of lines with the same address, is the first	158 def load_readelf_ew(self, f):

167 # one gets queued up to be processed. However, it won't	159 found_header = False

168 # actually be processed until later, when we read a line with	160 for line in f:

169 # a different address. That means that as long as we're reading	161 if line.rstrip() == 'Section Headers:':

170 # lines with the same address, we have a chance to replace that	162 found_header = True

171 # item in the queue, which we do whenever we see a 'T' entry --	163 break

172 # that is, a line with type 'T'. If we never see a 'T' entry,	164 if not found_header:

173 # we'll just go ahead and process the first entry (which never	165 return None

174 # got touched in the queue), and ignore the others.	166

175 if start_val == last_start and (sym_type == 't' or sym_type == 'T'):	167 for line in f:

176 # We are the 'T' symbol at this address, replace previous symbol.	168 line = line.rstrip()

	169 matched = _READELF_SECTION_HEADER_PATTER.match(line)

	170 if matched:

	171 self._append_elf_section(ElfSection(

	172 int(matched.group(1), 10), # number

	173 matched.group(2), # name

	174 matched.group(3), # stype

	175 int(matched.group(4), 16), # address

	176 int(matched.group(5), 16), # offset

	177 int(matched.group(6), 16), # size

	178 matched.group(7), # es

	179 matched.group(8), # flg

	180 matched.group(9), # lk

	181 matched.group(10), # inf

	182 matched.group(11) # al

	183 ))

	184 else:

	185 if line in ('Key to Flags:', 'Program Headers:'):

	186 break

	187

	188 def _parse_nm_bsd_line(self, line):

	189 if line[8] == ' ':

	190 return line[0:8], line[9], line[11:]

	191 elif line[16] == ' ':

	192 return line[0:16], line[17], line[19:]

	193 raise ParsingException('Invalid nm output.')

	194

	195 def _get_short_function_name(self, function):

	196 while True:

	197 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)

	198 if not number:

	199 break

	200 while True:

	201 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)

	202 if not number:

	203 break

	204 return _LEADING_TYPE_PATTERN.sub('\g<1>', function)

	205

	206 def load_nm_bsd(self, f, mangled=False):

	207 last_start = 0

	208 routine = ''

	209

	210 for line in f:

	211 line = line.rstrip()

	212 sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)

	213

	214 if sym_value[0] == ' ':

	215 continue

	216

	217 start_val = int(sym_value, 16)

	218

	219 if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and

	220 (not mangled and sym_name.startswith('typeinfo'))):

	221 self._append_typeinfo(start_val, sym_name)

	222

	223 # It's possible for two symbols to share the same address, if

	224 # one is a zero-length variable (like __start_google_malloc) or

	225 # one symbol is a weak alias to another (like __libc_malloc).

	226 # In such cases, we want to ignore all values except for the

	227 # actual symbol, which in nm-speak has type "T". The logic

	228 # below does this, though it's a bit tricky: what happens when

	229 # we have a series of lines with the same address, is the first

	230 # one gets queued up to be processed. However, it won't

	231 # actually be processed until later, when we read a line with

	232 # a different address. That means that as long as we're reading

	233 # lines with the same address, we have a chance to replace that

	234 # item in the queue, which we do whenever we see a 'T' entry --

	235 # that is, a line with type 'T'. If we never see a 'T' entry,

	236 # we'll just go ahead and process the first entry (which never

	237 # got touched in the queue), and ignore the others.

	238 if start_val == last_start and (sym_type == 't' or sym_type == 'T'):

	239 # We are the 'T' symbol at this address, replace previous symbol.

	240 routine = sym_name

	241 continue

	242 elif start_val == last_start:

	243 # We're not the 'T' symbol at this address, so ignore us.

	244 continue

	245

	246 # Tag this routine with the starting address in case the image

	247 # has multiple occurrences of this routine. We use a syntax

	248 # that resembles template parameters that are automatically

	249 # stripped out by ShortFunctionName()

	250 sym_name += "<%016x>" % start_val

	251

	252 if not mangled:

	253 routine = self._get_short_function_name(routine)

	254 self._append_procedure(

	255 last_start, Procedure(last_start, start_val, routine))

	256

	257 last_start = start_val

177 routine = sym_name	258 routine = sym_name

178 continue

179 elif start_val == last_start:

180 # We're not the 'T' symbol at this address, so ignore us.

181 continue

182

183 # Tag this routine with the starting address in case the image

184 # has multiple occurrences of this routine. We use a syntax

185 # that resembles template parameters that are automatically

186 # stripped out by ShortFunctionName()

187 sym_name += "<%016x>" % start_val

188	259

189 if not mangled:	260 if not mangled:

190 routine = _get_short_function_name(routine)	261 routine = self._get_short_function_name(routine)

191 symbol_table.append(ProcedureBoundary(last_start, start_val, routine))	262 self._append_procedure(

192	263 last_start, Procedure(last_start, last_start, routine))

193 last_start = start_val

194 routine = sym_name

195

196 if not mangled:

197 routine = _get_short_function_name(routine)

198 symbol_table.append(ProcedureBoundary(last_start, last_start, routine))

199 return symbol_table

OLD	NEW

« no previous file with comments | « tools/find_runtime_symbols/proc_maps.py ('k') | tools/find_runtime_symbols/util.py » ('j') | no next file with comments »