Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(215)

Side by Side Diff: tools/find_runtime_symbols/static_symbols.py

Issue 10825075: Classify memory usage by allocated type in Deep Memory Profiler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebased Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/find_runtime_symbols/proc_maps.py ('k') | tools/find_runtime_symbols/util.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import bisect 5 import bisect
6 import json
7 import os 6 import os
8 import re 7 import re
9 import sys 8 import sys
10 9
11 from parse_proc_maps import parse_proc_maps
12 from util import executable_condition
13
14 10
15 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?') 11 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?')
16 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>') 12 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')
17 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)') 13 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')
14 _READELF_SECTION_HEADER_PATTER = re.compile(
15 '^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
16 '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
17 '([0-9]+)\s+([0-9]+)\s+([0-9]+)')
18 18
19 19
20 class ParsingException(Exception): 20 class ParsingException(Exception):
21 def __str__(self): 21 def __str__(self):
22 return repr(self.args[0]) 22 return repr(self.args[0])
23 23
24 24
25 class StaticSymbols(object): 25 class AddressMapping(object):
26 """Represents static symbol information.""" 26 def __init__(self):
27 self._symbol_map = {}
27 28
28 def __init__(self, maps, procedure_boundaries): 29 def append(self, start, entry):
29 self.maps = maps 30 self._symbol_map[start] = entry
30 self.procedure_boundaries = procedure_boundaries
31 31
32 # TODO(dmikurube): It will be deprecated. 32 def find(self, address):
33 @staticmethod 33 return self._symbol_map.get(address)
34 def _load_nm(prepared_data_dir, maps_filename, nm_json_filename):
35 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:
36 maps = parse_proc_maps(f)
37 with open(os.path.join(prepared_data_dir, nm_json_filename), mode='r') as f:
38 nm_files = json.load(f)
39
40 symbol_tables = {}
41 for entry in maps.iter(executable_condition):
42 if nm_files.has_key(entry.name):
43 if nm_files[entry.name]['format'] == 'bsd':
44 with open(os.path.join(prepared_data_dir,
45 nm_files[entry.name]['file']), mode='r') as f:
46 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(
47 f, nm_files[entry.name]['mangled'])
48
49 return StaticSymbols(maps, symbol_tables)
50
51 @staticmethod
52 def _load_files(prepared_data_dir, maps_filename, files_filename):
53 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:
54 maps = parse_proc_maps(f)
55 with open(os.path.join(prepared_data_dir, files_filename), mode='r') as f:
56 files = json.load(f)
57
58 symbol_tables = {}
59 for entry in maps.iter(executable_condition):
60 if entry.name in files:
61 if 'nm' in files[entry.name]:
62 nm_entry = files[entry.name]['nm']
63 if nm_entry['format'] == 'bsd':
64 with open(os.path.join(prepared_data_dir, nm_entry['file']),
65 mode='r') as f:
66 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(
67 f, nm_entry['mangled'])
68 if 'readelf-e' in files:
69 readelf_entry = files[entry.name]['readelf-e']
70 # TODO(dmikurube) Implement it.
71
72 return StaticSymbols(maps, symbol_tables)
73
74 @staticmethod
75 def load(prepared_data_dir):
76 if os.path.exists(os.path.join(prepared_data_dir, 'nm.json')):
77 return StaticSymbols._load_nm(prepared_data_dir, 'maps', 'nm.json')
78 else:
79 return StaticSymbols._load_files(prepared_data_dir, 'maps', 'files.json')
80 34
81 35
82 class ProcedureBoundary(object): 36 class RangeAddressMapping(AddressMapping):
37 def __init__(self):
38 AddressMapping.__init__(self)
39 self._sorted_start_list = []
40 self._is_sorted = True
41
42 def append(self, start, entry):
43 if self._sorted_start_list:
44 if self._sorted_start_list[-1] > start:
45 self._is_sorted = False
46 elif self._sorted_start_list[-1] == start:
47 return
48 self._sorted_start_list.append(start)
49 self._symbol_map[start] = entry
50
51 def find(self, address):
52 if not self._is_sorted:
53 self._sorted_start_list.sort()
54 self._is_sorted = True
55 found_index = bisect.bisect_left(self._sorted_start_list, address)
56 found_start_address = self._sorted_start_list[found_index - 1]
57 return self._symbol_map[found_start_address]
58
59
60 class Procedure(object):
83 """A class for a procedure symbol and an address range for the symbol.""" 61 """A class for a procedure symbol and an address range for the symbol."""
84 62
85 def __init__(self, start, end, name): 63 def __init__(self, start, end, name):
86 self.start = start 64 self.start = start
87 self.end = end 65 self.end = end
88 self.name = name 66 self.name = name
89 67
68 def __eq__(self, other):
69 return (self.start == other.start and
70 self.end == other.end and
71 self.name == other.name)
90 72
91 class ProcedureBoundaryTable(object): 73 def __ne__(self, other):
92 """A class of a set of ProcedureBoundary.""" 74 return not self.__eq__(other)
93 75
94 def __init__(self): 76 def __str__(self):
95 self.sorted_value_list = [] 77 return '%x-%x: %s' % (self.start, self.end, self.name)
96 self.dictionary = {}
97 self.sorted = True
98
99 def append(self, entry):
100 if self.sorted_value_list:
101 if self.sorted_value_list[-1] > entry.start:
102 self.sorted = False
103 elif self.sorted_value_list[-1] == entry.start:
104 return
105 self.sorted_value_list.append(entry.start)
106 self.dictionary[entry.start] = entry
107
108 def find_procedure(self, address):
109 if not self.sorted:
110 self.sorted_value_list.sort()
111 self.sorted = True
112 found_index = bisect.bisect_left(self.sorted_value_list, address)
113 found_start_address = self.sorted_value_list[found_index - 1]
114 return self.dictionary[found_start_address]
115 78
116 79
117 def _get_short_function_name(function): 80 class ElfSection(object):
118 while True: 81 """A class for an elf section header."""
119 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function) 82
120 if not number: 83 def __init__(
121 break 84 self, number, name, stype, address, offset, size, es, flg, lk, inf, al):
122 while True: 85 self.number = number
123 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function) 86 self.name = name
124 if not number: 87 self.stype = stype
125 break 88 self.address = address
126 return _LEADING_TYPE_PATTERN.sub('\g<1>', function) 89 self.offset = offset
90 self.size = size
91 self.es = es
92 self.flg = flg
93 self.lk = lk
94 self.inf = inf
95 self.al = al
96
97 def __eq__(self, other):
98 return (self.number == other.number and
99 self.name == other.name and
100 self.stype == other.stype and
101 self.address == other.address and
102 self.offset == other.offset and
103 self.size == other.size and
104 self.es == other.es and
105 self.flg == other.flg and
106 self.lk == other.lk and
107 self.inf == other.inf and
108 self.al == other.al)
109
110 def __ne__(self, other):
111 return not self.__eq__(other)
112
113 def __str__(self):
114 return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)
127 115
128 116
129 def _parse_nm_bsd_line(line): 117 class StaticSymbolsInFile(object):
130 if line[8] == ' ': 118 """Represents static symbol information in a binary file."""
131 return line[0:8], line[9], line[11:]
132 elif line[16] == ' ':
133 return line[0:16], line[17], line[19:]
134 raise ParsingException('Invalid nm output.')
135 119
120 def __init__(self, my_name):
121 self.my_name = my_name
122 self._elf_sections = []
123 self._procedures = RangeAddressMapping()
124 self._typeinfos = AddressMapping()
136 125
137 def _get_static_symbols_from_nm_bsd(f, mangled=False): 126 def _append_elf_section(self, elf_section):
138 """Gets procedure boundaries from a result of nm -n --format bsd. 127 self._elf_sections.append(elf_section)
139 128
140 Args: 129 def _append_procedure(self, start, procedure):
141 f: A file object containing a result of nm. It must be sorted and 130 self._procedures.append(start, procedure)
142 in BSD-style. (Use "[eu-]nm -n --format bsd")
143 131
144 Returns: 132 def _append_typeinfo(self, start, typeinfo):
145 A result ProcedureBoundaryTable object. 133 self._typeinfos.append(start, typeinfo)
146 """
147 symbol_table = ProcedureBoundaryTable()
148 134
149 last_start = 0 135 def _find_symbol_by_runtime_address(self, address, vma, target):
150 routine = '' 136 if not (vma.begin <= address < vma.end):
137 return None
151 138
152 for line in f: 139 if vma.name != self.my_name:
153 sym_value, sym_type, sym_name = _parse_nm_bsd_line(line) 140 return None
154 141
155 if sym_value[0] == ' ': 142 file_offset = address - (vma.begin - vma.offset)
156 continue 143 elf_address = None
144 for section in self._elf_sections:
145 if section.offset <= file_offset < (section.offset + section.size):
146 elf_address = section.address + file_offset - section.offset
147 if not elf_address:
148 return None
157 149
158 start_val = int(sym_value, 16) 150 return target.find(elf_address)
159 151
160 # It's possible for two symbols to share the same address, if 152 def find_procedure_by_runtime_address(self, address, vma):
161 # one is a zero-length variable (like __start_google_malloc) or 153 return self._find_symbol_by_runtime_address(address, vma, self._procedures)
162 # one symbol is a weak alias to another (like __libc_malloc). 154
163 # In such cases, we want to ignore all values except for the 155 def find_typeinfo_by_runtime_address(self, address, vma):
164 # actual symbol, which in nm-speak has type "T". The logic 156 return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)
165 # below does this, though it's a bit tricky: what happens when 157
166 # we have a series of lines with the same address, is the first 158 def load_readelf_ew(self, f):
167 # one gets queued up to be processed. However, it won't 159 found_header = False
168 # *actually* be processed until later, when we read a line with 160 for line in f:
169 # a different address. That means that as long as we're reading 161 if line.rstrip() == 'Section Headers:':
170 # lines with the same address, we have a chance to replace that 162 found_header = True
171 # item in the queue, which we do whenever we see a 'T' entry -- 163 break
172 # that is, a line with type 'T'. If we never see a 'T' entry, 164 if not found_header:
173 # we'll just go ahead and process the first entry (which never 165 return None
174 # got touched in the queue), and ignore the others. 166
175 if start_val == last_start and (sym_type == 't' or sym_type == 'T'): 167 for line in f:
176 # We are the 'T' symbol at this address, replace previous symbol. 168 line = line.rstrip()
169 matched = _READELF_SECTION_HEADER_PATTER.match(line)
170 if matched:
171 self._append_elf_section(ElfSection(
172 int(matched.group(1), 10), # number
173 matched.group(2), # name
174 matched.group(3), # stype
175 int(matched.group(4), 16), # address
176 int(matched.group(5), 16), # offset
177 int(matched.group(6), 16), # size
178 matched.group(7), # es
179 matched.group(8), # flg
180 matched.group(9), # lk
181 matched.group(10), # inf
182 matched.group(11) # al
183 ))
184 else:
185 if line in ('Key to Flags:', 'Program Headers:'):
186 break
187
188 def _parse_nm_bsd_line(self, line):
189 if line[8] == ' ':
190 return line[0:8], line[9], line[11:]
191 elif line[16] == ' ':
192 return line[0:16], line[17], line[19:]
193 raise ParsingException('Invalid nm output.')
194
195 def _get_short_function_name(self, function):
196 while True:
197 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
198 if not number:
199 break
200 while True:
201 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
202 if not number:
203 break
204 return _LEADING_TYPE_PATTERN.sub('\g<1>', function)
205
206 def load_nm_bsd(self, f, mangled=False):
207 last_start = 0
208 routine = ''
209
210 for line in f:
211 line = line.rstrip()
212 sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)
213
214 if sym_value[0] == ' ':
215 continue
216
217 start_val = int(sym_value, 16)
218
219 if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
220 (not mangled and sym_name.startswith('typeinfo'))):
221 self._append_typeinfo(start_val, sym_name)
222
223 # It's possible for two symbols to share the same address, if
224 # one is a zero-length variable (like __start_google_malloc) or
225 # one symbol is a weak alias to another (like __libc_malloc).
226 # In such cases, we want to ignore all values except for the
227 # actual symbol, which in nm-speak has type "T". The logic
228 # below does this, though it's a bit tricky: what happens when
229 # we have a series of lines with the same address, is the first
230 # one gets queued up to be processed. However, it won't
231 # *actually* be processed until later, when we read a line with
232 # a different address. That means that as long as we're reading
233 # lines with the same address, we have a chance to replace that
234 # item in the queue, which we do whenever we see a 'T' entry --
235 # that is, a line with type 'T'. If we never see a 'T' entry,
236 # we'll just go ahead and process the first entry (which never
237 # got touched in the queue), and ignore the others.
238 if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
239 # We are the 'T' symbol at this address, replace previous symbol.
240 routine = sym_name
241 continue
242 elif start_val == last_start:
243 # We're not the 'T' symbol at this address, so ignore us.
244 continue
245
246 # Tag this routine with the starting address in case the image
247 # has multiple occurrences of this routine. We use a syntax
248 # that resembles template parameters that are automatically
249 # stripped out by ShortFunctionName()
250 sym_name += "<%016x>" % start_val
251
252 if not mangled:
253 routine = self._get_short_function_name(routine)
254 self._append_procedure(
255 last_start, Procedure(last_start, start_val, routine))
256
257 last_start = start_val
177 routine = sym_name 258 routine = sym_name
178 continue
179 elif start_val == last_start:
180 # We're not the 'T' symbol at this address, so ignore us.
181 continue
182
183 # Tag this routine with the starting address in case the image
184 # has multiple occurrences of this routine. We use a syntax
185 # that resembles template parameters that are automatically
186 # stripped out by ShortFunctionName()
187 sym_name += "<%016x>" % start_val
188 259
189 if not mangled: 260 if not mangled:
190 routine = _get_short_function_name(routine) 261 routine = self._get_short_function_name(routine)
191 symbol_table.append(ProcedureBoundary(last_start, start_val, routine)) 262 self._append_procedure(
192 263 last_start, Procedure(last_start, last_start, routine))
193 last_start = start_val
194 routine = sym_name
195
196 if not mangled:
197 routine = _get_short_function_name(routine)
198 symbol_table.append(ProcedureBoundary(last_start, last_start, routine))
199 return symbol_table
OLDNEW
« no previous file with comments | « tools/find_runtime_symbols/proc_maps.py ('k') | tools/find_runtime_symbols/util.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698