Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(84)

Side by Side Diff: tools/find_runtime_symbols/static_symbols.py

Issue 10826008: Load static symbol information lazily with some clean-ups. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: refine Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/find_runtime_symbols/procedure_boundaries.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import bisect 5 import bisect
6 import json
6 import os 7 import os
7 import re 8 import re
8 import sys 9 import sys
9 10
11 from parse_proc_maps import parse_proc_maps
12 from util import executable_condition
13
10 14
11 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?') 15 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?')
12 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>') 16 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')
13 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)') 17 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')
14 18
15 19
16 class ParsingException(Exception): 20 class ParsingException(Exception):
17 def __str__(self): 21 def __str__(self):
18 return repr(self.args[0]) 22 return repr(self.args[0])
19 23
20 24
25 class StaticSymbols(object):
26 """Represents static symbol information."""
27
28 def __init__(self, maps, procedure_boundaries):
29 self.maps = maps
30 self.procedure_boundaries = procedure_boundaries
31
32 # TODO(dmikurube): It will be deprecated.
33 @staticmethod
34 def _load_nm(prepared_data_dir, maps_filename, nm_json_filename):
35 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:
36 maps = parse_proc_maps(f)
37 with open(os.path.join(prepared_data_dir, nm_json_filename), mode='r') as f:
38 nm_files = json.load(f)
39
40 symbol_tables = {}
41 for entry in maps.iter(executable_condition):
42 if nm_files.has_key(entry.name):
43 if nm_files[entry.name]['format'] == 'bsd':
44 with open(os.path.join(prepared_data_dir,
45 nm_files[entry.name]['file']), mode='r') as f:
46 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(
47 f, nm_files[entry.name]['mangled'])
48
49 return StaticSymbols(maps, symbol_tables)
50
51 @staticmethod
52 def _load_files(prepared_data_dir, maps_filename, files_filename):
53 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:
54 maps = parse_proc_maps(f)
55 with open(os.path.join(prepared_data_dir, files_filename), mode='r') as f:
56 files = json.load(f)
57
58 symbol_tables = {}
59 for entry in maps.iter(executable_condition):
60 if entry.name in files:
61 if 'nm' in files[entry.name]:
62 nm_entry = files[entry.name]['nm']
63 if nm_entry['format'] == 'bsd':
64 with open(os.path.join(prepared_data_dir, nm_entry['file']),
65 mode='r') as f:
66 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(
67 f, nm_entry['mangled'])
68 if 'readelf-e' in files:
69 readelf_entry = files[entry.name]['readelf-e']
70 # TODO(dmikurube): Implement it.
71
72 return StaticSymbols(maps, symbol_tables)
73
74 @staticmethod
75 def load(prepared_data_dir):
76 if os.path.exists(os.path.join(prepared_data_dir, 'nm.json')):
77 return StaticSymbols._load_nm(prepared_data_dir, 'maps', 'nm.json')
78 else:
79 return StaticSymbols._load_files(prepared_data_dir, 'maps', 'files.json')
80
81
21 class ProcedureBoundary(object): 82 class ProcedureBoundary(object):
22 """A class for a procedure symbol and an address range for the symbol.""" 83 """A class for a procedure symbol and an address range for the symbol."""
23 84
24 def __init__(self, start, end, name): 85 def __init__(self, start, end, name):
25 self.start = start 86 self.start = start
26 self.end = end 87 self.end = end
27 self.name = name 88 self.name = name
28 89
29 90
30 class ProcedureBoundaryTable(object): 91 class ProcedureBoundaryTable(object):
(...skipping 27 matching lines...) Expand all
58 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function) 119 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
59 if not number: 120 if not number:
60 break 121 break
61 while True: 122 while True:
62 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function) 123 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
63 if not number: 124 if not number:
64 break 125 break
65 return _LEADING_TYPE_PATTERN.sub('\g<1>', function) 126 return _LEADING_TYPE_PATTERN.sub('\g<1>', function)
66 127
67 128
68 def get_procedure_boundaries_from_nm_bsd(f, mangled=False): 129 def _parse_nm_bsd_line(line):
130 if line[8] == ' ':
131 return line[0:8], line[9], line[11:]
132 elif line[16] == ' ':
133 return line[0:16], line[17], line[19:]
134 raise ParsingException('Invalid nm output.')
135
136
137 def _get_static_symbols_from_nm_bsd(f, mangled=False):
69 """Gets procedure boundaries from a result of nm -n --format bsd. 138 """Gets procedure boundaries from a result of nm -n --format bsd.
70 139
71 Args: 140 Args:
72 f: A file object containing a result of nm. It must be sorted and 141 f: A file object containing a result of nm. It must be sorted and
73 in BSD-style. (Use "[eu-]nm -n --format bsd") 142 in BSD-style. (Use "[eu-]nm -n --format bsd")
74 143
75 Returns: 144 Returns:
76 A result ProcedureBoundaryTable object. 145 A result ProcedureBoundaryTable object.
77 """ 146 """
78 symbol_table = ProcedureBoundaryTable() 147 symbol_table = ProcedureBoundaryTable()
79 148
80 last_start = 0 149 last_start = 0
81 routine = '' 150 routine = ''
82 151
83 for line in f: 152 for line in f:
84 symbol_info = line.rstrip().split(None, 2) 153 sym_value, sym_type, sym_name = _parse_nm_bsd_line(line)
85 if len(symbol_info) == 3:
86 if len(symbol_info[0]) == 1:
87 symbol_info = line.split(None, 1)
88 (sym_type, this_routine) = symbol_info
89 sym_value = ''
90 else:
91 (sym_value, sym_type, this_routine) = symbol_info
92 elif len(symbol_info) == 2:
93 if len(symbol_info[0]) == 1:
94 (sym_type, this_routine) = symbol_info
95 sym_value = ''
96 elif len(symbol_info[0]) == 8 or len(symbol_info[0]) == 16:
97 (sym_value, this_routine) = symbol_info
98 sym_type = ' '
99 else:
100 raise ParsingException('Invalid output 1 from (eu-)nm.')
101 else:
102 raise ParsingException('Invalid output 2 from (eu-)nm.')
103 154
104 if sym_value == '': 155 if sym_value[0] == ' ':
105 continue 156 continue
106 157
107 start_val = int(sym_value, 16) 158 start_val = int(sym_value, 16)
108 159
109 # It's possible for two symbols to share the same address, if 160 # It's possible for two symbols to share the same address, if
110 # one is a zero-length variable (like __start_google_malloc) or 161 # one is a zero-length variable (like __start_google_malloc) or
111 # one symbol is a weak alias to another (like __libc_malloc). 162 # one symbol is a weak alias to another (like __libc_malloc).
112 # In such cases, we want to ignore all values except for the 163 # In such cases, we want to ignore all values except for the
113 # actual symbol, which in nm-speak has type "T". The logic 164 # actual symbol, which in nm-speak has type "T". The logic
114 # below does this, though it's a bit tricky: what happens when 165 # below does this, though it's a bit tricky: what happens when
115 # we have a series of lines with the same address, is the first 166 # we have a series of lines with the same address, is the first
116 # one gets queued up to be processed. However, it won't 167 # one gets queued up to be processed. However, it won't
117 # *actually* be processed until later, when we read a line with 168 # *actually* be processed until later, when we read a line with
118 # a different address. That means that as long as we're reading 169 # a different address. That means that as long as we're reading
119 # lines with the same address, we have a chance to replace that 170 # lines with the same address, we have a chance to replace that
120 # item in the queue, which we do whenever we see a 'T' entry -- 171 # item in the queue, which we do whenever we see a 'T' entry --
121 # that is, a line with type 'T'. If we never see a 'T' entry, 172 # that is, a line with type 'T'. If we never see a 'T' entry,
122 # we'll just go ahead and process the first entry (which never 173 # we'll just go ahead and process the first entry (which never
123 # got touched in the queue), and ignore the others. 174 # got touched in the queue), and ignore the others.
124 if start_val == last_start and (sym_type == 't' or sym_type == 'T'): 175 if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
125 # We are the 'T' symbol at this address, replace previous symbol. 176 # We are the 'T' symbol at this address, replace previous symbol.
126 routine = this_routine 177 routine = sym_name
127 continue 178 continue
128 elif start_val == last_start: 179 elif start_val == last_start:
129 # We're not the 'T' symbol at this address, so ignore us. 180 # We're not the 'T' symbol at this address, so ignore us.
130 continue 181 continue
131 182
132 # Tag this routine with the starting address in case the image 183 # Tag this routine with the starting address in case the image
133 # has multiple occurrences of this routine. We use a syntax 184 # has multiple occurrences of this routine. We use a syntax
134 # that resembles template parameters that are automatically 185 # that resembles template parameters that are automatically
135 # stripped out by ShortFunctionName() 186 # stripped out by ShortFunctionName()
136 this_routine += "<%016x>" % start_val 187 sym_name += "<%016x>" % start_val
137 188
138 if not mangled: 189 if not mangled:
139 routine = _get_short_function_name(routine) 190 routine = _get_short_function_name(routine)
140 symbol_table.append(ProcedureBoundary(last_start, start_val, routine)) 191 symbol_table.append(ProcedureBoundary(last_start, start_val, routine))
141 192
142 last_start = start_val 193 last_start = start_val
143 routine = this_routine 194 routine = sym_name
144 195
145 if not mangled: 196 if not mangled:
146 routine = _get_short_function_name(routine) 197 routine = _get_short_function_name(routine)
147 symbol_table.append(ProcedureBoundary(last_start, last_start, routine)) 198 symbol_table.append(ProcedureBoundary(last_start, last_start, routine))
148 return symbol_table 199 return symbol_table
OLDNEW
« no previous file with comments | « tools/find_runtime_symbols/procedure_boundaries.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698