OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import bisect | 5 import bisect |
| 6 import json |
6 import os | 7 import os |
7 import re | 8 import re |
8 import sys | 9 import sys |
9 | 10 |
| 11 from parse_proc_maps import parse_proc_maps |
| 12 from util import executable_condition |
| 13 |
10 | 14 |
11 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?') | 15 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?') |
12 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>') | 16 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>') |
13 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)') | 17 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)') |
14 | 18 |
15 | 19 |
class ParsingException(Exception):
  """Raised when nm output cannot be parsed."""

  def __str__(self):
    """Returns the repr of the first argument, or '' if there is none."""
    # Guard against ParsingException() with no arguments, for which
    # self.args[0] would raise IndexError while formatting the error.
    if not self.args:
      return ''
    return repr(self.args[0])
19 | 23 |
20 | 24 |
class StaticSymbols(object):
  """Represents static symbol information.

  Attributes:
    maps: The object returned by parse_proc_maps for the maps file.
    procedure_boundaries: A dict keyed by the name of an executable mapping
        whose values are the symbol tables built from that file's nm output.
  """

  def __init__(self, maps, procedure_boundaries):
    self.maps = maps
    self.procedure_boundaries = procedure_boundaries

  @staticmethod
  def _read_maps(prepared_data_dir, maps_filename):
    """Parses the maps file found in prepared_data_dir."""
    with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:
      return parse_proc_maps(f)

  @staticmethod
  def _read_json(prepared_data_dir, json_filename):
    """Loads a JSON file found in prepared_data_dir."""
    with open(os.path.join(prepared_data_dir, json_filename), mode='r') as f:
      return json.load(f)

  @staticmethod
  def _read_nm_symbol_table(prepared_data_dir, nm_entry):
    """Builds a symbol table from the nm output described by nm_entry.

    Args:
      prepared_data_dir: Directory that contains the nm output file.
      nm_entry: A dict with the keys 'format', 'file' and 'mangled'.

    Returns:
      The symbol table, or None when the format is not 'bsd'.
    """
    if nm_entry['format'] != 'bsd':
      return None
    with open(os.path.join(prepared_data_dir, nm_entry['file']),
              mode='r') as f:
      return _get_static_symbols_from_nm_bsd(f, nm_entry['mangled'])

  # TODO(dmikurube): It will be deprecated.
  @staticmethod
  def _load_nm(prepared_data_dir, maps_filename, nm_json_filename):
    """Loads symbols using a JSON file that maps names to nm entries."""
    maps = StaticSymbols._read_maps(prepared_data_dir, maps_filename)
    nm_files = StaticSymbols._read_json(prepared_data_dir, nm_json_filename)

    symbol_tables = {}
    for entry in maps.iter(executable_condition):
      # "in" instead of dict.has_key(), which exists only in Python 2.
      if entry.name not in nm_files:
        continue
      table = StaticSymbols._read_nm_symbol_table(
          prepared_data_dir, nm_files[entry.name])
      if table is not None:
        symbol_tables[entry.name] = table

    return StaticSymbols(maps, symbol_tables)

  @staticmethod
  def _load_files(prepared_data_dir, maps_filename, files_filename):
    """Loads symbols using a JSON file that maps names to per-tool entries."""
    maps = StaticSymbols._read_maps(prepared_data_dir, maps_filename)
    files = StaticSymbols._read_json(prepared_data_dir, files_filename)

    symbol_tables = {}
    for entry in maps.iter(executable_condition):
      if entry.name not in files:
        continue
      file_entry = files[entry.name]
      if 'nm' in file_entry:
        table = StaticSymbols._read_nm_symbol_table(
            prepared_data_dir, file_entry['nm'])
        if table is not None:
          symbol_tables[entry.name] = table
      # The key must be looked up in this file's own entry, not in the
      # top-level dict, which is keyed by file name.
      if 'readelf-e' in file_entry:
        # TODO(dmikurube): Implement it.
        pass

    return StaticSymbols(maps, symbol_tables)

  @staticmethod
  def load(prepared_data_dir):
    """Loads static symbols from prepared_data_dir.

    Uses 'nm.json' when that file exists in the directory and falls back to
    'files.json' otherwise.  The maps file is always read from 'maps'.

    Args:
      prepared_data_dir: Path of the directory holding the prepared files.

    Returns:
      A StaticSymbols object.
    """
    if os.path.exists(os.path.join(prepared_data_dir, 'nm.json')):
      return StaticSymbols._load_nm(prepared_data_dir, 'maps', 'nm.json')
    else:
      return StaticSymbols._load_files(prepared_data_dir, 'maps', 'files.json')
| 80 |
| 81 |
class ProcedureBoundary(object):
  """Holds a procedure symbol together with the address range it covers.

  Attributes:
    start: Value given as the start of the range.
    end: Value given as the end of the range.
    name: Name of the procedure symbol.
  """

  def __init__(self, start, end, name):
    self.start, self.end, self.name = start, end, name
28 | 89 |
29 | 90 |
30 class ProcedureBoundaryTable(object): | 91 class ProcedureBoundaryTable(object): |
(...skipping 27 matching lines...) Expand all Loading... |
58 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function) | 119 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function) |
59 if not number: | 120 if not number: |
60 break | 121 break |
61 while True: | 122 while True: |
62 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function) | 123 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function) |
63 if not number: | 124 if not number: |
64 break | 125 break |
65 return _LEADING_TYPE_PATTERN.sub('\g<1>', function) | 126 return _LEADING_TYPE_PATTERN.sub('\g<1>', function) |
66 | 127 |
67 | 128 |
68 def get_procedure_boundaries_from_nm_bsd(f, mangled=False): | 129 def _parse_nm_bsd_line(line): |
| 130 if line[8] == ' ': |
| 131 return line[0:8], line[9], line[11:] |
| 132 elif line[16] == ' ': |
| 133 return line[0:16], line[17], line[19:] |
| 134 raise ParsingException('Invalid nm output.') |
| 135 |
| 136 |
def _get_static_symbols_from_nm_bsd(f, mangled=False):
  """Gets procedure boundaries from a result of nm -n --format bsd.

  Args:
    f: A file object containing a result of nm.  It must be sorted and
        in BSD-style.  (Use "[eu-]nm -n --format bsd")
    mangled: If False, every symbol name is passed through
        _get_short_function_name before it is stored.  If True, names are
        stored as they were read.

  Returns:
    A result ProcedureBoundaryTable object.

  Raises:
    ParsingException: A line could not be split by _parse_nm_bsd_line.
    ValueError: A value column is not a hexadecimal number.
  """
  symbol_table = ProcedureBoundaryTable()

  last_start = 0
  routine = ''

  for line in f:
    sym_value, sym_type, sym_name = _parse_nm_bsd_line(line)

    # A value column that starts with a space holds no address; skip it.
    if sym_value[0] == ' ':
      continue

    # The line is parsed unstripped, so remove the line terminator here.
    # Otherwise it would sit between the name and the address tag below.
    sym_name = sym_name.rstrip()

    start_val = int(sym_value, 16)

    # It's possible for two symbols to share the same address, if
    # one is a zero-length variable (like __start_google_malloc) or
    # one symbol is a weak alias to another (like __libc_malloc).
    # In such cases, we want to ignore all values except for the
    # actual symbol, which in nm-speak has type "T".  The logic
    # below does this, though it's a bit tricky: what happens when
    # we have a series of lines with the same address, is the first
    # one gets queued up to be processed.  However, it won't
    # *actually* be processed until later, when we read a line with
    # a different address.  That means that as long as we're reading
    # lines with the same address, we have a chance to replace that
    # item in the queue, which we do whenever we see a 'T' entry --
    # that is, a line with type 'T'.  If we never see a 'T' entry,
    # we'll just go ahead and process the first entry (which never
    # got touched in the queue), and ignore the others.
    if start_val == last_start:
      if sym_type in ('t', 'T'):
        # We are the 'T' symbol at this address, replace previous symbol.
        routine = sym_name
      # Otherwise we're not the 'T' symbol at this address, so ignore us.
      continue

    # Tag this routine with the starting address in case the image
    # has multiple occurrences of this routine.  We use a syntax
    # that resembles template parameters that are automatically
    # stripped out by _get_short_function_name().
    sym_name += "<%016x>" % start_val

    # Flush the queued symbol: it spans from its own address up to the
    # address of the symbol that was just read.
    if not mangled:
      routine = _get_short_function_name(routine)
    symbol_table.append(ProcedureBoundary(last_start, start_val, routine))

    last_start = start_val
    routine = sym_name

  # Flush the last queued symbol.  No following address is known, so its
  # range starts and ends at the same address.
  if not mangled:
    routine = _get_short_function_name(routine)
  symbol_table.append(ProcedureBoundary(last_start, last_start, routine))
  return symbol_table
OLD | NEW |