| OLD | NEW |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import bisect | 5 import bisect |
| 6 import json | |
| 7 import os | 6 import os |
| 8 import re | 7 import re |
| 9 import sys | 8 import sys |
| 10 | 9 |
| 11 from parse_proc_maps import parse_proc_maps | |
| 12 from util import executable_condition | |
| 13 | |
| 14 | 10 |
| 15 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?') | 11 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?') |
| 16 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>') | 12 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>') |
| 17 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)') | 13 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)') |
| 14 _READELF_SECTION_HEADER_PATTER = re.compile( |
| 15 '^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+' |
| 16 '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+' |
| 17 '([0-9]+)\s+([0-9]+)\s+([0-9]+)') |
| 18 | 18 |
| 19 | 19 |
| 20 class ParsingException(Exception): | 20 class ParsingException(Exception): |
| 21 def __str__(self): | 21 def __str__(self): |
| 22 return repr(self.args[0]) | 22 return repr(self.args[0]) |
| 23 | 23 |
| 24 | 24 |
| 25 class StaticSymbols(object): | 25 class AddressMapping(object): |
| 26 """Represents static symbol information.""" | 26 def __init__(self): |
| 27 self._symbol_map = {} |
| 27 | 28 |
| 28 def __init__(self, maps, procedure_boundaries): | 29 def append(self, start, entry): |
| 29 self.maps = maps | 30 self._symbol_map[start] = entry |
| 30 self.procedure_boundaries = procedure_boundaries | |
| 31 | 31 |
| 32 # TODO(dmikurube): It will be deprecated. | 32 def find(self, address): |
| 33 @staticmethod | 33 return self._symbol_map.get(address) |
| 34 def _load_nm(prepared_data_dir, maps_filename, nm_json_filename): | |
| 35 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f: | |
| 36 maps = parse_proc_maps(f) | |
| 37 with open(os.path.join(prepared_data_dir, nm_json_filename), mode='r') as f: | |
| 38 nm_files = json.load(f) | |
| 39 | |
| 40 symbol_tables = {} | |
| 41 for entry in maps.iter(executable_condition): | |
| 42 if nm_files.has_key(entry.name): | |
| 43 if nm_files[entry.name]['format'] == 'bsd': | |
| 44 with open(os.path.join(prepared_data_dir, | |
| 45 nm_files[entry.name]['file']), mode='r') as f: | |
| 46 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd( | |
| 47 f, nm_files[entry.name]['mangled']) | |
| 48 | |
| 49 return StaticSymbols(maps, symbol_tables) | |
| 50 | |
| 51 @staticmethod | |
| 52 def _load_files(prepared_data_dir, maps_filename, files_filename): | |
| 53 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f: | |
| 54 maps = parse_proc_maps(f) | |
| 55 with open(os.path.join(prepared_data_dir, files_filename), mode='r') as f: | |
| 56 files = json.load(f) | |
| 57 | |
| 58 symbol_tables = {} | |
| 59 for entry in maps.iter(executable_condition): | |
| 60 if entry.name in files: | |
| 61 if 'nm' in files[entry.name]: | |
| 62 nm_entry = files[entry.name]['nm'] | |
| 63 if nm_entry['format'] == 'bsd': | |
| 64 with open(os.path.join(prepared_data_dir, nm_entry['file']), | |
| 65 mode='r') as f: | |
| 66 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd( | |
| 67 f, nm_entry['mangled']) | |
| 68 if 'readelf-e' in files: | |
| 69 readelf_entry = files[entry.name]['readelf-e'] | |
| 70 # TODO(dmikurube) Implement it. | |
| 71 | |
| 72 return StaticSymbols(maps, symbol_tables) | |
| 73 | |
| 74 @staticmethod | |
| 75 def load(prepared_data_dir): | |
| 76 if os.path.exists(os.path.join(prepared_data_dir, 'nm.json')): | |
| 77 return StaticSymbols._load_nm(prepared_data_dir, 'maps', 'nm.json') | |
| 78 else: | |
| 79 return StaticSymbols._load_files(prepared_data_dir, 'maps', 'files.json') | |
| 80 | 34 |
| 81 | 35 |
| 82 class ProcedureBoundary(object): | 36 class RangeAddressMapping(AddressMapping): |
| 37 def __init__(self): |
| 38 AddressMapping.__init__(self) |
| 39 self._sorted_start_list = [] |
| 40 self._is_sorted = True |
| 41 |
| 42 def append(self, start, entry): |
| 43 if self._sorted_start_list: |
| 44 if self._sorted_start_list[-1] > start: |
| 45 self._is_sorted = False |
| 46 elif self._sorted_start_list[-1] == start: |
| 47 return |
| 48 self._sorted_start_list.append(start) |
| 49 self._symbol_map[start] = entry |
| 50 |
| 51 def find(self, address): |
| 52 if not self._is_sorted: |
| 53 self._sorted_start_list.sort() |
| 54 self._is_sorted = True |
| 55 found_index = bisect.bisect_left(self._sorted_start_list, address) |
| 56 found_start_address = self._sorted_start_list[found_index - 1] |
| 57 return self._symbol_map[found_start_address] |
| 58 |
| 59 |
| 60 class Procedure(object): |
| 83 """A class for a procedure symbol and an address range for the symbol.""" | 61 """A class for a procedure symbol and an address range for the symbol.""" |
| 84 | 62 |
| 85 def __init__(self, start, end, name): | 63 def __init__(self, start, end, name): |
| 86 self.start = start | 64 self.start = start |
| 87 self.end = end | 65 self.end = end |
| 88 self.name = name | 66 self.name = name |
| 89 | 67 |
| 68 def __eq__(self, other): |
| 69 return (self.start == other.start and |
| 70 self.end == other.end and |
| 71 self.name == other.name) |
| 90 | 72 |
| 91 class ProcedureBoundaryTable(object): | 73 def __ne__(self, other): |
| 92 """A class of a set of ProcedureBoundary.""" | 74 return not self.__eq__(other) |
| 93 | 75 |
| 94 def __init__(self): | 76 def __str__(self): |
| 95 self.sorted_value_list = [] | 77 return '%x-%x: %s' % (self.start, self.end, self.name) |
| 96 self.dictionary = {} | |
| 97 self.sorted = True | |
| 98 | |
| 99 def append(self, entry): | |
| 100 if self.sorted_value_list: | |
| 101 if self.sorted_value_list[-1] > entry.start: | |
| 102 self.sorted = False | |
| 103 elif self.sorted_value_list[-1] == entry.start: | |
| 104 return | |
| 105 self.sorted_value_list.append(entry.start) | |
| 106 self.dictionary[entry.start] = entry | |
| 107 | |
| 108 def find_procedure(self, address): | |
| 109 if not self.sorted: | |
| 110 self.sorted_value_list.sort() | |
| 111 self.sorted = True | |
| 112 found_index = bisect.bisect_left(self.sorted_value_list, address) | |
| 113 found_start_address = self.sorted_value_list[found_index - 1] | |
| 114 return self.dictionary[found_start_address] | |
| 115 | 78 |
| 116 | 79 |
| 117 def _get_short_function_name(function): | 80 class ElfSection(object): |
| 118 while True: | 81 """A class for an elf section header.""" |
| 119 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function) | 82 |
| 120 if not number: | 83 def __init__( |
| 121 break | 84 self, number, name, stype, address, offset, size, es, flg, lk, inf, al): |
| 122 while True: | 85 self.number = number |
| 123 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function) | 86 self.name = name |
| 124 if not number: | 87 self.stype = stype |
| 125 break | 88 self.address = address |
| 126 return _LEADING_TYPE_PATTERN.sub('\g<1>', function) | 89 self.offset = offset |
| 90 self.size = size |
| 91 self.es = es |
| 92 self.flg = flg |
| 93 self.lk = lk |
| 94 self.inf = inf |
| 95 self.al = al |
| 96 |
| 97 def __eq__(self, other): |
| 98 return (self.number == other.number and |
| 99 self.name == other.name and |
| 100 self.stype == other.stype and |
| 101 self.address == other.address and |
| 102 self.offset == other.offset and |
| 103 self.size == other.size and |
| 104 self.es == other.es and |
| 105 self.flg == other.flg and |
| 106 self.lk == other.lk and |
| 107 self.inf == other.inf and |
| 108 self.al == other.al) |
| 109 |
| 110 def __ne__(self, other): |
| 111 return not self.__eq__(other) |
| 112 |
| 113 def __str__(self): |
| 114 return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name) |
| 127 | 115 |
| 128 | 116 |
| 129 def _parse_nm_bsd_line(line): | 117 class StaticSymbolsInFile(object): |
| 130 if line[8] == ' ': | 118 """Represents static symbol information in a binary file.""" |
| 131 return line[0:8], line[9], line[11:] | |
| 132 elif line[16] == ' ': | |
| 133 return line[0:16], line[17], line[19:] | |
| 134 raise ParsingException('Invalid nm output.') | |
| 135 | 119 |
| 120 def __init__(self, my_name): |
| 121 self.my_name = my_name |
| 122 self._elf_sections = [] |
| 123 self._procedures = RangeAddressMapping() |
| 124 self._typeinfos = AddressMapping() |
| 136 | 125 |
| 137 def _get_static_symbols_from_nm_bsd(f, mangled=False): | 126 def _append_elf_section(self, elf_section): |
| 138 """Gets procedure boundaries from a result of nm -n --format bsd. | 127 self._elf_sections.append(elf_section) |
| 139 | 128 |
| 140 Args: | 129 def _append_procedure(self, start, procedure): |
| 141 f: A file object containing a result of nm. It must be sorted and | 130 self._procedures.append(start, procedure) |
| 142 in BSD-style. (Use "[eu-]nm -n --format bsd") | |
| 143 | 131 |
| 144 Returns: | 132 def _append_typeinfo(self, start, typeinfo): |
| 145 A result ProcedureBoundaryTable object. | 133 self._typeinfos.append(start, typeinfo) |
| 146 """ | |
| 147 symbol_table = ProcedureBoundaryTable() | |
| 148 | 134 |
| 149 last_start = 0 | 135 def _find_symbol_by_runtime_address(self, address, vma, target): |
| 150 routine = '' | 136 if not (vma.begin <= address < vma.end): |
| 137 return None |
| 151 | 138 |
| 152 for line in f: | 139 if vma.name != self.my_name: |
| 153 sym_value, sym_type, sym_name = _parse_nm_bsd_line(line) | 140 return None |
| 154 | 141 |
| 155 if sym_value[0] == ' ': | 142 file_offset = address - (vma.begin - vma.offset) |
| 156 continue | 143 elf_address = None |
| 144 for section in self._elf_sections: |
| 145 if section.offset <= file_offset < (section.offset + section.size): |
| 146 elf_address = section.address + file_offset - section.offset |
| 147 if not elf_address: |
| 148 return None |
| 157 | 149 |
| 158 start_val = int(sym_value, 16) | 150 return target.find(elf_address) |
| 159 | 151 |
| 160 # It's possible for two symbols to share the same address, if | 152 def find_procedure_by_runtime_address(self, address, vma): |
| 161 # one is a zero-length variable (like __start_google_malloc) or | 153 return self._find_symbol_by_runtime_address(address, vma, self._procedures) |
| 162 # one symbol is a weak alias to another (like __libc_malloc). | 154 |
| 163 # In such cases, we want to ignore all values except for the | 155 def find_typeinfo_by_runtime_address(self, address, vma): |
| 164 # actual symbol, which in nm-speak has type "T". The logic | 156 return self._find_symbol_by_runtime_address(address, vma, self._typeinfos) |
| 165 # below does this, though it's a bit tricky: what happens when | 157 |
| 166 # we have a series of lines with the same address, is the first | 158 def load_readelf_ew(self, f): |
| 167 # one gets queued up to be processed. However, it won't | 159 found_header = False |
| 168 # *actually* be processed until later, when we read a line with | 160 for line in f: |
| 169 # a different address. That means that as long as we're reading | 161 if line.rstrip() == 'Section Headers:': |
| 170 # lines with the same address, we have a chance to replace that | 162 found_header = True |
| 171 # item in the queue, which we do whenever we see a 'T' entry -- | 163 break |
| 172 # that is, a line with type 'T'. If we never see a 'T' entry, | 164 if not found_header: |
| 173 # we'll just go ahead and process the first entry (which never | 165 return None |
| 174 # got touched in the queue), and ignore the others. | 166 |
| 175 if start_val == last_start and (sym_type == 't' or sym_type == 'T'): | 167 for line in f: |
| 176 # We are the 'T' symbol at this address, replace previous symbol. | 168 line = line.rstrip() |
| 169 matched = _READELF_SECTION_HEADER_PATTER.match(line) |
| 170 if matched: |
| 171 self._append_elf_section(ElfSection( |
| 172 int(matched.group(1), 10), # number |
| 173 matched.group(2), # name |
| 174 matched.group(3), # stype |
| 175 int(matched.group(4), 16), # address |
| 176 int(matched.group(5), 16), # offset |
| 177 int(matched.group(6), 16), # size |
| 178 matched.group(7), # es |
| 179 matched.group(8), # flg |
| 180 matched.group(9), # lk |
| 181 matched.group(10), # inf |
| 182 matched.group(11) # al |
| 183 )) |
| 184 else: |
| 185 if line in ('Key to Flags:', 'Program Headers:'): |
| 186 break |
| 187 |
| 188 def _parse_nm_bsd_line(self, line): |
| 189 if line[8] == ' ': |
| 190 return line[0:8], line[9], line[11:] |
| 191 elif line[16] == ' ': |
| 192 return line[0:16], line[17], line[19:] |
| 193 raise ParsingException('Invalid nm output.') |
| 194 |
| 195 def _get_short_function_name(self, function): |
| 196 while True: |
| 197 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function) |
| 198 if not number: |
| 199 break |
| 200 while True: |
| 201 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function) |
| 202 if not number: |
| 203 break |
| 204 return _LEADING_TYPE_PATTERN.sub('\g<1>', function) |
| 205 |
| 206 def load_nm_bsd(self, f, mangled=False): |
| 207 last_start = 0 |
| 208 routine = '' |
| 209 |
| 210 for line in f: |
| 211 line = line.rstrip() |
| 212 sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line) |
| 213 |
| 214 if sym_value[0] == ' ': |
| 215 continue |
| 216 |
| 217 start_val = int(sym_value, 16) |
| 218 |
| 219 if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and |
| 220 (not mangled and sym_name.startswith('typeinfo'))): |
| 221 self._append_typeinfo(start_val, sym_name) |
| 222 |
| 223 # It's possible for two symbols to share the same address, if |
| 224 # one is a zero-length variable (like __start_google_malloc) or |
| 225 # one symbol is a weak alias to another (like __libc_malloc). |
| 226 # In such cases, we want to ignore all values except for the |
| 227 # actual symbol, which in nm-speak has type "T". The logic |
| 228 # below does this, though it's a bit tricky: what happens when |
| 229 # we have a series of lines with the same address, is the first |
| 230 # one gets queued up to be processed. However, it won't |
| 231 # *actually* be processed until later, when we read a line with |
| 232 # a different address. That means that as long as we're reading |
| 233 # lines with the same address, we have a chance to replace that |
| 234 # item in the queue, which we do whenever we see a 'T' entry -- |
| 235 # that is, a line with type 'T'. If we never see a 'T' entry, |
| 236 # we'll just go ahead and process the first entry (which never |
| 237 # got touched in the queue), and ignore the others. |
| 238 if start_val == last_start and (sym_type == 't' or sym_type == 'T'): |
| 239 # We are the 'T' symbol at this address, replace previous symbol. |
| 240 routine = sym_name |
| 241 continue |
| 242 elif start_val == last_start: |
| 243 # We're not the 'T' symbol at this address, so ignore us. |
| 244 continue |
| 245 |
| 246 # Tag this routine with the starting address in case the image |
| 247 # has multiple occurrences of this routine. We use a syntax |
| 248 # that resembles template parameters that are automatically |
| 249 # stripped out by _get_short_function_name() |
| 250 sym_name += "<%016x>" % start_val |
| 251 |
| 252 if not mangled: |
| 253 routine = self._get_short_function_name(routine) |
| 254 self._append_procedure( |
| 255 last_start, Procedure(last_start, start_val, routine)) |
| 256 |
| 257 last_start = start_val |
| 177 routine = sym_name | 258 routine = sym_name |
| 178 continue | |
| 179 elif start_val == last_start: | |
| 180 # We're not the 'T' symbol at this address, so ignore us. | |
| 181 continue | |
| 182 | |
| 183 # Tag this routine with the starting address in case the image | |
| 184 # has multiple occurrences of this routine. We use a syntax | |
| 185 # that resembles template parameters that are automatically | |
| 186 # stripped out by _get_short_function_name() | |
| 187 sym_name += "<%016x>" % start_val | |
| 188 | 259 |
| 189 if not mangled: | 260 if not mangled: |
| 190 routine = _get_short_function_name(routine) | 261 routine = self._get_short_function_name(routine) |
| 191 symbol_table.append(ProcedureBoundary(last_start, start_val, routine)) | 262 self._append_procedure( |
| 192 | 263 last_start, Procedure(last_start, last_start, routine)) |
| 193 last_start = start_val | |
| 194 routine = sym_name | |
| 195 | |
| 196 if not mangled: | |
| 197 routine = _get_short_function_name(routine) | |
| 198 symbol_table.append(ProcedureBoundary(last_start, last_start, routine)) | |
| 199 return symbol_table | |
| OLD | NEW |