OLD | NEW |
| (Empty) |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import bisect | |
6 import os | |
7 import re | |
8 import sys | |
9 | |
10 | |
# Regular expressions used to shorten C++ symbol names.  They are written as
# raw strings so that regex escapes such as \( and \s are not parsed as
# (invalid) Python string escapes.

# An innermost parenthesized group, plus an optional trailing "const".
_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')
# An innermost angle-bracketed group.
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')
# Everything up to the last whitespace that is followed by a "name::"
# qualifier; the qualifier itself is captured as group 1.
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')
14 | |
15 | |
class ParsingException(Exception):
    """Error raised when a line of (eu-)nm output cannot be interpreted.

    The string form is the repr() of the first constructor argument.
    """

    def __str__(self):
        message = self.args[0]
        return repr(message)
19 | |
20 | |
class ProcedureBoundary(object):
    """A procedure symbol together with the address range it occupies.

    Attributes:
      start: The first address of the range.
      end: The end address of the range.
      name: The symbol name of the procedure.
    """

    def __init__(self, start, end, name):
        self.start, self.end, self.name = start, end, name
28 | |
29 | |
class ProcedureBoundaryTable(object):
    """A collection of ProcedureBoundary entries searchable by address.

    Attributes:
      sorted_value_list: Start addresses of all stored entries.  It is only
          guaranteed to be in ascending order while `sorted` is True.
      dictionary: Maps a start address to its ProcedureBoundary.
      sorted: False once an entry has been appended out of order;
          find_procedure() re-sorts lazily and resets it.
    """

    def __init__(self):
        self.sorted_value_list = []
        self.dictionary = {}
        self.sorted = True

    def append(self, entry):
        """Adds an entry to the table.

        An entry whose start address equals that of the most recently
        appended entry is dropped, so the first of a run of same-address
        entries wins.

        Args:
          entry: An object with a `start` attribute (a ProcedureBoundary).
        """
        if self.sorted_value_list:
            last_start = self.sorted_value_list[-1]
            if last_start > entry.start:
                # Out-of-order insertion: defer sorting until the next lookup.
                self.sorted = False
            elif last_start == entry.start:
                return
        self.sorted_value_list.append(entry.start)
        self.dictionary[entry.start] = entry

    def find_procedure(self, address):
        """Finds the entry with the greatest start address below `address`.

        Args:
          address: The address to look up.

        Returns:
          The matching entry, or None when no stored start address is
          strictly smaller than `address` (including an empty table).
        """
        if not self.sorted:
            self.sorted_value_list.sort()
            self.sorted = True
        # NOTE(review): bisect_left means an address exactly equal to a start
        # address resolves to the *preceding* entry.  That looks intended for
        # return addresses, but confirm against the callers.
        found_index = bisect.bisect_left(self.sorted_value_list, address)
        if found_index == 0:
            # Nothing starts below this address.  Without this guard the
            # index -1 below would silently wrap around to the last entry
            # (or raise IndexError on an empty table).
            return None
        found_start_address = self.sorted_value_list[found_index - 1]
        return self.dictionary[found_start_address]
54 | |
55 | |
def _get_short_function_name(function):
    """Shortens a function name by dropping arguments, templates and type.

    Every match of _ARGUMENT_TYPE_PATTERN is removed repeatedly until none
    is left, then the same is done for _TEMPLATE_ARGUMENT_PATTERN.  Finally
    whatever _LEADING_TYPE_PATTERN matches in front of a "name::" qualifier
    is dropped, keeping the qualifier itself.

    Args:
      function: A function name string.

    Returns:
      The shortened function name string.
    """
    # Both patterns only match innermost groups, so nested groups are peeled
    # off from the inside out, one pass at a time.
    for pattern in (_ARGUMENT_TYPE_PATTERN, _TEMPLATE_ARGUMENT_PATTERN):
        replaced = 1
        while replaced:
            function, replaced = pattern.subn('', function)
    # Raw string: '\g' is not a valid Python string escape.
    return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)
66 | |
67 | |
def get_procedure_boundaries_from_nm_bsd(f, mangled=False):
    """Builds a ProcedureBoundaryTable from the output of nm -n --format bsd.

    Args:
      f: An iterable of lines (e.g. a file object) holding the output of nm.
          It must be sorted and in BSD style.
          (Use "[eu-]nm -n --format bsd")
      mangled: If False (the default), every symbol name is passed through
          _get_short_function_name() before being stored.  If True, names
          are stored as read, including the appended address tag.

    Returns:
      A ProcedureBoundaryTable object.

    Raises:
      ParsingException: If a line does not split into two or three fields,
          or a two-field line starts with a field that is not 1, 8 or 16
          characters long.
      ValueError: Propagated from int() if an address field is not
          hexadecimal.
    """
    table = ProcedureBoundaryTable()

    def emit(start, end, name):
        # Store one finished range, shortening the name unless the caller
        # asked for mangled names.
        if not mangled:
            name = _get_short_function_name(name)
        table.append(ProcedureBoundary(start, end, name))

    prev_start = 0
    pending_name = ''

    for line in f:
        fields = line.rstrip().split(None, 2)
        field_count = len(fields)
        if field_count != 2 and field_count != 3:
            raise ParsingException('Invalid output 2 from (eu-)nm.')

        first_width = len(fields[0])
        if first_width == 1:
            # The line starts with a one-character symbol type, i.e. it
            # carries no address, so it cannot contribute a boundary.
            continue
        if field_count == 3:
            address_text, kind, name = fields
        elif first_width in (8, 16):
            # Address and name only; the type column is missing.
            address_text, name = fields
            kind = ' '
        else:
            raise ParsingException('Invalid output 1 from (eu-)nm.')

        address = int(address_text, 16)

        # Several symbols may share one address, for instance a zero-length
        # variable or a weak alias of another symbol.  Only one of them
        # should name the range.  The name seen first for an address is held
        # back as "pending" and is only emitted once a line with a different
        # address arrives.  While the address stays the same, a symbol of
        # type 't' or 'T' replaces the pending name; any other symbol is
        # ignored.  If no such symbol appears, the first name is used.
        if address == prev_start:
            if kind in ('t', 'T'):
                pending_name = name
            continue

        # Tag the name with its start address in case the image contains
        # several occurrences of the same routine.  The tag looks like a
        # template parameter list, so _get_short_function_name() strips it
        # again when names are shortened.
        name += "<%016x>" % address

        emit(prev_start, address, pending_name)
        prev_start = address
        pending_name = name

    # The final symbol has no successor, so its range ends where it starts.
    emit(prev_start, prev_start, pending_name)
    return table
OLD | NEW |