tools/find_runtime_symbols/static_symbols.py - Issue 10826008: Load static symbol information lazily with some clean-ups.

Side by Side Diff: tools/find_runtime_symbols/static_symbols.py

Issue 10826008: Load static symbol information lazily with some clean-ups. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: refine Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import bisect	5 import bisect

	6 import json

6 import os	7 import os

7 import re	8 import re

8 import sys	9 import sys

9	10

	11 from parse_proc_maps import parse_proc_maps

	12 from util import executable_condition

	13

10	14

11 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]\)(\sconst)?')	15 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]\)(\sconst)?')

12 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')	16 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')

13 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')	17 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')

14	18

15	19

16 class ParsingException(Exception):	20 class ParsingException(Exception):

17 def __str__(self):	21 def __str__(self):

18 return repr(self.args[0])	22 return repr(self.args[0])

19	23

20	24

	25 class StaticSymbols(object):

	26 """Represents static symbol information."""

	27

	28 def __init__(self, maps, procedure_boundaries):

	29 self.maps = maps

	30 self.procedure_boundaries = procedure_boundaries

	31

	32 # TODO(dmikurube): _load_nm will be deprecated.

	33 @staticmethod

	34 def _load_nm(prepared_data_dir, maps_filename, nm_json_filename):

	35 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:

	36 maps = parse_proc_maps(f)

	37 with open(os.path.join(prepared_data_dir, nm_json_filename), mode='r') as f:

	38 nm_files = json.load(f)

	39

	40 symbol_tables = {}

	41 for entry in maps.iter(executable_condition):

	42 if nm_files.has_key(entry.name):

	43 if nm_files[entry.name]['format'] == 'bsd':

	44 with open(os.path.join(prepared_data_dir,

	45 nm_files[entry.name]['file']), mode='r') as f:

	46 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(

	47 f, nm_files[entry.name]['mangled'])

	48

	49 return StaticSymbols(maps, symbol_tables)

	50

	51 @staticmethod

	52 def _load_files(prepared_data_dir, maps_filename, files_filename):

	53 with open(os.path.join(prepared_data_dir, maps_filename), mode='r') as f:

	54 maps = parse_proc_maps(f)

	55 with open(os.path.join(prepared_data_dir, files_filename), mode='r') as f:

	56 files = json.load(f)

	57

	58 symbol_tables = {}

	59 for entry in maps.iter(executable_condition):

	60 if entry.name in files:

	61 if 'nm' in files[entry.name]:

	62 nm_entry = files[entry.name]['nm']

	63 if nm_entry['format'] == 'bsd':

	64 with open(os.path.join(prepared_data_dir, nm_entry['file']),

	65 mode='r') as f:

	66 symbol_tables[entry.name] = _get_static_symbols_from_nm_bsd(

	67 f, nm_entry['mangled'])

	68 if 'readelf-e' in files:

	69 readelf_entry = files[entry.name]['readelf-e']

	70 # TODO(dmikurube): Implement readelf-e handling.

	71

	72 return StaticSymbols(maps, symbol_tables)

	73

	74 @staticmethod

	75 def load(prepared_data_dir):

	76 if os.path.exists(os.path.join(prepared_data_dir, 'nm.json')):

	77 return StaticSymbols._load_nm(prepared_data_dir, 'maps', 'nm.json')

	78 else:

	79 return StaticSymbols._load_files(prepared_data_dir, 'maps', 'files.json')

	80

	81

21 class ProcedureBoundary(object):	82 class ProcedureBoundary(object):

22 """A class for a procedure symbol and an address range for the symbol."""	83 """A class for a procedure symbol and an address range for the symbol."""

23	84

24 def __init__(self, start, end, name):	85 def __init__(self, start, end, name):

25 self.start = start	86 self.start = start

26 self.end = end	87 self.end = end

27 self.name = name	88 self.name = name

28	89

29	90

30 class ProcedureBoundaryTable(object):	91 class ProcedureBoundaryTable(object):

(...skipping 27 matching lines...) Expand all Loading...
58 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)	119 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)

59 if not number:	120 if not number:

60 break	121 break

61 while True:	122 while True:

62 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)	123 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)

63 if not number:	124 if not number:

64 break	125 break

65 return _LEADING_TYPE_PATTERN.sub('\g<1>', function)	126 return _LEADING_TYPE_PATTERN.sub('\g<1>', function)

66	127

67	128

68 def get_procedure_boundaries_from_nm_bsd(f, mangled=False):	129 def _parse_nm_bsd_line(line):

	130 if line[8] == ' ':

	131 return line[0:8], line[9], line[11:]

	132 elif line[16] == ' ':

	133 return line[0:16], line[17], line[19:]

	134 raise ParsingException('Invalid nm output.')

	135

	136

	137 def _get_static_symbols_from_nm_bsd(f, mangled=False):

69 """Gets procedure boundaries from a result of nm -n --format bsd.	138 """Gets procedure boundaries from a result of nm -n --format bsd.

70	139

71 Args:	140 Args:

72 f: A file object containing a result of nm. It must be sorted and	141 f: A file object containing a result of nm. It must be sorted and

73 in BSD-style. (Use "[eu-]nm -n --format bsd")	142 in BSD-style. (Use "[eu-]nm -n --format bsd")

74	143

75 Returns:	144 Returns:

76 The resulting ProcedureBoundaryTable object.	145 The resulting ProcedureBoundaryTable object.

77 """	146 """

78 symbol_table = ProcedureBoundaryTable()	147 symbol_table = ProcedureBoundaryTable()

79	148

80 last_start = 0	149 last_start = 0

81 routine = ''	150 routine = ''

82	151

83 for line in f:	152 for line in f:

84 symbol_info = line.rstrip().split(None, 2)	153 sym_value, sym_type, sym_name = _parse_nm_bsd_line(line)

85 if len(symbol_info) == 3:

86 if len(symbol_info[0]) == 1:

87 symbol_info = line.split(None, 1)

88 (sym_type, this_routine) = symbol_info

89 sym_value = ''

90 else:

91 (sym_value, sym_type, this_routine) = symbol_info

92 elif len(symbol_info) == 2:

93 if len(symbol_info[0]) == 1:

94 (sym_type, this_routine) = symbol_info

95 sym_value = ''

96 elif len(symbol_info[0]) == 8 or len(symbol_info[0]) == 16:

97 (sym_value, this_routine) = symbol_info

98 sym_type = ' '

99 else:

100 raise ParsingException('Invalid output 1 from (eu-)nm.')

101 else:

102 raise ParsingException('Invalid output 2 from (eu-)nm.')

103	154

104 if sym_value == '':	155 if sym_value[0] == ' ':

105 continue	156 continue

106	157

107 start_val = int(sym_value, 16)	158 start_val = int(sym_value, 16)

108	159

109 # It's possible for two symbols to share the same address, if	160 # It's possible for two symbols to share the same address, if

110 # one is a zero-length variable (like __start_google_malloc) or	161 # one is a zero-length variable (like __start_google_malloc) or

111 # one symbol is a weak alias to another (like __libc_malloc).	162 # one symbol is a weak alias to another (like __libc_malloc).

112 # In such cases, we want to ignore all values except for the	163 # In such cases, we want to ignore all values except for the

113 # actual symbol, which in nm-speak has type "T". The logic	164 # actual symbol, which in nm-speak has type "T". The logic

114 # below does this, though it's a bit tricky: what happens when	165 # below does this, though it's a bit tricky: what happens when

115 # we have a series of lines with the same address, is the first	166 # we have a series of lines with the same address, is the first

116 # one gets queued up to be processed. However, it won't	167 # one gets queued up to be processed. However, it won't

117 # actually be processed until later, when we read a line with	168 # actually be processed until later, when we read a line with

118 # a different address. That means that as long as we're reading	169 # a different address. That means that as long as we're reading

119 # lines with the same address, we have a chance to replace that	170 # lines with the same address, we have a chance to replace that

120 # item in the queue, which we do whenever we see a 'T' entry --	171 # item in the queue, which we do whenever we see a 'T' entry --

121 # that is, a line with type 'T'. If we never see a 'T' entry,	172 # that is, a line with type 'T'. If we never see a 'T' entry,

122 # we'll just go ahead and process the first entry (which never	173 # we'll just go ahead and process the first entry (which never

123 # got touched in the queue), and ignore the others.	174 # got touched in the queue), and ignore the others.

124 if start_val == last_start and (sym_type == 't' or sym_type == 'T'):	175 if start_val == last_start and (sym_type == 't' or sym_type == 'T'):

125 # We are the 'T' symbol at this address, replace previous symbol.	176 # We are the 'T' symbol at this address, replace previous symbol.

126 routine = this_routine	177 routine = sym_name

127 continue	178 continue

128 elif start_val == last_start:	179 elif start_val == last_start:

129 # We're not the 'T' symbol at this address, so ignore us.	180 # We're not the 'T' symbol at this address, so ignore us.

130 continue	181 continue

131	182

132 # Tag this routine with the starting address in case the image	183 # Tag this routine with the starting address in case the image

133 # has multiple occurrences of this routine. We use a syntax	184 # has multiple occurrences of this routine. We use a syntax

134 # that resembles template parameters that are automatically	185 # that resembles template parameters that are automatically

135 # stripped out by _get_short_function_name()	186 # stripped out by _get_short_function_name()

136 this_routine += "<%016x>" % start_val	187 sym_name += "<%016x>" % start_val

137	188

138 if not mangled:	189 if not mangled:

139 routine = _get_short_function_name(routine)	190 routine = _get_short_function_name(routine)

140 symbol_table.append(ProcedureBoundary(last_start, start_val, routine))	191 symbol_table.append(ProcedureBoundary(last_start, start_val, routine))

141	192

142 last_start = start_val	193 last_start = start_val

143 routine = this_routine	194 routine = sym_name

144	195

145 if not mangled:	196 if not mangled:

146 routine = _get_short_function_name(routine)	197 routine = _get_short_function_name(routine)

147 symbol_table.append(ProcedureBoundary(last_start, last_start, routine))	198 symbol_table.append(ProcedureBoundary(last_start, last_start, routine))

148 return symbol_table	199 return symbol_table

OLD	NEW

« no previous file with comments | « tools/find_runtime_symbols/procedure_boundaries.py ('k') | no next file » | no next file with comments »