OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Symbolize log file produced by cygprofile instrumentation. |
| 7 |
| 8 Given a log file and the binary being profiled (e.g. executable, shared |
| 9 library), the script can produce three different outputs: 1) symbols for the |
| 10 addresses, 2) function and line numbers for the addresses, or 3) an order file. |
| 11 """ |
| 12 |
| 13 import optparse |
| 14 import os |
| 15 import string |
| 16 import subprocess |
| 17 import sys |
| 18 |
| 19 |
def ParseLogLines(log_file_lines):
  """Parse the lines of a log file produced by a profiled run.

  Args:
    log_file_lines: list of lines in the log file produced by the profiled run

  Below is an example of a small log file:
  5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
  secs       msecs      pid:threadid    func
  START
  1314897086 795828     3587:1074648168 0x509e105c
  1314897086 795874     3587:1074648168 0x509e0eb4
  1314897086 796326     3587:1074648168 0x509e0e3c
  1314897086 796552     3587:1074648168 0x509e07bc
  END

  The first line is the executable ("r-xp") memory mapping; the hex number
  before its first '-' is taken as the load address. The second line (column
  header) is skipped. Of the remaining lines only those with exactly four
  whitespace-separated fields are treated as call records, so the START and
  END markers are ignored.

  Returns:
    list of tuples of the format (sec, msec, call id, function address
    called). An address greater than the load address is converted to an
    offset from it; any other address is returned unchanged.

  Raises:
    ValueError: if the log is empty, its first line is not an "r-xp" mapping
        line, or a numeric field of a call record cannot be parsed.
  """
  # Validate explicitly instead of with assert, which is stripped under -O.
  if not log_file_lines or 'r-xp' not in log_file_lines[0]:
    raise ValueError('first line of the log must be the r-xp memory mapping '
                     'of the profiled binary')
  vm_start = int(log_file_lines[0].partition('-')[0], 16)

  call_info = []
  for line in log_file_lines[2:]:
    fields = line.split()
    if len(fields) != 4:
      # Not a call record (e.g. START, END or a blank line).
      continue
    sec_timestamp = int(fields[0])
    msec_timestamp = int(fields[1])
    callee_id = fields[2]
    addr = int(fields[3], 16)
    if vm_start < addr:
      addr -= vm_start
    call_info.append((sec_timestamp, msec_timestamp, callee_id, addr))

  return call_info
| 65 |
| 66 |
def ParseLibSymbols(lib_file):
  """Run nm on lib_file and collect its function (text) symbols.

  Args:
    lib_file: the library or executable that contains the profiled code

  Returns:
    tuple (unique_addrs, address_map) where unique_addrs is a list of
    (address, size) tuples, one per distinct symbol address in the order nm
    printed them, and address_map maps each of those addresses to the list of
    all symbol names found at that address.
  """
  cmd = ['nm', '-S', '-n', lib_file]
  # universal_newlines=True makes communicate() return text instead of bytes,
  # so the string handling below works on both Python 2 and Python 3.
  nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
  output = nm_p.communicate()[0]

  # Keep only lines whose symbol type is t, T or W, and split each line once.
  symbol_types = (' t ', ' W ', ' T ')
  nm_symbols = [nm_line.split() for nm_line in output.split('\n')
                if any(sym_type in nm_line for sym_type in symbol_types)]

  unique_addrs = []
  address_map = {}
  nm_index = 0
  num_symbols = len(nm_symbols)
  while nm_index < num_symbols:
    fields = nm_symbols[nm_index]

    # If the line does not have exactly 4 fields, then it does not contain all
    # the information needed to symbolize (i.e. address, size and symbol name).
    if len(fields) != 4:
      nm_index += 1
      continue

    addr = int(fields[0], 16)
    size = int(fields[1], 16)

    # Multiple symbols may be at the same address. This is due to aliasing
    # done by the compiler. Since there is no way to be sure which one was
    # called in the profiled run, we symbolize to include all symbol names at
    # a particular address. The size recorded is that of the first symbol.
    fnames = []
    while (nm_index < num_symbols and
           addr == int(nm_symbols[nm_index][0], 16)):
      if len(nm_symbols[nm_index]) == 4:
        fnames.append(nm_symbols[nm_index][3])
      nm_index += 1
    address_map[addr] = fnames
    unique_addrs.append((addr, size))

  return (unique_addrs, address_map)
| 113 |
class SymbolNotFoundException(Exception):
  """Raised when a logged address is not covered by any known symbol.

  Attributes:
    value: the payload passed by the raiser (the address that was not found).
  """

  def __init__(self, value):
    # Forward to Exception so that e.args is populated like any other
    # exception's.
    super(SymbolNotFoundException, self).__init__(value)
    self.value = value

  def __str__(self):
    return repr(self.value)
| 119 |
def BinarySearchAddresses(addr, start, end, arr):
  """Find starting address of a symbol at a particular address.

  The reason we can not directly use the address provided by the log file is
  that the log file may give an address after the start of the symbol.  The
  logged address is often one byte after the start. Using this search
  function rather than just subtracting one from the logged address allows
  the logging instrumentation to log any address in a function.

  Args:
    addr: the address being searched for
    start: the starting index for the binary search
    end: the ending index (inclusive) for the binary search
    arr: the list being searched, containing tuples of (address, size); the
        search assumes it is ordered by address

  Returns:
    the starting address of the symbol whose range [address, address + size)
    contains addr

  Raises:
    SymbolNotFoundException: if no symbol in the searched range contains addr
        (including when arr is empty)
  """
  # An empty symbol table cannot contain the address; report that as "not
  # found" instead of failing with an IndexError below.
  if not arr:
    raise SymbolNotFoundException(addr)

  low, high = start, end
  # Narrow the window until at most two candidates remain.
  while high - low > 1:
    # Floor division keeps the index an int on both Python 2 and Python 3.
    halfway = (low + high) // 2
    (nm_addr, size) = arr[halfway]
    if nm_addr <= addr < nm_addr + size:
      return nm_addr
    if addr < nm_addr:
      high = halfway - 1
    else:
      # Condition (addr >= nm_addr + size) must be true.
      low = halfway + 1

  # arr[i] is a tuple of address and size. Check if addr is inside the range
  # of either remaining candidate.
  for index in (low, high):
    (nm_addr, size) = arr[index]
    if nm_addr <= addr < nm_addr + size:
      return nm_addr
  raise SymbolNotFoundException(addr)
| 162 |
| 163 |
def FindFunctions(addr, unique_addrs, address_map):
  """Return the symbol names recorded for the symbol that contains addr.

  The containing symbol's start address is looked up in unique_addrs with
  BinarySearchAddresses over the whole list and then used as the key into
  address_map.
  """
  last_index = len(unique_addrs) - 1
  symbol_start = BinarySearchAddresses(addr, 0, last_index, unique_addrs)
  return address_map[symbol_start]
| 168 |
| 169 |
def AddrToLine(addr, lib_file):
  """Use addr2line to determine line info of a particular address.

  Args:
    addr: the address (an integer) to look up; passed to addr2line in hex
    lib_file: the library or executable that contains the profiled code

  Returns:
    the standard output of "addr2line -f -e lib_file addr" with every newline
    replaced by ':' (so output ending in a newline yields a trailing ':')
  """
  cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
  # universal_newlines=True makes communicate() return text instead of bytes,
  # so the split below works on both Python 2 and Python 3.
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
  output = p.communicate()[0]
  return ':'.join(output.split('\n'))
| 181 |
| 182 |
def main():
  """Write output for profiled run to standard out.

  The format of the output depends on the output type given with the
  -t/--outputType option:
    symbolize (default): timestamp, call id and every symbol name at each
        logged address
    lineize: timestamp, call id and the addr2line output for each logged
        address
    orderfile: a '.text.<symbol>' line for every symbol name at each logged
        address, followed by a blank line
  Any other value is treated like symbolize.

  Addresses that cannot be matched to a symbol produce a warning on standard
  error and are otherwise skipped.
  """
  parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
  parser.add_option('-t', '--outputType', dest='output_type',
                    default='symbolize', type='string',
                    help='lineize or symbolize or orderfile')

  # Option for output type.  The log file and lib file arguments are required
  # by the script and therefore are not options.
  (options, args) = parser.parse_args()
  if len(args) != 2:
    parser.error('expected 2 args: log_file lib_file')

  (log_file, lib_file) = args
  output_type = options.output_type

  # Read the whole log up front and close the file promptly.
  with open(log_file) as log:
    log_file_lines = [line.rstrip() for line in log]
  call_info = ParseLogLines(log_file_lines)
  (unique_addrs, address_map) = ParseLibSymbols(lib_file)

  # Check for duplicate addresses in the log file, and print a warning if
  # duplicates are found.  The instrumentation that produces the log file
  # should only print the first time a function is entered.
  seen_addrs = set()
  for call in call_info:
    addr = call[3]
    if addr in seen_addrs:
      print('WARNING: Address ' + hex(addr) + ' (line= ' +
            AddrToLine(addr, lib_file) + ') already profiled.')
    else:
      seen_addrs.add(addr)

  for call in call_info:
    (sec, msec, callee_id, addr) = call
    prefix = str(sec) + ' ' + str(msec) + '\t' + str(callee_id) + '\t'

    if output_type == 'lineize':
      print(prefix + AddrToLine(addr, lib_file))
      continue

    try:
      symbols = FindFunctions(addr, unique_addrs, address_map)
    except SymbolNotFoundException:
      # Report the address of the call being processed, not whatever address
      # the duplicate check above happened to look at last.
      sys.stderr.write('WARNING: Did not find function in binary. addr: '
                       + hex(addr) + '\n')
      continue

    if output_type == 'orderfile':
      for symbol in symbols:
        print('.text.' + symbol)
      print('')
    else:
      # First alias goes on the timestamped line, the rest are listed below.
      print(prefix + symbols[0])
      for symbol in symbols[1:]:
        print('\t\t\t\t\t' + symbol)


if __name__ == '__main__':
  main()
OLD | NEW |