tools/cygprofile/symbolize.py - Issue 16151006: Add scripts for using cygprofile to repository

Side by Side Diff: tools/cygprofile/symbolize.py

Issue 16151006: Add scripts for using cygprofile to repository (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Cygprofile scripts - reformat to Chromium standard Created 7 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/python

	2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """Symbolize log file produced by cygprofile instrumentation.

	7

	8 Given a log file and the binary being profiled (e.g. executable, shared

	9 library), the script can produce three different outputs: 1) symbols for the

	10 addresses, 2) function and line numbers for the addresses, or 3) an order file.

	11 """

	12

	13 import optparse

	14 import os

	15 import string

	16 import subprocess

	17 import sys

	18

	19

	20 def ParseLogLines(log_file_lines):

	21 """Parse a log file produced by the profiled run of clank.

	22

	23 Args:

	24 log_file_lines: array of lines in log file produced by profiled run

	25 lib_name: library or executable containing symbols

	26

	27 Below is an example of a small log file:

	28 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so

	29 secs msecs pid:threadid func

	30 START

	31 1314897086 795828 3587:1074648168 0x509e105c

	32 1314897086 795874 3587:1074648168 0x509e0eb4

	33 1314897086 796326 3587:1074648168 0x509e0e3c

	34 1314897086 796552 3587:1074648168 0x509e07bc

	35 END

	36

	37 Returns:

	38 call_info list with list of tuples of the format (sec, msec, call id,

	39 function address called)

	40 """

	41 call_lines = []

	42 has_started = False

	43 vm_start = 0

	44 line = log_file_lines[0]

	45 assert("r-xp" in line)

	46 end_index = line.find('-')

	47 vm_start = int(line[:end_index], 16)

	48 for line in log_file_lines[2:]:

	49 # print hex(vm_start)

	50 fields = line.split()

	51 if len(fields) == 4:

	52 call_lines.append(fields)

	53

	54 # Convert strings to int in fields.

	55 call_info = []

	56 for call_line in call_lines:

	57 (sec_timestamp, msec_timestamp) = map(int, call_line[0:2])

	58 callee_id = call_line[2]

	59 addr = int(call_line[3], 16)

	60 if vm_start < addr:

	61 addr -= vm_start

	62 call_info.append((sec_timestamp, msec_timestamp, callee_id, addr))

	63

	64 return call_info

	65

	66

	67 def ParseLibSymbols(lib_file):

	68 """Get output from running nm and greping for text symbols.

	69

	70 Args:

	71 lib_file: the library or executable that contains the profiled code

	72

	73 Returns:

	74 list of sorted unique addresses and corresponding size of function symbols

	75 in lib_file and map of addresses to all symbols at a particular address

	76 """

	77 cmd = ['nm', '-S', '-n', lib_file]

	78 nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE)

	79 output = nm_p.communicate()[0]

	80 nm_lines = output.split('\n')

	81

	82 nm_symbols = []

	83 for nm_line in nm_lines:

	84 if any(str in nm_line for str in (' t ', ' W ', ' T ')):

	85 nm_symbols.append(nm_line)

	86

	87 nm_index = 0

	88 unique_addrs = []

	89 address_map = {}

	90 while nm_index < len(nm_symbols):

	91

	92 # If the length of the split line is not 4, then it does not contain all the

	93 # information needed to symbolize (i.e. address, size and symbol name).

	94 if len(nm_symbols[nm_index].split()) == 4:

	95 (addr, size) = [int(x, 16) for x in nm_symbols[nm_index].split()[0:2]]

	96

	97 # Multiple symbols may be at the same address. This is do to aliasing

	98 # done by the compiler. Since there is no way to be sure which one was

	99 # called in profiled run, we will symbolize to include all symbol names at

	100 # a particular address.

	101 fnames = []

	102 while (nm_index < len(nm_symbols) and

	103 addr == int(nm_symbols[nm_index].split()[0], 16)):

	104 if len(nm_symbols[nm_index].split()) == 4:

	105 fnames.append(nm_symbols[nm_index].split()[3])

	106 nm_index += 1

	107 address_map[addr] = fnames

	108 unique_addrs.append((addr, size))

	109 else:

	110 nm_index += 1

	111

	112 return (unique_addrs, address_map)

	113

	114 class SymbolNotFoundException(Exception):

	115 def __init__(self,value):

	116 self.value = value

	117 def __str__(self):

	118 return repr(self.value)

	119

	120 def BinarySearchAddresses(addr, start, end, arr):

	121 """Find starting address of a symbol at a particular address.

	122

	123 The reason we can not directly use the address provided by the log file is

	124 that the log file may give an address after the start of the symbol. The

	125 logged address is often one byte after the start. By using this search

	126 function rather than just subtracting one from the logged address allows

	127 the logging instrumentation to log any address in a function.

	128

	129 Args:

	130 addr: the address being searched for

	131 start: the starting index for the binary search

	132 end: the ending index for the binary search

	133 arr: the list being searched containing tuple of address and size

	134

	135 Returns:

	136 the starting address of the symbol at address addr

	137

	138 Raises:

	139 Exception: if address not found. Functions expects all logged addresses

	140 to be found

	141 """

	142 # print "addr: " + str(addr) + " start: " + str(start) + " end: " + str(end)

	143 if start >= end or start == end - 1:

	144 # arr[i] is a tuple of address and size. Check if addr inside range

	145 if addr >= arr[start][0] and addr < arr[start][0] + arr[start][1]:

	146 return arr[start][0]

	147 elif addr >= arr[end][0] and addr < arr[end][0] + arr[end][1]:

	148 return arr[end][0]

	149 else:

	150 raise SymbolNotFoundException(addr)

	151 else:

	152 halfway = (start + end) / 2

	153 (nm_addr, size) = arr[halfway]

	154 # print "nm_addr: " + str(nm_addr) + " halfway: " + str(halfway)

	155 if addr >= nm_addr and addr < nm_addr + size:

	156 return nm_addr

	157 elif addr < nm_addr:

	158 return BinarySearchAddresses(addr, start, halfway-1, arr)

	159 else:

	160 # Condition (addr >= nm_addr + size) must be true.

	161 return BinarySearchAddresses(addr, halfway+1, end, arr)

	162

	163

	164 def FindFunctions(addr, unique_addrs, address_map):

	165 """Find function symbol names at address addr."""

	166 return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1,

	167 unique_addrs)]

	168

	169

	170 def AddrToLine(addr, lib_file):

	171 """Use addr2line to determine line info of a particular address."""

	172 cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]

	173 p = subprocess.Popen(cmd, stdout=subprocess.PIPE)

	174 output = (p.communicate()[0]).split('\n')

	175 line = output[0]

	176 index = 1

	177 while index < len(output):

	178 line = line + ':' + output[index]

	179 index += 1

	180 return line

	181

	182

	183 def main():

	184 """Write output for profiled run to standard out.

	185

	186 The format of the output depends on the output type specified as the third

	187 command line argument. The default output type is to symbolize the addresses

	188 of the functions called.

	189 """

	190 parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')

	191 parser.add_option('-t', '--outputType', dest='output_type',

	192 default='symbolize', type='string',

	193 help='lineize or symbolize or orderfile')

	194

	195 # Option for output type. The log file and lib file arguments are required

	196 # by the script and therefore are not options.

	197 (options, args) = parser.parse_args()

	198 if len(args) != 2:

	199 parser.error('expected 2 args: log_file lib_file')

	200

	201 (log_file, lib_file) = args

	202 output_type = options.output_type

	203

	204 lib_name = lib_file.split('/')[-1].strip()

	205 log_file_lines = map(string.rstrip, open(log_file).readlines())

	206 call_info = ParseLogLines(log_file_lines)

	207 (unique_addrs, address_map) = ParseLibSymbols(lib_file)

	208

	209 # Check for duplicate addresses in the log file, and print a warning if

	210 # duplicates are found. The instrumentation that produces the log file

	211 # should only print the first time a function is entered.

	212 addr_list = []

	213 for call in call_info:

	214 addr = call[3]

	215 if addr not in addr_list:

	216 addr_list.append(addr)

	217 else:

	218 print('WARNING: Address ' + hex(addr) + ' (line= ' +

	219 AddrToLine(addr, lib_file) + ') already profiled.')

	220

	221 for call in call_info:

	222 if output_type == 'lineize':

	223 symbol = AddrToLine(call[3], lib_file)

	224 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t'

	225 + symbol)

	226 elif output_type == 'orderfile':

	227 try:

	228 symbols = FindFunctions(call[3], unique_addrs, address_map)

	229 for symbol in symbols:

	230 print '.text.' + symbol

	231 print ''

	232 except SymbolNotFoundException as e:

	233 sys.stderr.write('WARNING: Did not find function in binary. addr: '

	234 + hex(addr) + '\n')

	235 else:

	236 try:

	237 symbols = FindFunctions(call[3], unique_addrs, address_map)

	238 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t'

	239 + symbols[0])

	240 first_symbol = True

	241 for symbol in symbols:

	242 if not first_symbol:

	243 print '\t\t\t\t\t' + symbol

	244 else:

	245 first_symbol = False

	246 except SymbolNotFoundException as e:

	247 sys.stderr.write('WARNING: Did not find function in binary. addr: '

	248 + hex(addr) + '\n')

	249

	250 if __name__ == '__main__':

	251 main()

OLD	NEW

« no previous file with comments | « tools/cygprofile/patch_orderfile.py ('k') | no next file » | no next file with comments »