Index: tools/cygprofile/symbolize.py |
diff --git a/tools/cygprofile/symbolize.py b/tools/cygprofile/symbolize.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..81a4b6d72b4e68c865c305cdd6d70a1e02360376 |
--- /dev/null |
+++ b/tools/cygprofile/symbolize.py |
@@ -0,0 +1,251 @@ |
+#!/usr/bin/python |
+# Copyright (c) 2011 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Symbolize log file produced by cygprofile instrumentation. |
+ |
+Given a log file and the binary being profiled (e.g. executable, shared |
+library), the script can produce three different outputs: 1) symbols for the |
+addresses, 2) function and line numbers for the addresses, or 3) an order file. |
+""" |
+ |
+import optparse |
+import os |
+import string |
+import subprocess |
+import sys |
+ |
+ |
def ParseLogLines(log_file_lines):
  """Parse a log file produced by a run of the instrumented binary.

  Args:
    log_file_lines: list of lines of the log file produced by the profiled run

  Below is an example of a small log file:
  5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
  secs       msecs      pid:threadid    func
  START
  1314897086 795828 3587:1074648168 0x509e105c
  1314897086 795874 3587:1074648168 0x509e0eb4
  1314897086 796326 3587:1074648168 0x509e0e3c
  1314897086 796552 3587:1074648168 0x509e07bc
  END

  The first line (the executable memory mapping) supplies the load address.
  The first two lines are never treated as calls; of the remaining lines only
  those with exactly four whitespace-separated fields are parsed.

  Returns:
    list of tuples of the format (sec, msec, call id, function address
    called). An address greater than the mapping start is converted to an
    offset from the mapping start; any other address is left unchanged.

  Raises:
    ValueError: if the log is empty, if its first line is not an executable
        (r-xp) mapping, or if a call line contains a non-numeric field.
  """
  if not log_file_lines:
    raise ValueError('log file is empty; expected a memory mapping line')

  mapping_line = log_file_lines[0]
  if 'r-xp' not in mapping_line:
    raise ValueError('first log line is not an executable (r-xp) mapping: %r'
                     % (mapping_line,))
  # The mapping line starts with "<start>-<end>"; only the start is needed.
  vm_start = int(mapping_line[:mapping_line.find('-')], 16)

  call_info = []
  for line in log_file_lines[2:]:
    fields = line.split()
    # Markers such as START and END do not have four fields.
    if len(fields) != 4:
      continue
    sec_timestamp = int(fields[0])
    msec_timestamp = int(fields[1])
    callee_id = fields[2]
    addr = int(fields[3], 16)
    # Turn a runtime address into an offset into the profiled binary.
    if vm_start < addr:
      addr -= vm_start
    call_info.append((sec_timestamp, msec_timestamp, callee_id, addr))

  return call_info
+ |
+ |
def _GroupNmSymbols(nm_lines):
  """Group the sized text symbols in address-sorted nm output by address."""
  text_symbol_types = ('t', 'T', 'W')
  unique_addrs = []
  address_map = {}
  for nm_line in nm_lines:
    fields = nm_line.split()
    # If the length of the split line is not 4, then it does not contain all
    # the information needed to symbolize (i.e. address, size and symbol
    # name).
    if len(fields) != 4 or fields[2] not in text_symbol_types:
      continue
    addr = int(fields[0], 16)
    name = fields[3]
    # Multiple symbols may be at the same address. This is due to aliasing
    # done by the compiler. Since there is no way to be sure which one was
    # called in the profiled run, all symbol names at an address are kept.
    # The input is sorted by address, so aliases are adjacent.
    if unique_addrs and unique_addrs[-1][0] == addr:
      address_map[addr].append(name)
    else:
      # The size recorded for an address is that of its first symbol.
      unique_addrs.append((addr, int(fields[1], 16)))
      address_map[addr] = [name]
  return (unique_addrs, address_map)


def ParseLibSymbols(lib_file):
  """Run nm on lib_file and collect its text symbols.

  Only symbols of type 't', 'T' or 'W' that have both an address and a size
  are kept.

  Args:
    lib_file: the library or executable that contains the profiled code

  Returns:
    tuple (unique_addrs, address_map): unique_addrs is a list of
    (address, size) tuples in the order nm printed them (nm is run with -n,
    i.e. sorted by address); address_map maps each address to the list of all
    symbol names at that address.
  """
  cmd = ['nm', '-S', '-n', lib_file]
  # universal_newlines makes the output text (not bytes) on Python 3 as well.
  nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
  output = nm_p.communicate()[0]
  if nm_p.returncode != 0:
    sys.stderr.write('WARNING: nm exited with status %d for %s\n'
                     % (nm_p.returncode, lib_file))
  return _GroupNmSymbols(output.split('\n'))
+ |
class SymbolNotFoundException(Exception):
  """Error carrying a value that could not be resolved to a symbol.

  The value passed to the constructor is stored in the 'value' attribute, and
  the string form of the exception is the repr() of that value.
  """

  def __init__(self, value):
    self.value = value

  def __str__(self):
    return '%r' % (self.value,)
+ |
def BinarySearchAddresses(addr, start, end, arr):
  """Find starting address of a symbol at a particular address.

  The reason we can not directly use the address provided by the log file is
  that the log file may give an address after the start of the symbol. The
  logged address is often one byte after the start. Using this search
  function, rather than just subtracting one from the logged address, allows
  the logging instrumentation to log any address in a function.

  Args:
    addr: the address being searched for
    start: the starting index for the binary search (inclusive)
    end: the ending index for the binary search (inclusive)
    arr: the list being searched, containing tuples of (address, size) sorted
        by address

  Returns:
    the starting address of the symbol at address addr

  Raises:
    SymbolNotFoundException: if addr does not fall inside any symbol that the
        search visits, or if arr is empty.
  """
  if not arr:
    raise SymbolNotFoundException(addr)

  # Narrow [start, end] until at most two candidates remain.
  while end - start > 1:
    # Floor division keeps the index an integer on Python 3 as well.
    halfway = (start + end) // 2
    (nm_addr, size) = arr[halfway]
    if nm_addr <= addr < nm_addr + size:
      return nm_addr
    if addr < nm_addr:
      end = halfway - 1
    else:
      # Condition (addr >= nm_addr + size) must be true.
      start = halfway + 1

  # arr[i] is a tuple of address and size. Check if addr is inside the range
  # of either remaining candidate.
  for index in (start, end):
    (nm_addr, size) = arr[index]
    if nm_addr <= addr < nm_addr + size:
      return nm_addr
  raise SymbolNotFoundException(addr)
+ |
+ |
def FindFunctions(addr, unique_addrs, address_map):
  """Find function symbol names at address addr.

  Args:
    addr: the address to look up
    unique_addrs: list of (address, size) tuples searched over its full range
    address_map: map from symbol start address to the names at that address

  Returns:
    the entry of address_map for the start address that
    BinarySearchAddresses finds for addr
  """
  last_index = len(unique_addrs) - 1
  symbol_start = BinarySearchAddresses(addr, 0, last_index, unique_addrs)
  return address_map[symbol_start]
+ |
+ |
def AddrToLine(addr, lib_file):
  """Use addr2line to determine line info of a particular address.

  Args:
    addr: integer address to look up in lib_file
    lib_file: the library or executable passed to addr2line via -e

  Returns:
    the standard output of 'addr2line -f' with every newline replaced by ':'.
    A trailing newline in that output therefore becomes a trailing ':'.
  """
  # '0x%x' avoids the 'L' suffix that hex() appends to Python 2 longs.
  cmd = ['addr2line', '-f', '-e', lib_file, '0x%x' % addr]
  # universal_newlines makes the output text (not bytes) on Python 3 as well.
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
  output = p.communicate()[0]
  return output.replace('\n', ':')
+ |
+ |
def main():
  """Write output for profiled run to standard out.

  Expects two positional command line arguments, the log file and the
  profiled library or executable. The format of the output depends on the
  -t/--outputType option:
    lineize: timestamp, call id and addr2line output for each call
    orderfile: a '.text.<symbol>' line for each symbol of each call, with a
        blank line after each call
    anything else (default 'symbolize'): timestamp, call id and symbol names
        for each call

  Warnings about addresses that are logged more than once go to standard out;
  warnings about addresses with no matching symbol go to standard error.
  """
  parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
  parser.add_option('-t', '--outputType', dest='output_type',
                    default='symbolize', type='string',
                    help='lineize or symbolize or orderfile')

  # Option for output type. The log file and lib file arguments are required
  # by the script and therefore are not options.
  (options, args) = parser.parse_args()
  if len(args) != 2:
    parser.error('expected 2 args: log_file lib_file')

  (log_file, lib_file) = args
  output_type = options.output_type

  with open(log_file) as log:
    log_file_lines = [line.rstrip() for line in log]
  call_info = ParseLogLines(log_file_lines)
  (unique_addrs, address_map) = ParseLibSymbols(lib_file)

  # Check for duplicate addresses in the log file, and print a warning if
  # duplicates are found. The instrumentation that produces the log file
  # should only print the first time a function is entered.
  seen_addrs = set()
  for call in call_info:
    addr = call[3]
    if addr in seen_addrs:
      print('WARNING: Address ' + hex(addr) + ' (line= ' +
            AddrToLine(addr, lib_file) + ') already profiled.')
    else:
      seen_addrs.add(addr)

  for (sec, msec, callee_id, addr) in call_info:
    prefix = str(sec) + ' ' + str(msec) + '\t' + str(callee_id) + '\t'
    if output_type == 'lineize':
      print(prefix + AddrToLine(addr, lib_file))
      continue

    try:
      symbols = FindFunctions(addr, unique_addrs, address_map)
    except SymbolNotFoundException:
      # Report the address of this call, then carry on with the next one.
      sys.stderr.write('WARNING: Did not find function in binary. addr: '
                       + hex(addr) + '\n')
      continue

    if output_type == 'orderfile':
      for symbol in symbols:
        print('.text.' + symbol)
      print('')
    else:
      # The first alias shares the line with the timestamp; the remaining
      # aliases follow on their own indented lines.
      print(prefix + symbols[0])
      for symbol in symbols[1:]:
        print('\t\t\t\t\t' + symbol)
+ |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()