OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 import re |
| 6 import subprocess |
| 7 |
| 8 import utils |
| 9 |
| 10 |
class AssemblerError(Exception):
  """Signals that a piece of assembler source could not be assembled."""
| 13 |
| 14 |
def Assemble(bits, asm):
  """Assemble source text with 'as' and return the produced machine code.

  Instead of parsing the object file properly, two distinctive byte
  sequences (begin_mark and end_mark) are emitted around the code of
  interest, and whatever lies between them in the assembler output is
  returned. The possibility that either sequence occurs somewhere else in
  the object file is neglected.

  Args:
    bits: Bitness; passed to 'as' as the '--<bits>' option.
    asm: Assembler source text.

  Returns:
    The bytes located between the two marks in the assembler output.

  Raises:
    AssemblerError: 'as' exited with a non-zero status.
    AssertionError: One of the marks was not found in the assembler output.
  """
  # Bytes literals, because the object file is read back in binary mode.
  begin_mark = b'begin mark qwert23904!>>>'
  end_mark = b'<<<end mark qwe213908!!'

  with utils.TempFile(mode='w') as asm_file:
    with utils.TempFile(mode='w+b') as out_file:

      # Iterating over a bytearray yields integers on both Python 2 and 3.
      for byte in bytearray(begin_mark):
        asm_file.write('.byte %d\n' % byte)
      asm_file.write('%s\n' % asm)
      for byte in bytearray(end_mark):
        asm_file.write('.byte %d\n' % byte)
      # Make sure 'as' sees the complete source when it opens the file.
      asm_file.flush()

      # TODO(shcherbina): deal somehow with the fact that 'as' is only
      # available on Linux.
      result = subprocess.call([
          'as',
          '--%s' % bits,
          asm_file.name,
          '-o',
          out_file.name,
      ])
      if result != 0:
        raise AssemblerError("Can't assemble '%s'" % asm)

      data = out_file.read()

  # Extract the data between begin_mark and end_mark.
  # Explicit raises (rather than assert statements) keep these sanity checks
  # active when Python runs with -O.
  begin = data.find(begin_mark)
  if begin == -1:
    raise AssertionError('begin_mark is missing')
  begin += len(begin_mark)
  end = data.find(end_mark, begin)
  if end == -1:
    raise AssertionError('end_mark is missing')
  return data[begin:end]
| 53 |
| 54 |
def Disassemble(bits, data):
  """Disassemble raw machine code with objdump.

  Args:
    bits: Bitness (32 or 64).
    data: Machine code to disassemble.

  Returns:
    The instruction texts extracted from objdump output, joined with '; '.

  Raises:
    ValueError: bits is neither 32 nor 64.
  """
  if bits == 32:
    arch = '-Mi386'
  elif bits == 64:
    arch = '-Mx86-64'
  else:
    # Fail early with a clear message instead of hitting an unbound 'arch'
    # further down.
    raise ValueError('Unsupported bitness: %r (expected 32 or 64)' % (bits,))

  with utils.TempFile(mode='wb') as binary_file:
    binary_file.write(data)
    binary_file.flush()

    # TODO(shcherbina): objdump would only be available on Linux
    output = utils.CheckOutput([
        'objdump', '-mi386', arch, '-D', '-b', 'binary', binary_file.name])

  # Parse disassembler output in the form
  #    0: 66 0f be 04 10 movsbw (%eax,%edx,1),%ax
  # and extract instruction ('movsbw (%eax,%edx,1),%ax' in this case).
  instruction_re = re.compile(
      r'\s*[0-9a-f]+:\s([0-9a-f]{2}\s)+\s*(.*)$',
      re.IGNORECASE)

  result = []
  for line in output.split('\n'):
    m = instruction_re.match(line)
    if m is None:
      continue
    instruction = m.group(2).strip()
    # A line may match while carrying only hex bytes (for example when the
    # bytes of a long instruction spill onto an extra line); it contributes
    # no instruction text, so skip it rather than emit an empty entry.
    if instruction:
      result.append(instruction)

  return '; '.join(result)
| 82 |
| 83 |
def DisassembleReversibly(bits, data):
  """Disassemble so that assembling the result reproduces the input.

  Looks for assembler text `a` such that Assemble(bits, a) == data.
  Sometimes this is impossible, for instance when instruction prefixes come
  in an unconventional order; None is returned in such cases.

  Args:
    bits: Bitness (32 or 64).
    data: String that is treated as machine code.

  Returns:
    Assembler instruction (or semicolon-separated instructions) as a string,
    or None when the round trip does not reproduce data.
  """
  asm_text = Disassemble(bits, data)

  # Dirty hack to use relative offsets in jumps: an instruction such as
  #   jmp 0x05
  # is rewritten into the equivalent relative form
  #   jmp .+5
  jump = re.match(r'(callq?|j..?)\s+0x([0-9a-f]+)$', asm_text, re.IGNORECASE)
  if jump is not None:
    mnemonic, hex_target = jump.group(1), jump.group(2)
    offset = int(hex_target, 16)
    # Reinterpret values of 2**31 and above as negative 32-bit offsets.
    if offset >= 1 << 31:
      offset -= 1 << 32
    asm_text = '%s .%+d' % (mnemonic, offset)

  try:
    reassembled = Assemble(bits, asm_text)
  except AssemblerError:
    # Text that the assembler rejects is simply not reversible.
    return None

  if reassembled == data:
    return asm_text
  return None
OLD | NEW |