Index: src/trusted/validator/x86/testing/tf/asm.py |
diff --git a/src/trusted/validator/x86/testing/tf/asm.py b/src/trusted/validator/x86/testing/tf/asm.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..3b2efb65820c8d286117e7bf054ec42517d4b43d |
--- /dev/null |
+++ b/src/trusted/validator/x86/testing/tf/asm.py |
@@ -0,0 +1,121 @@ |
+# Copyright (c) 2012 The Native Client Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import re |
+import subprocess |
+ |
+import utils |
+ |
+ |
+class AssemblerError(Exception): |
+  """Raised by Assemble() when the assembler exits with a non-zero status.""" |
+ |
+ |
+def Assemble(bits, asm): |
+  """Assemble a snippet with 'as' and return the resulting machine code. |
+ |
+  Instead of parsing the object file properly, the snippet is wrapped between |
+  two distinct marker byte sequences and whatever the object file contains |
+  between them is returned. The possibility that a marker occurs somewhere |
+  else in the object file is neglected. |
+ |
+  Args: |
+    bits: Bitness, passed to the assembler as the '--<bits>' option. |
+    asm: Assembler source text. |
+ |
+  Returns: |
+    The part of the object file located between the two markers. |
+ |
+  Raises: |
+    AssemblerError: The assembler exited with a non-zero status. |
+  """ |
+  head_marker = 'begin mark qwert23904!>>>' |
+  tail_marker = '<<<end mark qwe213908!!' |
+ |
+  def MarkerDirectives(marker): |
+    # One '.byte' directive per character of the marker. |
+    return ''.join('.byte %d\n' % ord(ch) for ch in marker) |
+ |
+  with utils.TempFile(mode='w') as asm_file: |
+    with utils.TempFile(mode='w+b') as out_file: |
+      asm_file.write(MarkerDirectives(head_marker)) |
+      asm_file.write('%s\n' % asm) |
+      asm_file.write(MarkerDirectives(tail_marker)) |
+      # The assembler reads the file by name, so the text has to be on disk. |
+      asm_file.flush() |
+ |
+      # TODO(shcherbina): deal somehow with the fact that 'as' is only |
+      # available on Linux. |
+      command = ['as', '--%s' % bits, asm_file.name, '-o', out_file.name] |
+      if subprocess.call(command) != 0: |
+        raise AssemblerError("Can't assemble '%s'" % asm) |
+ |
+      object_bytes = out_file.read() |
+ |
+  # Keep only what lies between the two markers. |
+  start = object_bytes.find(head_marker) |
+  assert start != -1, 'begin_mark is missing' |
+  start += len(head_marker) |
+  stop = object_bytes.find(tail_marker, start) |
+  assert stop != -1, 'end_mark is missing' |
+  return object_bytes[start:stop] |
+ |
+ |
+def Disassemble(bits, data): |
+  """Disassemble raw machine code with objdump. |
+ |
+  Args: |
+    bits: Bitness (32 or 64); selects the objdump disassembler option. |
+    data: Machine code; written verbatim to a temporary file that objdump |
+      reads as a raw binary. |
+ |
+  Returns: |
+    The instruction text of every objdump output line that could be parsed, |
+    joined with '; '. |
+ |
+  Raises: |
+    ValueError: bits is neither 32 nor 64. |
+  """ |
+  if bits == 32: |
+    arch = '-Mi386' |
+  elif bits == 64: |
+    arch = '-Mx86-64' |
+  else: |
+    # Without this branch 'arch' stayed unbound and the objdump invocation |
+    # below failed with an unrelated-looking UnboundLocalError. |
+    raise ValueError('Unsupported bitness: %r (expected 32 or 64)' % (bits,)) |
+ |
+  with utils.TempFile(mode='wb') as binary_file: |
+    binary_file.write(data) |
+    # objdump opens the file by name, so the bytes have to be on disk. |
+    binary_file.flush() |
+ |
+    # TODO(shcherbina): objdump would only be available on Linux |
+    output = utils.CheckOutput([ |
+        'objdump', '-mi386', arch, '-D', '-b', 'binary', binary_file.name]) |
+ |
+  # Parse disassembler output in the form |
+  #    0:   66 0f be 04 10       movsbw (%eax,%edx,1),%ax |
+  # and extract instruction ('movsbw (%eax,%edx,1),%ax' in this case). |
+  # Lines that do not match the pattern are skipped. |
+  # NOTE(review): a line holding only an address and bytes matches with an |
+  # empty instruction and so contributes an empty entry - confirm whether |
+  # that is intended. |
+  line_re = re.compile(r'\s*[0-9a-f]+:\s([0-9a-f]{2}\s)+\s*(.*)$', |
+                       re.IGNORECASE) |
+  matches = (line_re.match(line) for line in output.split('\n')) |
+  return '; '.join(m.group(2).strip() for m in matches if m is not None) |
+ |
+ |
+def DisassembleReversibly(bits, data): |
+  """Try to disassemble in such a way that assembler would return input. |
+ |
+  Try to find such a string `a` that Assemble(bits, a) == data. Sometimes |
+  it's impossible, for instance when instruction prefixes come in |
+  unconventional order (in these cases None is returned). |
+ |
+  Args: |
+    bits: Bitness (32 or 64). |
+    data: String that is treated as machine code. |
+ |
+  Returns: |
+    Assembler instruction (or semicolon-separated instructions) |
+    as a string or None. |
+  """ |
+ |
+  a = Disassemble(bits, data) |
+ |
+  # Dirty hack to use relative offset in jumps. |
+  # We take instruction like |
+  #   jmp 0x05 |
+  # and convert it to equivalent relative form |
+  #   jmp .+5 |
+  # The pattern is anchored at both ends, so only a disassembly that |
+  # consists of a single call/jump instruction is rewritten. |
+  m = re.match(r'(callq?|j..?)\s+0x([0-9a-f]+)$', a, re.IGNORECASE) |
+  if m is not None: |
+    offset = int(m.group(2), 16) |
+    # Make it signed. The wrap-around point follows the bitness instead of |
+    # being fixed at 2**32: objdump presumably prints a backward target as |
+    # an unsigned address as wide as the disassembled mode (64 bits in |
+    # 64-bit mode). A wrong guess is harmless, because the result is only |
+    # returned if it reassembles to the input below. |
+    if offset >= 2 ** (bits - 1): |
+      offset -= 2 ** bits |
+    a = '%s .%+d' % (m.group(1), offset) |
+ |
+  try: |
+    if Assemble(bits, a) == data: |
+      return a |
+  except AssemblerError: |
+    # Text that the assembler rejects simply means "not reversible". |
+    pass |
+ |
+  return None |