Chromium Code Reviews| Index: src/trusted/validator_ragel/validator_x86_64.rl |
| =================================================================== |
| --- src/trusted/validator_ragel/validator_x86_64.rl (revision 11020) |
| +++ src/trusted/validator_ragel/validator_x86_64.rl (working copy) |
| @@ -20,7 +20,7 @@ |
| #include <string.h> |
| #include "native_client/src/trusted/validator_ragel/bitmap.h" |
| -#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" |
| +#include "native_client/src/trusted/validator_ragel/validator_internal.h" |
| %%{ |
| machine x86_64_validator; |
| @@ -61,19 +61,15 @@ |
| include cpuid_actions |
| "native_client/src/trusted/validator_ragel/parse_instruction.rl"; |
| - action check_access { |
| - CheckAccess(instruction_begin - data, base, index, restricted_register, |
| - valid_targets, &instruction_info_collected); |
| + action check_memory_access { |
| + CheckMemoryAccess(instruction_begin - codeblock, |
| + base, |
| + index, |
| + restricted_register, |
| + valid_targets, |
| + &instruction_info_collected); |
| } |
| - # Action which marks last byte as not immediate. Most 3DNow! instructions, |
| - # some AVX and XOP instructions have this proerty. It's referenced by |
| - # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl" |
| - # file. |
| - action last_byte_is_not_immediate { |
| - instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; |
| - } |
| - |
| action modifiable_instruction { |
| instruction_info_collected |= MODIFIABLE_INSTRUCTION; |
| } |
| @@ -134,7 +130,7 @@ |
| # But since these instructions are "second half" of the %rbp sandboxing they |
| # can be used *only* when %rbp is restricted. |
| # |
| - # That is (normal instruction): |
| + # Compare: |
| # mov %eax,%ebp |
| # mov %esi,%edi <- Error: %ebp is restricted |
| # vs |
| @@ -147,11 +143,13 @@ |
| # Check this precondition and mark the beginning of the instruction as |
| # invalid jump for target. |
| @{ if (restricted_register == REG_RBP) |
| + /* RESTRICTED_REGISTER_USED is informational flag used in tests. */ |
| instruction_info_collected |= RESTRICTED_REGISTER_USED; |
| else |
| + /* UNRESTRICTED_RBP_PROCESSED is error flag used in production. */ |
| instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; |
| restricted_register = NO_REG; |
| - UnmarkValidJumpTarget((instruction_begin - data), valid_targets); |
| + UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets); |
| }; |
| # Special %rsp modifications - the ones which don't need a sandboxing. |
| @@ -209,7 +207,7 @@ |
| else |
| instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; |
| restricted_register = NO_REG; |
| - UnmarkValidJumpTarget((instruction_begin - data), valid_targets); |
| + UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets); |
| }; |
| # naclcall or nacljmp. These are three-instruction indirection-jump sequences. |
| @@ -217,7 +215,7 @@ |
| # and RBASE, %rXX |
| # jmpq *%rXX (or: callq *%rXX) |
| # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not |
| - # just as part of the naclcall/nacljmp, but also as a standolene instruction). |
| + # just as part of the naclcall/nacljmp, but also as a standalone instruction). |
| # |
| # This means that when naclcall_or_nacljmp ragel machine will be combined with |
| # "normal_instruction*" regular action process_1_operand_zero_extends will be |
| @@ -237,7 +235,7 @@ |
| # byte for the dst while last one uses field RM of the ModR/M byte for the src |
| # and field REG of the ModR/M byte for dst. Both should be allowed. |
| # |
| - # See AMD/Intel manual for clarification "add" instruction encoding. |
| + # See AMD/Intel manual for clarification about "add" instruction encoding. |
| # |
| # REGISTER USAGE ABBREVIATIONS: |
| # E86: legacy ia32 registers (all eight: %eax to %edi) |
| @@ -245,6 +243,9 @@ |
| # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) |
| # R64: new amd64 registers (only seven: %r8 to %r14) |
| # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) |
| + # |
| + # Note that in the actions below instruction_begin points to the start of the |
| + # "call" or "jmp" instruction and current_position points to its end. |
|
halyavin
2013/03/25 09:43:41
current_position points to its end.
khim
2013/03/25 11:33:48
Done.
|
| naclcall_or_nacljmp = |
| # This block encodes call and jump "superinstruction" of the following form: |
| # 0: 83 e_ e0 and $~0x1f,E86 |
| @@ -264,8 +265,10 @@ |
| (REX_WRX? 0xff b_11_100_xxx))) |
| @{ |
| ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, |
| - &instruction_begin, current_position, |
| - data, valid_targets); |
| + &instruction_begin, |
| + current_position, |
| + codeblock, |
| + valid_targets); |
| } | |
| # This block encodes call and jump "superinstruction" of the following form: |
| @@ -286,8 +289,10 @@ |
| (REX_WRX? 0xff b_11_100_xxx))) |
| @{ |
| ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, |
| - &instruction_begin, current_position, |
| - data, valid_targets); |
| + &instruction_begin, |
| + current_position, |
| + codeblock, |
| + valid_targets); |
| } | |
| # This block encodes call and jump "superinstruction" of the following form: |
| @@ -325,8 +330,10 @@ |
| (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) |
| @{ |
| ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, |
| - &instruction_begin, current_position, |
| - data, valid_targets); |
| + &instruction_begin, |
| + current_position, |
| + codeblock, |
| + valid_targets); |
| } | |
| # This block encodes call and jump "superinstruction" of the following form: |
| @@ -364,8 +371,10 @@ |
| (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) |
| @{ |
| ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, |
| - &instruction_begin, current_position, |
| - data, valid_targets); |
| + &instruction_begin, |
| + current_position, |
| + codeblock, |
| + valid_targets); |
| }; |
| # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand |
| @@ -393,19 +402,19 @@ |
| # String instructions which use only %ds:(%rsi) |
| string_instruction_rsi_no_rdi = |
| - (rep? 0xac | # lods %ds:(%rsi),%al |
| - data16rep 0xad | # lods %ds:(%rsi),%ax |
| - rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax |
| + (rep? 0xac | # lods %ds:(%rsi),%al |
| + data16rep 0xad | # lods %ds:(%rsi),%ax |
| + rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax |
| # String instructions which use only %ds:(%rdi) |
| string_instruction_rdi_no_rsi = |
| - condrep? 0xae | # scas %es:(%rdi),%al |
| - data16condrep 0xaf | # scas %es:(%rdi),%ax |
| - condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax |
| + condrep? 0xae | # scas %es:(%rdi),%al |
| + data16condrep 0xaf | # scas %es:(%rdi),%ax |
| + condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax |
| - rep? 0xaa | # stos %al,%es:(%rdi) |
| - data16rep 0xab | # stos %ax,%es:(%rdi) |
| - rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi) |
| + rep? 0xaa | # stos %al,%es:(%rdi) |
| + data16rep 0xab | # stos %ax,%es:(%rdi) |
| + rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi) |
| # String instructions which use both %ds:(%rsi) and %es:(%rdi) |
| string_instruction_rsi_rdi = |
| @@ -427,20 +436,26 @@ |
| # are two encodings for the register-to-register move (and since REG and RM |
| # are identical here only opcode differs). |
| sandbox_instruction_rsi_no_rdi = |
| - (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
| - 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
| + (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
| string_instruction_rsi_no_rdi |
| @{ |
| ExpandSuperinstructionBySandboxingBytes( |
| - 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
| + 2 /* mov */ + 4 /* lea */, |
| + &instruction_begin, |
| + codeblock, |
| + valid_targets); |
| } | |
| - REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
| - 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
| + REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
| string_instruction_rsi_no_rdi |
| @{ |
| ExpandSuperinstructionBySandboxingBytes( |
| - 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
| + 3 /* mov */ + 4 /* lea */, |
| + &instruction_begin, |
| + codeblock, |
| + valid_targets); |
| }; |
| # "Superinstruction" which includes %rdi sandboxing. |
| @@ -453,20 +468,26 @@ |
| # are two encodings for the register-to-register move (and since REG and RM |
| # are identical here only opcode differs). |
| sandbox_instruction_rdi_no_rsi = |
| - (0x89 | 0x8b) 0xff # mov %edi,%edi |
| - 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
| + (0x89 | 0x8b) 0xff # mov %edi,%edi |
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
| @{ |
| ExpandSuperinstructionBySandboxingBytes( |
| - 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
| + 2 /* mov */ + 4 /* lea */, |
| + &instruction_begin, |
| + codeblock, |
| + valid_targets); |
| } | |
| - REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
| + REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi |
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
| (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
| @{ |
| ExpandSuperinstructionBySandboxingBytes( |
| - 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
| + 3 /* mov */ + 4 /* lea */, |
| + &instruction_begin, |
| + codeblock, |
| + valid_targets); |
| }; |
| @@ -481,15 +502,17 @@ |
| # for the register-to-register move (and since REG and RM are identical here |
| # only opcode differs). |
| sandbox_instruction_rsi_rdi = |
| - (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
| - 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
| - (0x89 | 0x8b) 0xff # mov %edi,%edi |
| - 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
| + (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
| + (0x89 | 0x8b) 0xff # mov %edi,%edi |
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
| string_instruction_rsi_rdi |
| @{ |
| ExpandSuperinstructionBySandboxingBytes( |
| 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, |
| - &instruction_begin, data, valid_targets); |
| + &instruction_begin, |
| + codeblock, |
| + valid_targets); |
| } | |
| (((0x89 | 0x8b) 0xf6 # mov %esi,%esi |
| @@ -506,25 +529,31 @@ |
| ExpandSuperinstructionBySandboxingBytes( |
| 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ |
| /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, |
| - &instruction_begin, data, valid_targets); |
| + &instruction_begin, |
| + codeblock, |
| + valid_targets); |
| } | |
| - REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
| - 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
| - REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
| - 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
| + REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
| + 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
| + REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi |
| + 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
| string_instruction_rsi_rdi |
| @{ |
| ExpandSuperinstructionBySandboxingBytes( |
| 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, |
| - &instruction_begin, data, valid_targets); |
| + &instruction_begin, |
| + codeblock, |
| + valid_targets); |
| }; |
| # All the "special" instructions (== instructions which obey non-standard |
| # rules). Three groups: |
| - # * %rsp/%rsp related instructions (these instructions are special because |
| - # they must be in the range %r15...%r15+4294967295 except momentarily they |
| - # can be in the range 0...4294967295) |
| + # * %rbp/%rsp related instructions (these registers and operations which |
| + # operate on them are special because registers must be in the range |
|
halyavin
2013/03/25 09:43:41
operatein->operate
khim
2013/03/25 11:33:48
Done.
|
| + # %r15...%r15+4294967295 except momentarily they can be in the range |
| + # 0...4294967295, but then the very next instruction MUST restore the |
| + # status quo). |
| # * string instructions (which can not use %r15 as base and thus need special |
| # handling both in compiler and validator) |
| # * naclcall/nacljmp (indirect jumps need special care) |
| @@ -547,36 +576,36 @@ |
| # Remove special instructions which are only allowed in special cases. |
| normal_instruction = one_instruction - special_instruction; |
| - # Check if call is properly aligned. |
| - # |
| - # For direct call we explicitly encode all variations. For indirect call |
| - # we accept all the special instructions which ends with register-addressed |
| - # indirect call. |
| + # For direct call we explicitly encode all variations. |
| + direct_call = (data16 REX_RXB? 0xe8 rel16) | |
| + (REX_WRXB? 0xe8 rel32) | |
| + (data16 REXW_RXB 0xe8 rel32); |
| + |
| + # For indirect call we accept only near register-addressed indirect call. |
| + indirect_call_register = data16? REX_WRXB? 0xff (opcode_2 & modrm_registers); |
| + |
| + # Ragel machine that accepts one call instruction or call superinstruction and |
| + # checks if call is properly aligned. |
| call_alignment = |
| - ((normal_instruction & |
| - # Direct call |
| - ((data16 REX_RXB? 0xe8 rel16) | |
| - (REX_WRXB? 0xe8 rel32) | |
| - (data16 REXW_RXB 0xe8 rel32))) | |
| - (special_instruction & |
| - # Indirect call |
| - (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* & |
| - modrm_registers)))) |
| + ((normal_instruction & direct_call) | |
| + # For indirect calls we accept all the special instructions which end with |
| + # register-addressed indirect call. |
| + (special_instruction & (any* indirect_call_register))) |
| # Call instruction must aligned to the end of bundle. Previously this was |
| # strict requirement, today it's just warning to aid with debugging. |
| @{ |
| - if (((current_position - data) & kBundleMask) != kBundleMask) |
| + if (((current_position - codeblock) & kBundleMask) != kBundleMask) |
| instruction_info_collected |= BAD_CALL_ALIGNMENT; |
| }; |
| - # This action calls user's callback (if needed) and cleans up validator's |
| + # This action calls user's callback (if needed) and cleans up validator's |
| # internal state. |
| # |
| - # We call the user callback if there are validation errors or if the |
| - # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. |
| + # We call the user callback either on validation errors or on every |
| + # instruction, depending on CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option. |
| # |
| # After that we move instruction_begin and clean all the variables which |
| - # only used in the processing of a single instruction (prefixes, operand |
| + # are only used in the processing of a single instruction (prefixes, operand |
| # states and instruction_info_collected). |
| action end_of_instruction_cleanup { |
| /* Call user-supplied callback. */ |
| @@ -595,11 +624,12 @@ |
| * causing error. */ |
| instruction_begin = instruction_end; |
| - /* Mark start of the next instruction as a valid target for jump. |
| - * Note: we mark start of the next instruction here, not start of the |
| - * current one because memory access check should be able to clear this |
| - * bit when restricted register is used. */ |
| - MarkValidJumpTarget(instruction_begin - data, valid_targets); |
| + /* |
| + * We may set instruction_begin at the first byte of the instruction instead |
| + * of here but in the case of incorrect one byte instructions user callback |
| + * may be called before instruction_begin is set. |
| + */ |
| + MarkValidJumpTarget(instruction_begin - codeblock, valid_targets); |
| /* Clear variables. */ |
| instruction_info_collected = 0; |
| @@ -628,7 +658,7 @@ |
| } |
| # This is main ragel machine: it does 99% of validation work. There are only |
| - # one thing to do with bundle if this machine accepts the bundle: |
| + # one thing to do with bundle if this ragel machine accepts the bundle: |
| # * check for the state of the restricted_register at the end of the bundle. |
| # It's an error if %rbp or %rsp is restricted at the end of the bundle. |
| # Additionally if all the bundles are fine you need to check that direct jumps |
| @@ -643,41 +673,66 @@ |
| }%% |
| +/* |
| + * The "write data" statement causes Ragel to emit the constant static data |
| + * needed by the ragel machine. |
| + */ |
| %% write data; |
| +/* |
| + * Operand's kind WRT sandboxing effect: no effect, can be used for sandboxing |
| + * and will make register invalid if used. |
| + * |
| + * No effect is the "initial state", 32bit stores can be used for sandboxing (in |
| + * that case the high 32 bits of the corresponding 64-bit register are set to |
| + * zero) and we do not distinguish modifications of 16bit and 64bit registers to |
| + * match the behavior of the old validator. |
| + * |
| + * 8bit operands must be distinguished from other types because the REX prefix |
| + * regulates the choice between %ah and %spl, as well as %ch and %bpl. |
| + */ |
| enum OperandKind { |
| OPERAND_SANDBOX_IRRELEVANT = 0, |
| + /* 8bit register that is modified by instruction. */ |
| + OPERAND_SANDBOX_8BIT, |
| /* |
| - * Currently we do not distinguish 8bit and 16bit modifications from |
| - * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. |
| - * |
| - * 8bit operands must be distinguished from other types because the REX prefix |
| - * regulates the choice between %ah and %spl, as well as %ch and %bpl. |
| + * 32-bit register that is modified by instruction. The high 32 bits of |
| + * the corresponding 64-bit register are set to zero. |
| */ |
| - OPERAND_SANDBOX_8BIT, |
| OPERAND_SANDBOX_RESTRICTED, |
| + /* 64-bit or 16-bit register that is modified by instruction. */ |
| OPERAND_SANDBOX_UNRESTRICTED |
| }; |
| -#define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3)) |
| -#define SET_OPERAND_FORMAT(N, T) SET_OPERAND_FORMAT_ ## T(N) |
| -#define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(N) \ |
| - operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3)) |
| -#define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(N) \ |
| - operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) |
| -#define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(N) \ |
| - operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3)) |
| -#define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(N) \ |
| - operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) |
| -#define CHECK_OPERAND(N, S, T) \ |
| - ((operand_states & (0xff << ((N) << 3))) == ((S | (T << 5)) << ((N) << 3))) |
| +/* |
| + * operand_states variable keeps one byte of information per operand in the |
| + * current instruction: |
| + * 2 bits (bits 5-6) for register kinds (see OperandKind), |
|
halyavin
2013/03/25 09:43:41
the first 5 bits are used for register numbers (16
khim
2013/03/25 11:33:48
Done.
|
| + * 5 bits (bits 0-4) for register numbers (16 regs plus RIZ). |
| + * |
| + * Macros below are used to access this data. |
| + */ |
| +#define SET_OPERAND_NAME(INDEX, REGISTER_NAME) \ |
| + operand_states |= ((REGISTER_NAME) << ((INDEX) << 3)) |
| +#define SET_OPERAND_FORMAT(INDEX, FORMAT) SET_OPERAND_FORMAT_ ## FORMAT(INDEX) |
| +#define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(INDEX) \ |
| + operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((INDEX) << 3)) |
| +#define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(INDEX) \ |
| + operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((INDEX) << 3)) |
| +#define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(INDEX) \ |
| + operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((INDEX) << 3)) |
| +#define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(INDEX) \ |
| + operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((INDEX) << 3)) |
| +#define CHECK_OPERAND(INDEX, REGISTER_NAME, KIND) \ |
| + ((operand_states & (0xff << ((INDEX) << 3))) == \ |
| + ((REGISTER_NAME | (KIND << 5)) << ((INDEX) << 3))) |
|
halyavin
2013/03/25 09:43:41
It is more natural to reverse order: (((KIND) << 5
khim
2013/03/25 11:33:48
Done.
|
| -static INLINE void CheckAccess(ptrdiff_t instruction_begin, |
| - enum OperandName base, |
| - enum OperandName index, |
| - uint8_t restricted_register, |
| - bitmap_word *valid_targets, |
| - uint32_t *instruction_info_collected) { |
| +static INLINE void CheckMemoryAccess(ptrdiff_t instruction_begin, |
| + enum OperandName base, |
| + enum OperandName index, |
| + uint8_t restricted_register, |
| + bitmap_word *valid_targets, |
| + uint32_t *instruction_info_collected) { |
| if ((base == REG_RIP) || (base == REG_R15) || |
| (base == REG_RSP) || (base == REG_RBP)) { |
| if ((index == NO_REG) || (index == REG_RIZ)) |
| @@ -692,16 +747,22 @@ |
| } |
| } |
| +static FORCEINLINE uint32_t CheckValidityOfRegularInstruction( |
| + enum OperandName restricted_register) { |
| + /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
| + * instruction, not with regular instruction. */ |
| + if (restricted_register == REG_RBP) |
| + return RESTRICTED_RBP_UNPROCESSED; |
| + if (restricted_register == REG_RSP) |
| + return RESTRICTED_RSP_UNPROCESSED; |
| + return 0; |
| +} |
| static INLINE void Process0Operands(enum OperandName *restricted_register, |
| uint32_t *instruction_info_collected) { |
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
| - * instruction, not with regular instruction. */ |
| - if (*restricted_register == REG_RSP) { |
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
| - } else if (*restricted_register == REG_RBP) { |
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
| - } |
| + *instruction_info_collected |= |
| + CheckValidityOfRegularInstruction(*restricted_register); |
| + /* Every instruction clears restricted register even if it is not modified. */ |
| *restricted_register = NO_REG; |
| } |
| @@ -709,14 +770,8 @@ |
| uint32_t *instruction_info_collected, |
| uint8_t rex_prefix, |
| uint32_t operand_states) { |
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
| - * instruction, not with regular instruction. */ |
| - if (*restricted_register == REG_RSP) { |
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
| - } else if (*restricted_register == REG_RBP) { |
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
| - } |
| - *restricted_register = NO_REG; |
| + *instruction_info_collected |= |
| + CheckValidityOfRegularInstruction(*restricted_register); |
| if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
| CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
| CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { |
| @@ -724,12 +779,14 @@ |
| } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || |
| CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
| - *instruction_info_collected |= BPL_MODIFIED; |
| + *instruction_info_collected |= BP_MODIFIED; |
| } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || |
| CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
| - *instruction_info_collected |= SPL_MODIFIED; |
| + *instruction_info_collected |= SP_MODIFIED; |
| } |
| + /* Every instruction clears restricted register even if it is not modified. */ |
| + *restricted_register = NO_REG; |
| } |
| static INLINE void Process1OperandZeroExtends( |
| @@ -737,13 +794,9 @@ |
| uint32_t *instruction_info_collected, |
| uint8_t rex_prefix, |
| uint32_t operand_states) { |
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
| - * instruction, not with regular instruction. */ |
| - if (*restricted_register == REG_RSP) { |
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
| - } else if (*restricted_register == REG_RBP) { |
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
| - } |
| + *instruction_info_collected |= |
| + CheckValidityOfRegularInstruction(*restricted_register); |
| + /* Every instruction clears restricted register even if it is not modified. */ |
| *restricted_register = NO_REG; |
| if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
| CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
| @@ -751,10 +804,10 @@ |
| *instruction_info_collected |= R15_MODIFIED; |
| } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
| - *instruction_info_collected |= BPL_MODIFIED; |
| + *instruction_info_collected |= BP_MODIFIED; |
| } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
| - *instruction_info_collected |= SPL_MODIFIED; |
| + *instruction_info_collected |= SP_MODIFIED; |
| /* Take 2 bits of operand type from operand_states as *restricted_register, |
| * make sure operand_states denotes a register (4th bit == 0). */ |
| } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { |
| @@ -766,14 +819,8 @@ |
| uint32_t *instruction_info_collected, |
| uint8_t rex_prefix, |
| uint32_t operand_states) { |
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
| - * instruction, not with regular instruction. */ |
| - if (*restricted_register == REG_RSP) { |
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
| - } else if (*restricted_register == REG_RBP) { |
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
| - } |
| - *restricted_register = NO_REG; |
| + *instruction_info_collected |= |
| + CheckValidityOfRegularInstruction(*restricted_register); |
| if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
| CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
| CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) || |
| @@ -787,15 +834,17 @@ |
| (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || |
| CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
| - *instruction_info_collected |= BPL_MODIFIED; |
| + *instruction_info_collected |= BP_MODIFIED; |
| } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || |
| CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || |
| (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || |
| CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
| - *instruction_info_collected |= SPL_MODIFIED; |
| + *instruction_info_collected |= SP_MODIFIED; |
| } |
| + /* Every instruction clears restricted register even if it is not modified. */ |
| + *restricted_register = NO_REG; |
| } |
| static INLINE void Process2OperandsZeroExtends( |
| @@ -803,13 +852,9 @@ |
| uint32_t *instruction_info_collected, |
| uint8_t rex_prefix, |
| uint32_t operand_states) { |
| - /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
| - * instruction, not with regular instruction. */ |
| - if (*restricted_register == REG_RSP) { |
| - *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
| - } else if (*restricted_register == REG_RBP) { |
| - *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
| - } |
| + *instruction_info_collected |= |
| + CheckValidityOfRegularInstruction(*restricted_register); |
| + /* Every instruction clears restricted register even if it is not modified. */ |
| *restricted_register = NO_REG; |
| if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
| CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
| @@ -822,12 +867,12 @@ |
| CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || |
| (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
| - *instruction_info_collected |= BPL_MODIFIED; |
| + *instruction_info_collected |= BP_MODIFIED; |
| } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || |
| (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
| CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
| - *instruction_info_collected |= SPL_MODIFIED; |
| + *instruction_info_collected |= SP_MODIFIED; |
| /* Take 2 bits of operand type from operand_states as *restricted_register, |
| * make sure operand_states denotes a register (4th bit == 0). */ |
| } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { |
| @@ -851,7 +896,7 @@ |
| static INLINE void ExpandSuperinstructionBySandboxingBytes( |
| size_t sandbox_instructions_size, |
| const uint8_t **instruction_begin, |
| - const uint8_t *data, |
| + const uint8_t codeblock[], |
| bitmap_word *valid_targets) { |
| *instruction_begin -= sandbox_instructions_size; |
| /* |
| @@ -859,7 +904,7 @@ |
| * don't need to mark the beginning of the whole "superinstruction" - that's |
| * why we move start by one byte and don't change the length. |
| */ |
| - UnmarkValidJumpTargets((*instruction_begin + 1 - data), |
| + UnmarkValidJumpTargets((*instruction_begin + 1 - codeblock), |
| sandbox_instructions_size, |
| valid_targets); |
| } |
| @@ -989,11 +1034,14 @@ |
| uint32_t *instruction_info_collected, |
| const uint8_t **instruction_begin, |
| const uint8_t *current_position, |
| - const uint8_t *data, |
| + const uint8_t codeblock[], |
| bitmap_word *valid_targets) { |
| if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) |
| ExpandSuperinstructionBySandboxingBytes( |
| - 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
| + 3 /* and */ + 3 /* add */, |
| + instruction_begin, |
| + codeblock, |
| + valid_targets); |
| else |
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
| } |
| @@ -1026,11 +1074,14 @@ |
| uint32_t *instruction_info_collected, |
| const uint8_t **instruction_begin, |
| const uint8_t *current_position, |
| - const uint8_t *data, |
| + const uint8_t codeblock[], |
| bitmap_word *valid_targets) { |
| if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) |
| ExpandSuperinstructionBySandboxingBytes( |
| - 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
| + 3 /* and */ + 3 /* add */, |
| + instruction_begin, |
| + codeblock, |
| + valid_targets); |
| else |
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
| } |
| @@ -1069,11 +1120,14 @@ |
| uint32_t *instruction_info_collected, |
| const uint8_t **instruction_begin, |
| const uint8_t *current_position, |
| - const uint8_t *data, |
| + const uint8_t codeblock[], |
| bitmap_word *valid_targets) { |
| if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) |
| ExpandSuperinstructionBySandboxingBytes( |
| - 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
| + 4 /* and */ + 3 /* add */, |
| + instruction_begin, |
| + codeblock, |
| + valid_targets); |
| else |
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
| } |
| @@ -1112,17 +1166,21 @@ |
| uint32_t *instruction_info_collected, |
| const uint8_t **instruction_begin, |
| const uint8_t *current_position, |
| - const uint8_t *data, |
| + const uint8_t codeblock[], |
| bitmap_word *valid_targets) { |
| if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) |
| ExpandSuperinstructionBySandboxingBytes( |
| - 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
| + 4 /* and */ + 3 /* add */, |
| + instruction_begin, |
| + codeblock, |
| + valid_targets); |
| else |
| *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
| } |
| -Bool ValidateChunkAMD64(const uint8_t *data, size_t size, |
| +Bool ValidateChunkAMD64(const uint8_t codeblock[], |
| + size_t size, |
| uint32_t options, |
| const NaClCPUFeaturesX86 *cpu_features, |
| ValidationCallbackFunc user_callback, |
| @@ -1166,21 +1224,21 @@ |
| /* |
| * This option is usually used in tests: we will process the whole chunk |
| * in one pass. Usually each bundle is processed separately which means |
| - * instructions (and super-instructions) can not cross borders of the bundle. |
| + * instructions (and "superinstructions") can not cross borders of the bundle. |
| */ |
| if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) |
| - end_of_bundle = data + size; |
| + end_of_bundle = codeblock + size; |
| else |
| - end_of_bundle = data + kBundleSize; |
| + end_of_bundle = codeblock + kBundleSize; |
| /* |
| - * Main loop. Here we process the data array bundle-after-bundle. |
| + * Main loop. Here we process the codeblock array bundle-after-bundle. |
| * Ragel-produced DFA does all the checks with one exception: direct jumps. |
| * It collects the two arrays: valid_targets and jump_dests which are used |
| * to test direct jumps later. |
| */ |
| - for (current_position = data; |
| - current_position < data + size; |
| + for (current_position = codeblock; |
| + current_position < codeblock + size; |
| current_position = end_of_bundle, |
| end_of_bundle = current_position + kBundleSize) { |
| /* Start of the instruction being processed. */ |
| @@ -1189,9 +1247,12 @@ |
| const uint8_t *instruction_end; |
| int current_state; |
| uint32_t instruction_info_collected = 0; |
| - /* Keeps one byte of information per operand in the current instruction: |
| - * 2 bits for register kinds, |
| - * 5 bits for register numbers (16 regs plus RIZ). */ |
| + /* |
| + * Contains register number and type of register modification (see |
| + * OperandKind enum) for each operand that is changed in the instruction. |
| + * Information about read-only and memory operands is not saved in 64-bit |
| + * mode. |
| + */ |
| uint32_t operand_states = 0; |
| enum OperandName base = NO_REG; |
| enum OperandName index = NO_REG; |
| @@ -1202,7 +1263,15 @@ |
| uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; |
| uint8_t vex_prefix3 = 0x00; |
| + /* |
| + * The "write init" statement causes Ragel to emit initialization code. |
| + * This should be executed once before the ragel machine is started. |
| + */ |
| %% write init; |
| + /* |
| + * The "write exec" statement causes Ragel to emit the ragel machine's |
| + * execution code. |
| + */ |
| %% write exec; |
| /* |
| @@ -1225,8 +1294,12 @@ |
| * Check the direct jumps. All the targets from jump_dests must be in |
| * valid_targets. |
| */ |
| - result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, |
| - user_callback, callback_data); |
| + result &= ProcessInvalidJumpTargets(codeblock, |
| + size, |
| + valid_targets, |
| + jump_dests, |
| + user_callback, |
| + callback_data); |
| /* We only use malloc for a large code sequences */ |
| if (jump_dests != &jump_dests_small) free(jump_dests); |