Index: src/trusted/validator_ragel/validator_x86_64.rl |
=================================================================== |
--- src/trusted/validator_ragel/validator_x86_64.rl (revision 11020) |
+++ src/trusted/validator_ragel/validator_x86_64.rl (working copy) |
@@ -20,7 +20,7 @@ |
#include <string.h> |
#include "native_client/src/trusted/validator_ragel/bitmap.h" |
-#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" |
+#include "native_client/src/trusted/validator_ragel/validator_internal.h" |
%%{ |
machine x86_64_validator; |
@@ -61,19 +61,15 @@ |
include cpuid_actions |
"native_client/src/trusted/validator_ragel/parse_instruction.rl"; |
- action check_access { |
- CheckAccess(instruction_begin - data, base, index, restricted_register, |
- valid_targets, &instruction_info_collected); |
+ action check_memory_access { |
+ CheckMemoryAccess(instruction_begin - codeblock, |
+ base, |
+ index, |
+ restricted_register, |
+ valid_targets, |
+ &instruction_info_collected); |
} |
- # Action which marks last byte as not immediate. Most 3DNow! instructions, |
- # some AVX and XOP instructions have this proerty. It's referenced by |
- # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl" |
- # file. |
- action last_byte_is_not_immediate { |
- instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; |
- } |
- |
action modifiable_instruction { |
instruction_info_collected |= MODIFIABLE_INSTRUCTION; |
} |
@@ -134,7 +130,7 @@ |
# But since these instructions are "second half" of the %rbp sandboxing they |
# can be used *only* when %rbp is restricted. |
# |
- # That is (normal instruction): |
+ # Compare: |
# mov %eax,%ebp |
# mov %esi,%edi <- Error: %ebp is restricted |
# vs |
@@ -147,11 +143,13 @@ |
# Check this precondition and mark the beginning of the instruction as |
# invalid jump for target. |
@{ if (restricted_register == REG_RBP) |
+ /* RESTRICTED_REGISTER_USED is informational flag used in tests. */ |
instruction_info_collected |= RESTRICTED_REGISTER_USED; |
else |
+ /* UNRESTRICTED_RBP_PROCESSED is error flag used in production. */ |
instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; |
restricted_register = NO_REG; |
- UnmarkValidJumpTarget((instruction_begin - data), valid_targets); |
+ UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets); |
}; |
# Special %rsp modifications - the ones which don't need a sandboxing. |
@@ -209,7 +207,7 @@ |
else |
instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; |
restricted_register = NO_REG; |
- UnmarkValidJumpTarget((instruction_begin - data), valid_targets); |
+ UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets); |
}; |
# naclcall or nacljmp. These are three-instruction indirection-jump sequences. |
@@ -217,7 +215,7 @@ |
# and RBASE, %rXX |
# jmpq *%rXX (or: callq *%rXX) |
# Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not |
- # just as part of the naclcall/nacljmp, but also as a standolene instruction). |
+ # just as part of the naclcall/nacljmp, but also as a standalone instruction). |
# |
# This means that when naclcall_or_nacljmp ragel machine will be combined with |
# "normal_instruction*" regular action process_1_operand_zero_extends will be |
@@ -237,7 +235,7 @@ |
# byte for the dst while last one uses field RM of the ModR/M byte for the src |
# and field REG of the ModR/M byte for dst. Both should be allowed. |
# |
- # See AMD/Intel manual for clarification "add" instruction encoding. |
+ # See AMD/Intel manual for clarification about "add" instruction encoding. |
# |
# REGISTER USAGE ABBREVIATIONS: |
# E86: legacy ia32 registers (all eight: %eax to %edi) |
@@ -245,6 +243,9 @@ |
# E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) |
# R64: new amd64 registers (only seven: %r8 to %r14) |
# RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) |
+ # |
+ # Note that in the actions below instruction_begin points to the start of the |
+ # "call" or "jmp" instruction and current_position points to its end. |
naclcall_or_nacljmp = |
# This block encodes call and jump "superinstruction" of the following form: |
# 0: 83 e_ e0 and $~0x1f,E86 |
@@ -264,8 +265,10 @@ |
(REX_WRX? 0xff b_11_100_xxx))) |
@{ |
ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, |
- &instruction_begin, current_position, |
- data, valid_targets); |
+ &instruction_begin, |
+ current_position, |
+ codeblock, |
+ valid_targets); |
} | |
# This block encodes call and jump "superinstruction" of the following form: |
@@ -286,8 +289,10 @@ |
(REX_WRX? 0xff b_11_100_xxx))) |
@{ |
ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, |
- &instruction_begin, current_position, |
- data, valid_targets); |
+ &instruction_begin, |
+ current_position, |
+ codeblock, |
+ valid_targets); |
} | |
# This block encodes call and jump "superinstruction" of the following form: |
@@ -325,8 +330,10 @@ |
(b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) |
@{ |
ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, |
- &instruction_begin, current_position, |
- data, valid_targets); |
+ &instruction_begin, |
+ current_position, |
+ codeblock, |
+ valid_targets); |
} | |
# This block encodes call and jump "superinstruction" of the following form: |
@@ -364,8 +371,10 @@ |
(b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) |
@{ |
ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, |
- &instruction_begin, current_position, |
- data, valid_targets); |
+ &instruction_begin, |
+ current_position, |
+ codeblock, |
+ valid_targets); |
}; |
# EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand |
@@ -393,19 +402,19 @@ |
# String instructions which use only %ds:(%rsi) |
string_instruction_rsi_no_rdi = |
- (rep? 0xac | # lods %ds:(%rsi),%al |
- data16rep 0xad | # lods %ds:(%rsi),%ax |
- rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax |
+ (rep? 0xac | # lods %ds:(%rsi),%al |
+ data16rep 0xad | # lods %ds:(%rsi),%ax |
+ rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax |
# String instructions which use only %ds:(%rdi) |
string_instruction_rdi_no_rsi = |
- condrep? 0xae | # scas %es:(%rdi),%al |
- data16condrep 0xaf | # scas %es:(%rdi),%ax |
- condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax |
+ condrep? 0xae | # scas %es:(%rdi),%al |
+ data16condrep 0xaf | # scas %es:(%rdi),%ax |
+ condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax |
- rep? 0xaa | # stos %al,%es:(%rdi) |
- data16rep 0xab | # stos %ax,%es:(%rdi) |
- rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi) |
+ rep? 0xaa | # stos %al,%es:(%rdi) |
+ data16rep 0xab | # stos %ax,%es:(%rdi) |
+ rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi) |
# String instructions which use both %ds:(%rsi) and %es:(%rdi) |
string_instruction_rsi_rdi = |
@@ -427,20 +436,26 @@ |
# are two encodings for the register-to-register move (and since REG and RM |
# are identical here only opcode differs). |
sandbox_instruction_rsi_no_rdi = |
- (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
- 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
+ (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
string_instruction_rsi_no_rdi |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
- 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
+ 2 /* mov */ + 4 /* lea */, |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
} | |
- REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
- 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
+ REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
string_instruction_rsi_no_rdi |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
- 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
+ 3 /* mov */ + 4 /* lea */, |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
}; |
# "Superinstruction" which includes %rdi sandboxing. |
@@ -453,20 +468,26 @@ |
# are two encodings for the register-to-register move (and since REG and RM |
# are identical here only opcode differs). |
sandbox_instruction_rdi_no_rsi = |
- (0x89 | 0x8b) 0xff # mov %edi,%edi |
- 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
+ (0x89 | 0x8b) 0xff # mov %edi,%edi |
+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
(string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
- 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
+ 2 /* mov */ + 4 /* lea */, |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
} | |
- REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
+ REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi |
+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
(string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
- 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
+ 3 /* mov */ + 4 /* lea */, |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
}; |
@@ -481,15 +502,17 @@ |
# for the register-to-register move (and since REG and RM are identical here |
# only opcode differs). |
sandbox_instruction_rsi_rdi = |
- (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
- 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
- (0x89 | 0x8b) 0xff # mov %edi,%edi |
- 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
+ (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
+ (0x89 | 0x8b) 0xff # mov %edi,%edi |
+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
string_instruction_rsi_rdi |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, |
- &instruction_begin, data, valid_targets); |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
} | |
(((0x89 | 0x8b) 0xf6 # mov %esi,%esi |
@@ -506,25 +529,31 @@ |
ExpandSuperinstructionBySandboxingBytes( |
2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ |
/* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, |
- &instruction_begin, data, valid_targets); |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
} | |
- REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
- 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
- REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
+ REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
+ 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
+ REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi |
+ 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
string_instruction_rsi_rdi |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, |
- &instruction_begin, data, valid_targets); |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
}; |
# All the "special" instructions (== instructions which obey non-standard |
# rules). Three groups: |
- # * %rsp/%rsp related instructions (these instructions are special because |
- # they must be in the range %r15...%r15+4294967295 except momentarily they |
- # can be in the range 0...4294967295) |
+ # * %rsp/%rbp related instructions (these registers and operations which |
+ # operate on them are special because registers must be in the range |
+ # %r15...%r15+4294967295 except momentarily they can be in the range |
+ # 0...4294967295, but then the very next instruction MUST restore the |
+ # status quo). |
# * string instructions (which can not use %r15 as base and thus need special |
# handling both in compiler and validator) |
# * naclcall/nacljmp (indirect jumps need special care) |
@@ -547,36 +576,36 @@ |
# Remove special instructions which are only allowed in special cases. |
normal_instruction = one_instruction - special_instruction; |
- # Check if call is properly aligned. |
- # |
- # For direct call we explicitly encode all variations. For indirect call |
- # we accept all the special instructions which ends with register-addressed |
- # indirect call. |
+ # For direct call we explicitly encode all variations. |
+ direct_call = (data16 REX_RXB? 0xe8 rel16) | |
+ (REX_WRXB? 0xe8 rel32) | |
+ (data16 REXW_RXB 0xe8 rel32); |
+ |
+ # For indirect call we accept only near register-addressed indirect call. |
+ indirect_call_register = data16? REX_WRXB? 0xff (opcode_2 & modrm_registers); |
+ |
+ # Ragel machine that accepts one call instruction or call superinstruction and |
+ # checks if call is properly aligned. |
call_alignment = |
- ((normal_instruction & |
- # Direct call |
- ((data16 REX_RXB? 0xe8 rel16) | |
- (REX_WRXB? 0xe8 rel32) | |
- (data16 REXW_RXB 0xe8 rel32))) | |
- (special_instruction & |
- # Indirect call |
- (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* & |
- modrm_registers)))) |
+ ((normal_instruction & direct_call) | |
+ # For indirect calls we accept all the special instructions which end with |
+ # register-addressed indirect call. |
+ (special_instruction & (any* indirect_call_register))) |
# Call instruction must aligned to the end of bundle. Previously this was |
# strict requirement, today it's just warning to aid with debugging. |
@{ |
- if (((current_position - data) & kBundleMask) != kBundleMask) |
+ if (((current_position - codeblock) & kBundleMask) != kBundleMask) |
instruction_info_collected |= BAD_CALL_ALIGNMENT; |
}; |
- # This action calls user's callback (if needed) and cleans up validator's |
+ # This action calls user's callback (if needed) and cleans up validator's |
# internal state. |
# |
- # We call the user callback if there are validation errors or if the |
- # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. |
+ # We call the user callback either on validation errors or on every |
+ # instruction, depending on CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option. |
# |
# After that we move instruction_begin and clean all the variables which |
- # only used in the processing of a single instruction (prefixes, operand |
+ # are only used in the processing of a single instruction (prefixes, operand |
# states and instruction_info_collected). |
action end_of_instruction_cleanup { |
/* Call user-supplied callback. */ |
@@ -595,11 +624,12 @@ |
* causing error. */ |
instruction_begin = instruction_end; |
- /* Mark start of the next instruction as a valid target for jump. |
- * Note: we mark start of the next instruction here, not start of the |
- * current one because memory access check should be able to clear this |
- * bit when restricted register is used. */ |
- MarkValidJumpTarget(instruction_begin - data, valid_targets); |
+ /* |
+ * We could set instruction_begin at the first byte of the instruction |
+ * instead of here, but for incorrect one-byte instructions the user callback |
+ * may be called before instruction_begin is set. |
+ */ |
+ MarkValidJumpTarget(instruction_begin - codeblock, valid_targets); |
/* Clear variables. */ |
instruction_info_collected = 0; |
@@ -628,7 +658,7 @@ |
} |
# This is main ragel machine: it does 99% of validation work. There are only |
- # one thing to do with bundle if this machine accepts the bundle: |
+ # one thing to do with bundle if this ragel machine accepts the bundle: |
# * check for the state of the restricted_register at the end of the bundle. |
# It's an error is %rbp or %rsp is restricted at the end of the bundle. |
# Additionally if all the bundles are fine you need to check that direct jumps |
@@ -643,41 +673,100 @@ |
}%% |
+/* |
+ * The "write data" statement causes Ragel to emit the constant static data |
+ * needed by the ragel machine. |
+ */ |
%% write data; |
+/* |
+ * Operand's kind WRT sandboxing effect: no effect, can be used for sandboxing |
+ * and will make register invalid if used. |
+ * |
+ * No effect is the "initial state", 32bit stores can be used for sandboxing (in |
+ * that case the high 32 bits of the corresponding 64-bit register are set to |
+ * zero) and we do not distinguish modifications of 16bit and 64bit registers to |
+ * match the behavior of the old validator. |
+ * |
+ * 8bit operands must be distinguished from other types because the REX prefix |
+ * regulates the choice between %ah and %spl, as well as %ch and %bpl. |
+ */ |
enum OperandKind { |
OPERAND_SANDBOX_IRRELEVANT = 0, |
+ /* 8bit register that is modified by instruction. */ |
+ OPERAND_SANDBOX_8BIT, |
/* |
- * Currently we do not distinguish 8bit and 16bit modifications from |
- * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. |
- * |
- * 8bit operands must be distinguished from other types because the REX prefix |
- * regulates the choice between %ah and %spl, as well as %ch and %bpl. |
+ * 32-bit register that is modified by instruction. The high 32 bits of |
+ * the corresponding 64-bit register are set to zero. |
*/ |
- OPERAND_SANDBOX_8BIT, |
OPERAND_SANDBOX_RESTRICTED, |
+ /* 64-bit or 16-bit register that is modified by instruction. */ |
OPERAND_SANDBOX_UNRESTRICTED |
}; |
-#define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3)) |
-#define SET_OPERAND_FORMAT(N, T) SET_OPERAND_FORMAT_ ## T(N) |
-#define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(N) \ |
- operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3)) |
-#define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(N) \ |
- operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) |
-#define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(N) \ |
- operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3)) |
-#define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(N) \ |
- operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) |
-#define CHECK_OPERAND(N, S, T) \ |
- ((operand_states & (0xff << ((N) << 3))) == ((S | (T << 5)) << ((N) << 3))) |
+/* |
+ * operand_states variable keeps one byte of information per operand in the |
+ * current instruction: |
+ * * the first 5 bits (least significant ones) are for register numbers (16 |
+ possible registers plus RIZ), |
+ * * the next 2 bits for register kinds. |
+ * |
+ * Macros below are used to access this data. |
+ */ |
+#define SET_OPERAND_NAME(INDEX, REGISTER_NAME) \ |
+ operand_states |= ((REGISTER_NAME) << ((INDEX) << 3)) |
+#define SET_OPERAND_FORMAT(INDEX, FORMAT) \ |
+ SET_OPERAND_FORMAT_ ## FORMAT(INDEX) |
+#define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(INDEX) \ |
+ operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((INDEX) << 3)) |
+#define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(INDEX) \ |
+ operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((INDEX) << 3)) |
+#define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(INDEX) \ |
+ operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((INDEX) << 3)) |
+#define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(INDEX) \ |
+ operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((INDEX) << 3)) |
+#define CHECK_OPERAND(INDEX, REGISTER_NAME, KIND) \ |
+ ((operand_states & (0xff << ((INDEX) << 3))) == \ |
+ ((((KIND) << 5) | (REGISTER_NAME)) << ((INDEX) << 3))) |
+#define CHECK_OPERAND_R15_MODIFIED(INDEX) \ |
+ (CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_8BIT) || \ |
+ CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_RESTRICTED) || \ |
+ CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) |
+/* |
+ * Note that macros below access operand_states variable and also rex_prefix |
+ * variable. This is to distinguish %ah from %spl, as well as %ch from %bpl. |
+ */ |
+#define CHECK_OPERAND_BP_MODIFIED(INDEX) \ |
+ ((CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \ |
+ CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_RESTRICTED) || \ |
+ CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) |
+#define CHECK_OPERAND_SP_MODIFIED(INDEX) \ |
+ ((CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \ |
+ CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_RESTRICTED) || \ |
+ CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) |
+/* |
+ * This is for Process?OperandsZeroExtends functions: in this case %esp or %ebp |
+ * can be written to, but %spl/%sp/%rsp or %bpl/%bp/%rbp can not be modified. |
+ */ |
+#define CHECK_OPERAND_BP_INVALID_MODIFICATION(INDEX) \ |
+ ((CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \ |
+ CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) |
+#define CHECK_OPERAND_SP_INVALID_MODIFICATION(INDEX) \ |
+ ((CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \ |
+ CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) |
+#define CHECK_OPERAND_RESTRICTED(INDEX) \ |
+ /* Take 2 bits of operand type from operand_states as *restricted_register */\ |
+ /* and also make sure operand_states denotes a register (4th bit == 0). */\ |
+ ((operand_states & (0x70 << ((INDEX) << 3))) == \ |
+ (OPERAND_SANDBOX_RESTRICTED << (5 + ((INDEX) << 3)))) |
+#define GET_OPERAND_NAME(INDEX) ((operand_states >> ((INDEX) << 3)) & 0x1f) |
-static INLINE void CheckAccess(ptrdiff_t instruction_begin, |
- enum OperandName base, |
- enum OperandName index, |
- uint8_t restricted_register, |
- bitmap_word *valid_targets, |
- uint32_t *instruction_info_collected) { |
+static INLINE void CheckMemoryAccess(ptrdiff_t instruction_begin, |
+ enum OperandName base, |
+ enum OperandName index, |
+ uint8_t restricted_register, |
+ bitmap_word *valid_targets, |
+ uint32_t *instruction_info_collected) { |
if ((base == REG_RIP) || (base == REG_R15) || |
(base == REG_RSP) || (base == REG_RBP)) { |
if ((index == NO_REG) || (index == REG_RIZ)) |
@@ -692,16 +781,25 @@ |
} |
} |
+static FORCEINLINE uint32_t CheckValidityOfRegularInstruction( |
+ enum OperandName restricted_register) { |
+ /* |
+ * Restricted %rsp or %rbp must be restored from the zero-extension state |
+ * by the appropriate "special" instruction, not with a regular |
+ * instruction. |
+ */ |
+ if (restricted_register == REG_RBP) |
+ return RESTRICTED_RBP_UNPROCESSED; |
+ if (restricted_register == REG_RSP) |
+ return RESTRICTED_RSP_UNPROCESSED; |
+ return 0; |
+} |
static INLINE void Process0Operands(enum OperandName *restricted_register, |
uint32_t *instruction_info_collected) { |
- /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
- * instruction, not with regular instruction. */ |
- if (*restricted_register == REG_RSP) { |
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
- } else if (*restricted_register == REG_RBP) { |
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
- } |
+ *instruction_info_collected |= |
+ CheckValidityOfRegularInstruction(*restricted_register); |
+ /* Every instruction clears restricted register even if it is not modified. */ |
*restricted_register = NO_REG; |
} |
@@ -709,27 +807,16 @@ |
uint32_t *instruction_info_collected, |
uint8_t rex_prefix, |
uint32_t operand_states) { |
- /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
- * instruction, not with regular instruction. */ |
- if (*restricted_register == REG_RSP) { |
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
- } else if (*restricted_register == REG_RBP) { |
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
- } |
+ *instruction_info_collected |= |
+ CheckValidityOfRegularInstruction(*restricted_register); |
+ if (CHECK_OPERAND_R15_MODIFIED(0)) |
+ *instruction_info_collected |= R15_MODIFIED; |
+ if (CHECK_OPERAND_BP_MODIFIED(0)) |
+ *instruction_info_collected |= BP_MODIFIED; |
+ if (CHECK_OPERAND_SP_MODIFIED(0)) |
+ *instruction_info_collected |= SP_MODIFIED; |
+ /* Every instruction clears restricted register even if it is not modified. */ |
*restricted_register = NO_REG; |
- if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= R15_MODIFIED; |
- } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= BPL_MODIFIED; |
- } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= SPL_MODIFIED; |
- } |
} |
static INLINE void Process1OperandZeroExtends( |
@@ -737,65 +824,34 @@ |
uint32_t *instruction_info_collected, |
uint8_t rex_prefix, |
uint32_t operand_states) { |
- /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
- * instruction, not with regular instruction. */ |
- if (*restricted_register == REG_RSP) { |
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
- } else if (*restricted_register == REG_RBP) { |
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
- } |
+ *instruction_info_collected |= |
+ CheckValidityOfRegularInstruction(*restricted_register); |
+ /* Every instruction clears restricted register even if it is not modified. */ |
*restricted_register = NO_REG; |
- if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { |
+ if (CHECK_OPERAND_R15_MODIFIED(0)) |
*instruction_info_collected |= R15_MODIFIED; |
- } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= BPL_MODIFIED; |
- } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= SPL_MODIFIED; |
- /* Take 2 bits of operand type from operand_states as *restricted_register, |
- * make sure operand_states denotes a register (4th bit == 0). */ |
- } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { |
- *restricted_register = operand_states & 0x0f; |
- } |
+ if (CHECK_OPERAND_BP_INVALID_MODIFICATION(0)) |
+ *instruction_info_collected |= BP_MODIFIED; |
+ if (CHECK_OPERAND_SP_INVALID_MODIFICATION(0)) |
+ *instruction_info_collected |= SP_MODIFIED; |
+ if (CHECK_OPERAND_RESTRICTED(0)) |
+ *restricted_register = GET_OPERAND_NAME(0); |
} |
static INLINE void Process2Operands(enum OperandName *restricted_register, |
uint32_t *instruction_info_collected, |
uint8_t rex_prefix, |
uint32_t operand_states) { |
- /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
- * instruction, not with regular instruction. */ |
- if (*restricted_register == REG_RSP) { |
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
- } else if (*restricted_register == REG_RBP) { |
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
- } |
+ *instruction_info_collected |= |
+ CheckValidityOfRegularInstruction(*restricted_register); |
+ if (CHECK_OPERAND_R15_MODIFIED(0) || CHECK_OPERAND_R15_MODIFIED(1)) |
+ *instruction_info_collected |= R15_MODIFIED; |
+ if (CHECK_OPERAND_BP_MODIFIED(0) || CHECK_OPERAND_BP_MODIFIED(1)) |
+ *instruction_info_collected |= BP_MODIFIED; |
+ if (CHECK_OPERAND_SP_MODIFIED(0) || CHECK_OPERAND_SP_MODIFIED(1)) |
+ *instruction_info_collected |= SP_MODIFIED; |
+ /* Every instruction clears restricted register even if it is not modified. */ |
*restricted_register = NO_REG; |
- if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) || |
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) || |
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= R15_MODIFIED; |
- } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || |
- (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= BPL_MODIFIED; |
- } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || |
- (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= SPL_MODIFIED; |
- } |
} |
static INLINE void Process2OperandsZeroExtends( |
@@ -803,44 +859,32 @@ |
uint32_t *instruction_info_collected, |
uint8_t rex_prefix, |
uint32_t operand_states) { |
- /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
- * instruction, not with regular instruction. */ |
- if (*restricted_register == REG_RSP) { |
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
- } else if (*restricted_register == REG_RBP) { |
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
- } |
+ *instruction_info_collected |= |
+ CheckValidityOfRegularInstruction(*restricted_register); |
+ /* Every instruction clears restricted register even if it is not modified. */ |
*restricted_register = NO_REG; |
- if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) || |
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) || |
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { |
+ if (CHECK_OPERAND_R15_MODIFIED(0) || |
+ CHECK_OPERAND_R15_MODIFIED(1)) |
*instruction_info_collected |= R15_MODIFIED; |
- } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || |
- (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= BPL_MODIFIED; |
- } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || |
- (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
- CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
- *instruction_info_collected |= SPL_MODIFIED; |
- /* Take 2 bits of operand type from operand_states as *restricted_register, |
- * make sure operand_states denotes a register (4th bit == 0). */ |
- } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { |
- *restricted_register = operand_states & 0x0f; |
- if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) { |
+ if (CHECK_OPERAND_BP_INVALID_MODIFICATION(0) || |
+ CHECK_OPERAND_BP_INVALID_MODIFICATION(1)) |
+ *instruction_info_collected |= BP_MODIFIED; |
halyavin
2013/03/25 14:18:36
BP_MODIFIED
khim
2013/03/25 14:24:45
Done.
|
+ if (CHECK_OPERAND_SP_INVALID_MODIFICATION(0) || |
+ CHECK_OPERAND_SP_INVALID_MODIFICATION(1)) |
+ *instruction_info_collected |= SP_MODIFIED; |
halyavin
2013/03/25 14:18:36
SP_MODIFIED
khim
2013/03/25 14:24:45
Done.
|
+ if (CHECK_OPERAND_RESTRICTED(0)) { |
+ *restricted_register = GET_OPERAND_NAME(0); |
+ /* |
+ * If both operands are sandboxed, the second one doesn't count. We can't |
+ * ignore it completely though, since it can modify %rsp or %rbp which must |
+ * follow special rules. In this case NaCl forbids the instruction. |
+ */ |
+ if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) |
*instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
- } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) { |
+ if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) |
*instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
- } |
- /* Take 2 bits of operand type from operand_states as *restricted_register, |
- * make sure operand_states denotes a register (12th bit == 0). */ |
- } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) { |
- *restricted_register = (operand_states & 0x0f00) >> 8; |
+ } else if (CHECK_OPERAND_RESTRICTED(1)) { |
+ *restricted_register = GET_OPERAND_NAME(1); |
} |
} |
@@ -851,7 +895,7 @@ |
static INLINE void ExpandSuperinstructionBySandboxingBytes( |
size_t sandbox_instructions_size, |
const uint8_t **instruction_begin, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
*instruction_begin -= sandbox_instructions_size; |
/* |
@@ -859,7 +903,7 @@ |
* don't need to mark the beginning of the whole "superinstruction" - that's |
* why we move start by one byte and don't change the length. |
*/ |
- UnmarkValidJumpTargets((*instruction_begin + 1 - data), |
+ UnmarkValidJumpTargets((*instruction_begin + 1 - codeblock), |
sandbox_instructions_size, |
valid_targets); |
} |
@@ -989,11 +1033,14 @@ |
uint32_t *instruction_info_collected, |
const uint8_t **instruction_begin, |
const uint8_t *current_position, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) |
ExpandSuperinstructionBySandboxingBytes( |
- 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
+ 3 /* and */ + 3 /* add */, |
+ instruction_begin, |
+ codeblock, |
+ valid_targets); |
else |
*instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
} |
@@ -1026,11 +1073,14 @@ |
uint32_t *instruction_info_collected, |
const uint8_t **instruction_begin, |
const uint8_t *current_position, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) |
ExpandSuperinstructionBySandboxingBytes( |
- 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
+ 3 /* and */ + 3 /* add */, |
+ instruction_begin, |
+ codeblock, |
+ valid_targets); |
else |
*instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
} |
@@ -1069,11 +1119,14 @@ |
uint32_t *instruction_info_collected, |
const uint8_t **instruction_begin, |
const uint8_t *current_position, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) |
ExpandSuperinstructionBySandboxingBytes( |
- 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
+ 4 /* and */ + 3 /* add */, |
+ instruction_begin, |
+ codeblock, |
+ valid_targets); |
else |
*instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
} |
@@ -1112,17 +1165,21 @@ |
uint32_t *instruction_info_collected, |
const uint8_t **instruction_begin, |
const uint8_t *current_position, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) |
ExpandSuperinstructionBySandboxingBytes( |
- 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
+ 4 /* and */ + 3 /* add */, |
+ instruction_begin, |
+ codeblock, |
+ valid_targets); |
else |
*instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
} |
-Bool ValidateChunkAMD64(const uint8_t *data, size_t size, |
+Bool ValidateChunkAMD64(const uint8_t codeblock[], |
+ size_t size, |
uint32_t options, |
const NaClCPUFeaturesX86 *cpu_features, |
ValidationCallbackFunc user_callback, |
@@ -1166,21 +1223,21 @@ |
/* |
* This option is usually used in tests: we will process the whole chunk |
* in one pass. Usually each bundle is processed separately which means |
- * instructions (and super-instructions) can not cross borders of the bundle. |
+ * instructions (and "superinstructions") cannot cross bundle boundaries. |
*/ |
if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) |
- end_of_bundle = data + size; |
+ end_of_bundle = codeblock + size; |
else |
- end_of_bundle = data + kBundleSize; |
+ end_of_bundle = codeblock + kBundleSize; |
/* |
- * Main loop. Here we process the data array bundle-after-bundle. |
+ * Main loop. Here we process the codeblock array bundle-after-bundle. |
* Ragel-produced DFA does all the checks with one exception: direct jumps. |
* It collects the two arrays: valid_targets and jump_dests which are used |
* to test direct jumps later. |
*/ |
- for (current_position = data; |
- current_position < data + size; |
+ for (current_position = codeblock; |
+ current_position < codeblock + size; |
current_position = end_of_bundle, |
end_of_bundle = current_position + kBundleSize) { |
/* Start of the instruction being processed. */ |
@@ -1189,9 +1246,12 @@ |
const uint8_t *instruction_end; |
int current_state; |
uint32_t instruction_info_collected = 0; |
- /* Keeps one byte of information per operand in the current instruction: |
- * 2 bits for register kinds, |
- * 5 bits for register numbers (16 regs plus RIZ). */ |
+ /* |
+ * Contains register number and type of register modification (see |
+ * OperandKind enum) for each operand that is changed in the instruction. |
+ * Information about read-only and memory operands is not saved in 64-bit |
+ * mode. |
+ */ |
uint32_t operand_states = 0; |
enum OperandName base = NO_REG; |
enum OperandName index = NO_REG; |
@@ -1202,7 +1262,15 @@ |
uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; |
uint8_t vex_prefix3 = 0x00; |
+ /* |
+ * The "write init" statement causes Ragel to emit initialization code. |
+ * This should be executed once before the Ragel machine is started. |
+ */ |
%% write init; |
+ /* |
+ * The "write exec" statement causes Ragel to emit the Ragel machine's |
+ * execution code. |
+ */ |
%% write exec; |
/* |
@@ -1225,8 +1293,12 @@ |
* Check the direct jumps. All the targets from jump_dests must be in |
* valid_targets. |
*/ |
- result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, |
- user_callback, callback_data); |
+ result &= ProcessInvalidJumpTargets(codeblock, |
+ size, |
+ valid_targets, |
+ jump_dests, |
+ user_callback, |
+ callback_data); |
/* We only use malloc for a large code sequences */ |
if (jump_dests != &jump_dests_small) free(jump_dests); |