Index: src/trusted/validator_ragel/validator_x86_64.rl |
=================================================================== |
--- src/trusted/validator_ragel/validator_x86_64.rl (revision 10976) |
+++ src/trusted/validator_ragel/validator_x86_64.rl (working copy) |
@@ -20,7 +20,7 @@ |
#include <string.h> |
#include "native_client/src/trusted/validator_ragel/bitmap.h" |
-#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" |
+#include "native_client/src/trusted/validator_ragel/validator_internal.h" |
%%{ |
machine x86_64_validator; |
@@ -64,18 +64,14 @@ |
"native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; |
action check_access { |
- CheckAccess(instruction_begin - data, base, index, restricted_register, |
- valid_targets, &instruction_info_collected); |
+ CheckAccess(instruction_begin - codeblock, |
halyavin
2013/03/20 09:07:22
CheckMemoryAccess?
khim
2013/03/21 14:38:17
Done.
|
+ base, |
+ index, |
+ restricted_register, |
+ valid_targets, |
+ &instruction_info_collected); |
} |
- # Action which marks last byte as not immediate. Most 3DNow! instructions, |
- # some AVX and XOP instructions have this proerty. It's referenced by |
- # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl" |
- # file. |
- action last_byte_is_not_immediate { |
- instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; |
- } |
- |
action modifiable_instruction { |
instruction_info_collected |= MODIFIABLE_INSTRUCTION; |
} |
@@ -136,7 +132,7 @@ |
# But since these instructions are "second half" of the %rbp sandboxing they |
# can be used *only* when %rbp is restricted. |
# |
- # That is (normal instruction): |
+ # Compare: |
# mov %eax,%ebp |
# mov %esi,%edi <- Error: %ebp is restricted |
# vs |
@@ -149,11 +145,13 @@ |
# Check this precondition and mark the beginning of the instruction as |
# invalid jump for target. |
@{ if (restricted_register == REG_RBP) |
+ /* RESTRICTED_REGISTER_USED is informational flag used in tests. */ |
instruction_info_collected |= RESTRICTED_REGISTER_USED; |
else |
+      /* UNRESTRICTED_RBP_PROCESSED is an error flag used in production. */ |
instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; |
restricted_register = NO_REG; |
- UnmarkValidJumpTarget((instruction_begin - data), valid_targets); |
+ UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets); |
}; |
# Special %rsp modifications - the ones which don't need a sandboxing. |
@@ -211,7 +209,7 @@ |
else |
instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; |
restricted_register = NO_REG; |
- UnmarkValidJumpTarget((instruction_begin - data), valid_targets); |
+ UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets); |
}; |
# naclcall or nacljmp. These are three-instruction indirection-jump sequences. |
@@ -219,7 +217,7 @@ |
# and RBASE, %rXX |
# jmpq *%rXX (or: callq *%rXX) |
# Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not |
- # just as part of the naclcall/nacljmp, but also as a standolene instruction). |
+  # just as part of the naclcall/nacljmp, but also as a standalone instruction). |
halyavin
2013/03/20 09:07:22
standalone
khim
2013/03/21 14:38:17
Done.
|
# |
# This means that when naclcall_or_nacljmp ragel machine will be combined with |
# "normal_instruction*" regular action process_1_operand_zero_extends will be |
@@ -239,7 +237,7 @@ |
# byte for the dst while last one uses field RM of the ModR/M byte for the src |
# and field REG of the ModR/M byte for dst. Both should be allowed. |
# |
- # See AMD/Intel manual for clarification "add" instruction encoding. |
+  # See AMD/Intel manual for clarification about "add" instruction encoding. |
halyavin
2013/03/20 09:07:22
why are you adding unicode quotes?
khim
2013/03/21 14:38:17
Fixed.
|
# |
# REGISTER USAGE ABBREVIATIONS: |
# E86: legacy ia32 registers (all eight: %eax to %edi) |
@@ -266,8 +264,10 @@ |
(REX_WRX? 0xff b_11_100_xxx))) |
@{ |
halyavin
2013/03/20 09:07:22
where instruction_begin points to in this action?
khim
2013/03/21 14:38:17
It points to the start of the last instruction, ob
|
ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, |
- &instruction_begin, current_position, |
- data, valid_targets); |
+ &instruction_begin, |
+ current_position, |
+ codeblock, |
+ valid_targets); |
} | |
# This block encodes call and jump "superinstruction" of the following form: |
@@ -288,8 +288,10 @@ |
(REX_WRX? 0xff b_11_100_xxx))) |
@{ |
ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, |
- &instruction_begin, current_position, |
- data, valid_targets); |
+ &instruction_begin, |
+ current_position, |
+ codeblock, |
+ valid_targets); |
} | |
# This block encodes call and jump "superinstruction" of the following form: |
@@ -327,8 +329,10 @@ |
(b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) |
@{ |
ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, |
- &instruction_begin, current_position, |
- data, valid_targets); |
+ &instruction_begin, |
+ current_position, |
+ codeblock, |
+ valid_targets); |
} | |
# This block encodes call and jump "superinstruction" of the following form: |
@@ -366,8 +370,10 @@ |
(b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) |
@{ |
ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, |
- &instruction_begin, current_position, |
- data, valid_targets); |
+ &instruction_begin, |
+ current_position, |
+ codeblock, |
+ valid_targets); |
}; |
# EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand |
@@ -434,7 +440,10 @@ |
string_instruction_rsi_no_rdi |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
- 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
+ 2 /* mov */ + 4 /* lea */, |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
} | |
REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
@@ -442,7 +451,10 @@ |
string_instruction_rsi_no_rdi |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
- 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
+ 3 /* mov */ + 4 /* lea */, |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
}; |
# "Superinstruction" which includes %rdi sandboxing. |
@@ -460,7 +472,10 @@ |
(string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
- 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
+ 2 /* mov */ + 4 /* lea */, |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
} | |
REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
halyavin
2013/03/20 09:07:22
what this dot means?
khim
2013/03/21 14:38:17
Concatenation. Removed since apparently it makes c
|
@@ -468,7 +483,10 @@ |
(string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
- 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); |
+ 3 /* mov */ + 4 /* lea */, |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
}; |
@@ -491,7 +509,9 @@ |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, |
- &instruction_begin, data, valid_targets); |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
} | |
(((0x89 | 0x8b) 0xf6 # mov %esi,%esi |
@@ -508,7 +528,9 @@ |
ExpandSuperinstructionBySandboxingBytes( |
2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ |
/* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, |
- &instruction_begin, data, valid_targets); |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
} | |
REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
@@ -519,7 +541,9 @@ |
@{ |
ExpandSuperinstructionBySandboxingBytes( |
3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, |
- &instruction_begin, data, valid_targets); |
+ &instruction_begin, |
+ codeblock, |
+ valid_targets); |
}; |
# All the "special" instructions (== instructions which obey non-standard |
@@ -549,21 +573,21 @@ |
# Remove special instructions which are only allowed in special cases. |
normal_instruction = one_instruction - special_instruction; |
- # Check if call is properly aligned. |
- # |
- # For direct call we explicitly encode all variations. For indirect call |
- # we accept all the special instructions which ends with register-addressed |
- # indirect call. |
+ # For direct call we explicitly encode all variations. |
+ direct_call = (data16 REX_RXB? 0xe8 rel16) | |
+ (REX_WRXB? 0xe8 rel32) | |
+ (data16 REXW_RXB 0xe8 rel32); |
+ |
+ # For indirect call we accept only near register-addressed indirect call. |
+ indirect_call_register = data16? REX_WRXB? 0xff (opcode_2 & modrm_registers); |
+ |
+ # Ragel machine that accepts one call instruction or call superinstruction and |
+ # checks if call is properly aligned. |
call_alignment = |
- ((normal_instruction & |
- # Direct call |
- ((data16 REX_RXB? 0xe8 rel16) | |
- (REX_WRXB? 0xe8 rel32) | |
- (data16 REXW_RXB 0xe8 rel32))) | |
- (special_instruction & |
- # Indirect call |
- (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* & |
- modrm_registers)))) |
+ ((normal_instruction & direct_call) |
+      # For indirect calls we accept all the special instructions which end with |
+ # register-addressed indirect call. |
+ (special_instruction & (any* indirect_call_register))) |
# Call instruction must aligned to the end of bundle. Previously this was |
# strict requirement, today it's just warning to aid with debugging. |
@{ |
@@ -580,6 +604,15 @@ |
# After that we move instruction_begin and clean all the variables which |
# only used in the processing of a single instruction (prefixes, operand |
# states and instruction_info_collected). |
halyavin
2013/03/20 09:07:22
Comment duplication.
khim
2013/03/21 14:38:17
Oops. Fixed.
|
+ # This action calls users callback (if needed) and cleans up validators |
+ # internal state. |
+ # |
+ # We call the user callback either on validation errors or on every |
+  # instruction, depending on CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option. |
+ # |
+ # After that we move instruction_begin and clean all the variables which |
+ # are only used in the processing of a single instruction (prefixes, operand |
+ # states and instruction_info_collected). |
action end_of_instruction_cleanup { |
/* Call user-supplied callback. */ |
instruction_end = current_position + 1; |
@@ -601,7 +634,7 @@ |
* Note: we mark start of the next instruction here, not start of the |
* current one because memory access check should be able to clear this |
* bit when restricted register is used. */ |
- MarkValidJumpTarget(instruction_begin - data, valid_targets); |
+ MarkValidJumpTarget(instruction_begin - codeblock, valid_targets); |
/* Clear variables. */ |
instruction_info_collected = 0; |
@@ -630,7 +663,7 @@ |
} |
# This is main ragel machine: it does 99% of validation work. There is only |
- # one thing to do with bundle if this machine accepts the bundle: |
+ # one thing to do with bundle if this ragel machine accepts the bundle: |
# * check for the state of the restricted_register at the end of the bundle. |
# It's an error if %rbp or %rsp is restricted at the end of the bundle. |
# Additionally if all the bundles are fine you need to check that direct jumps |
@@ -645,6 +678,10 @@ |
}%% |
+/* |
+ * The "write data" statement causes Ragel to emit the constant static data |
+ * needed by the ragel machine. |
+ */ |
%% write data; |
enum OperandKind { |
@@ -853,7 +890,7 @@ |
static INLINE void ExpandSuperinstructionBySandboxingBytes( |
size_t sandbox_instructions_size, |
const uint8_t **instruction_begin, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
*instruction_begin -= sandbox_instructions_size; |
/* |
@@ -861,7 +898,7 @@ |
* don't need to mark the beginning of the whole "superinstruction" - that's |
* why we move start by one byte and don't change the length. |
*/ |
- UnmarkValidJumpTargets((*instruction_begin + 1 - data), |
+ UnmarkValidJumpTargets((*instruction_begin + 1 - codeblock), |
sandbox_instructions_size, |
valid_targets); |
} |
@@ -991,11 +1028,14 @@ |
uint32_t *instruction_info_collected, |
const uint8_t **instruction_begin, |
const uint8_t *current_position, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) |
ExpandSuperinstructionBySandboxingBytes( |
- 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
+ 3 /* and */ + 3 /* add */, |
+ instruction_begin, |
+ codeblock, |
+ valid_targets); |
else |
*instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
} |
@@ -1028,11 +1068,14 @@ |
uint32_t *instruction_info_collected, |
const uint8_t **instruction_begin, |
const uint8_t *current_position, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) |
ExpandSuperinstructionBySandboxingBytes( |
- 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
+ 3 /* and */ + 3 /* add */, |
+ instruction_begin, |
+ codeblock, |
+ valid_targets); |
else |
*instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
} |
@@ -1071,11 +1114,14 @@ |
uint32_t *instruction_info_collected, |
const uint8_t **instruction_begin, |
const uint8_t *current_position, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) |
ExpandSuperinstructionBySandboxingBytes( |
- 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
+ 4 /* and */ + 3 /* add */, |
+ instruction_begin, |
+ codeblock, |
+ valid_targets); |
else |
*instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
} |
@@ -1114,17 +1160,21 @@ |
uint32_t *instruction_info_collected, |
const uint8_t **instruction_begin, |
const uint8_t *current_position, |
- const uint8_t *data, |
+ const uint8_t codeblock[], |
bitmap_word *valid_targets) { |
if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) |
ExpandSuperinstructionBySandboxingBytes( |
- 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); |
+ 4 /* and */ + 3 /* add */, |
+ instruction_begin, |
+ codeblock, |
+ valid_targets); |
else |
*instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
} |
-Bool ValidateChunkAMD64(const uint8_t *data, size_t size, |
+Bool ValidateChunkAMD64(const uint8_t codeblock[], |
+ size_t size, |
uint32_t options, |
const NaClCPUFeaturesX86 *cpu_features, |
ValidationCallbackFunc user_callback, |
@@ -1168,21 +1218,21 @@ |
/* |
* This option is usually used in tests: we will process the whole chunk |
* in one pass. Usually each bundle is processed separately which means |
- * instructions (and super-instructions) can not cross borders of the bundle. |
+ * instructions (and "superinstructions") can not cross borders of the bundle. |
*/ |
if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) |
- end_of_bundle = data + size; |
+ end_of_bundle = codeblock + size; |
else |
- end_of_bundle = data + kBundleSize; |
+ end_of_bundle = codeblock + kBundleSize; |
/* |
- * Main loop. Here we process the data array bundle-after-bundle. |
+ * Main loop. Here we process the codeblock array bundle-after-bundle. |
* Ragel-produced DFA does all the checks with one exception: direct jumps. |
* It collects the two arrays: valid_targets and jump_dests which are used |
* to test direct jumps later. |
*/ |
- for (current_position = data; |
- current_position < data + size; |
+ for (current_position = codeblock; |
+ current_position < codeblock + size; |
current_position = end_of_bundle, |
end_of_bundle = current_position + kBundleSize) { |
/* Start of the instruction being processed. */ |
@@ -1204,7 +1254,15 @@ |
uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; |
uint8_t vex_prefix3 = 0x00; |
+ /* |
+ * The "write init" statement causes Ragel to emit initialization code. |
+ * This should be executed once before the ragel machine is started. |
+ */ |
%% write init; |
+ /* |
+ * The "write exec" statement causes Ragel to emit the ragel machine's |
+ * execution code. |
+ */ |
%% write exec; |
/* |
@@ -1227,8 +1285,12 @@ |
* Check the direct jumps. All the targets from jump_dests must be in |
* valid_targets. |
*/ |
- result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, |
- user_callback, callback_data); |
+ result &= ProcessInvalidJumpTargets(codeblock, |
+ size, |
+ valid_targets, |
+ jump_dests, |
+ user_callback, |
+ callback_data); |
/* We only use malloc for large code sequences */ |
if (jump_dests != &jump_dests_small) free(jump_dests); |