src/trusted/validator_ragel/validator_x86_64.rl - Issue 11000033: Move validator_x86_XX.rl out of unreviewed.

Side by Side Diff: src/trusted/validator_ragel/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.	2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.

3 * Use of this source code is governed by a BSD-style license that can be	3 * Use of this source code is governed by a BSD-style license that can be

4 * found in the LICENSE file.	4 * found in the LICENSE file.

5 */	5 */

6	6

7 /*	7 /*

8 * This is the core of amd64-mode validator. Please note that this file	8 * This is the core of amd64-mode validator. Please note that this file

9 * combines ragel machine description and C language actions. Please read	9 * combines ragel machine description and C language actions. Please read

10 * validator_internals.html first to understand how the whole thing is built:	10 * validator_internals.html first to understand how the whole thing is built:

11 * it explains how the byte sequences are constructed, what constructs like	11 * it explains how the byte sequences are constructed, what constructs like

12 * "@{}" or "REX_WRX?" mean, etc.	12 * "@{}" or "REX_WRX?" mean, etc.

13 */	13 */

14	14

15 #include <assert.h>	15 #include <assert.h>

16 #include <errno.h>	16 #include <errno.h>

17 #include <stddef.h>	17 #include <stddef.h>

18 #include <stdio.h>	18 #include <stdio.h>

19 #include <stdlib.h>	19 #include <stdlib.h>

20 #include <string.h>	20 #include <string.h>

21	21

22 #include "native_client/src/trusted/validator_ragel/bitmap.h"	22 #include "native_client/src/trusted/validator_ragel/bitmap.h"

23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h"	23 #include "native_client/src/trusted/validator_ragel/validator_internal.h"

24	24

25 %%{	25 %%{

26 machine x86_64_validator;	26 machine x86_64_validator;

27 alphtype unsigned char;	27 alphtype unsigned char;

28 variable p current_position;	28 variable p current_position;

29 variable pe end_of_bundle;	29 variable pe end_of_bundle;

30 variable eof end_of_bundle;	30 variable eof end_of_bundle;

31 variable cs current_state;	31 variable cs current_state;

32	32

33 include byte_machine "byte_machines.rl";	33 include byte_machine "byte_machines.rl";

(...skipping 28 matching lines...) Expand all Loading...
62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";	62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";

63 include cpuid_actions	63 include cpuid_actions

64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";	64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";

65	65

66 action check_access {	66 action check_access {

67 CheckAccess(instruction_begin - data, base, index, restricted_register,	67 CheckAccess(instruction_begin - data, base, index, restricted_register,

68 valid_targets, &instruction_info_collected);	68 valid_targets, &instruction_info_collected);

69 }	69 }

70	70

71 # Action which marks last byte as not immediate. Most 3DNow! instructions,	71 # Action which marks last byte as not immediate. Most 3DNow! instructions,

72 # some AVX and XOP instructions have this proerty. It's referenced by	72 # some AVX and XOP instructions have this property.

73 # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl"	73 #

74 # file.	74 # This action is referenced by decode_x86_64 ragel machine in [autogenerated]

	75 # "validator_x86_64_instruction.rl" file.

75 action last_byte_is_not_immediate {	76 action last_byte_is_not_immediate {

76 instruction_info_collected \|= LAST_BYTE_IS_NOT_IMMEDIATE;	77 instruction_info_collected \|= LAST_BYTE_IS_NOT_IMMEDIATE;

77 }	78 }

78	79

79 action modifiable_instruction {	80 action modifiable_instruction {

80 instruction_info_collected \|= MODIFIABLE_INSTRUCTION;	81 instruction_info_collected \|= MODIFIABLE_INSTRUCTION;

81 }	82 }

82	83

83 action process_0_operands {	84 action process_0_operands {

84 Process0Operands(&restricted_register, &instruction_info_collected);	85 Process0Operands(&restricted_register, &instruction_info_collected);

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
129 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').	130 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').

130 0x4a 0x8d 0x6c 0x3d 0x00 \| # lea 0x00(%rbp,%r15,1),%rbp	131 0x4a 0x8d 0x6c 0x3d 0x00 \| # lea 0x00(%rbp,%r15,1),%rbp

131 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp	132 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp

132 # Note: restricted_register keeps the restricted register as explained in	133 # Note: restricted_register keeps the restricted register as explained in

133 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems	134 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems

134 #	135 #

135 # "Normal" instructions can not be used in a place where %rbp is restricted.	136 # "Normal" instructions can not be used in a place where %rbp is restricted.

136 # But since these instructions are "second half" of the %rbp sandboxing they	137 # But since these instructions are "second half" of the %rbp sandboxing they

137 # can be used only when %rbp is restricted.	138 # can be used only when %rbp is restricted.

138 #	139 #

139 # That is (normal instruction):	140 # Compare:

140 # mov %eax,%ebp	141 # mov %eax,%ebp

141 # mov %esi,%edi <- Error: %ebp is restricted	142 # mov %esi,%edi <- Error: %ebp is restricted

142 # vs	143 # vs

143 # mov %esi,%edi	144 # mov %esi,%edi

144 # add %r15,%rbp <- Error: %ebp is not restricted	145 # add %r15,%rbp <- Error: %ebp is not restricted

145 # vs	146 # vs

146 # mov %eax,%ebp	147 # mov %eax,%ebp

147 # add %r15,%rbp <- Ok: %rbp is restricted as it should be	148 # add %r15,%rbp <- Ok: %rbp is restricted as it should be

148 #	149 #

149 # Check this precondition and mark the beginning of the instruction as	150 # Check this precondition and mark the beginning of the instruction as

150 # invalid jump for target.	151 # invalid jump for target.

151 @{ if (restricted_register == REG_RBP)	152 @{ if (restricted_register == REG_RBP)

	153 /* RESTRICTED_REGISTER_USED is informational flag used in tests. */

152 instruction_info_collected \|= RESTRICTED_REGISTER_USED;	154 instruction_info_collected \|= RESTRICTED_REGISTER_USED;

153 else	155 else

	156 /* UNRESTRICTED_RBP_PROCESSED is error flag used in production. */

154 instruction_info_collected \|= UNRESTRICTED_RBP_PROCESSED;	157 instruction_info_collected \|= UNRESTRICTED_RBP_PROCESSED;

155 restricted_register = NO_REG;	158 restricted_register = NO_REG;

156 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);	159 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);

157 };	160 };

158	161

159 # Special %rsp modifications - the ones which don't need a sandboxing.	162 # Special %rsp modifications - the ones which don't need a sandboxing.

160 #	163 #

161 # Note that there are two different opcodes for "mov": in x86-64 there are two	164 # Note that there are two different opcodes for "mov": in x86-64 there are two

162 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move	165 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move

163 # from REG field to RM or in the other direction thus there are two encodings	166 # from REG field to RM or in the other direction thus there are two encodings

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
212 instruction_info_collected \|= UNRESTRICTED_RSP_PROCESSED;	215 instruction_info_collected \|= UNRESTRICTED_RSP_PROCESSED;

213 restricted_register = NO_REG;	216 restricted_register = NO_REG;

214 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);	217 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);

215 };	218 };

216	219

217 # naclcall or nacljmp. These are three-instruction indirection-jump sequences.	220 # naclcall or nacljmp. These are three-instruction indirection-jump sequences.

218 # and $~0x1f, %eXX	221 # and $~0x1f, %eXX

219 # and RBASE, %rXX	222 # and RBASE, %rXX

220 # jmpq %rXX (or: callq %rXX)	223 # jmpq %rXX (or: callq %rXX)

221 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not	224 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not

222 # just as part of the naclcall/nacljmp, but also as a standolene instruction).	225 # just as part of the naclcall/nacljmp, but also as a standalone instruction).

223 #	226 #

224 # This means that when naclcall_or_nacljmp ragel machine will be combined with	227 # This means that when naclcall_or_nacljmp ragel machine will be combined with

225 # "normal_instruction*" regular action process_1_operand_zero_extends will be	228 # "normal_instruction*" regular action process_1_operand_zero_extends will be

226 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64	229 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64

227 # instruction. This action will check if %rbp/%rsp is legally modified thus	230 # instruction. This action will check if %rbp/%rsp is legally modified thus

228 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.	231 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.

229 #	232 #

230 # There are number of variants present which differ by the REX prefix usage:	233 # There are number of variants present which differ by the REX prefix usage:

231 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%eXX" in "jmpq"	234 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%eXX" in "jmpq"

232 # or "callq" is the same register and it's much simpler to do if one single	235 # or "callq" is the same register and it's much simpler to do if one single

233 # action handles only fixed number of bytes.	236 # action handles only fixed number of bytes.

234 #	237 #

235 # Additional complication arises because x86-64 contains two different "add"	238 # Additional complication arises because x86-64 contains two different "add"

236 # instruction: with "0x01" and "0x03" opcode. They differ in the direction	239 # instruction: with "0x01" and "0x03" opcode. They differ in the direction

237 # used: both can encode "add %src_register, %dst_register", but the first one	240 # used: both can encode "add %src_register, %dst_register", but the first one

238 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M	241 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M

239 # byte for the dst while last one uses field RM of the ModR/M byte for the src	242 # byte for the dst while last one uses field RM of the ModR/M byte for the src

240 # and field REG of the ModR/M byte for dst. Both should be allowed.	243 # and field REG of the ModR/M byte for dst. Both should be allowed.

241 #	244 #

242 # See AMD/Intel manual for clarification "add" instruction encoding.	245 # See AMD/Intel manual for clarification about "add" instruction encoding.

243 #	246 #

244 # REGISTER USAGE ABBREVIATIONS:	247 # REGISTER USAGE ABBREVIATIONS:

245 # E86: legacy ia32 registers (all eight: %eax to %edi)	248 # E86: legacy ia32 registers (all eight: %eax to %edi)

246 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)	249 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)

247 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)	250 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)

248 # R64: new amd64 registers (only seven: %r8 to %r14)	251 # R64: new amd64 registers (only seven: %r8 to %r14)

249 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)	252 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)

250 naclcall_or_nacljmp =	253 naclcall_or_nacljmp =

251 # This block encodes call and jump "superinstruction" of the following form:	254 # This block encodes call and jump "superinstruction" of the following form:

252 # 0: 83 e_ e0 and $~0x1f,E86	255 # 0: 83 e_ e0 and $~0x1f,E86

(...skipping 289 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
542 # Mark the instruction as special - currently this information is used only	545 # Mark the instruction as special - currently this information is used only

543 # in tests, but in the future we may use it for dynamic code modification	546 # in tests, but in the future we may use it for dynamic code modification

544 # support.	547 # support.

545 @{	548 @{

546 instruction_info_collected \|= SPECIAL_INSTRUCTION;	549 instruction_info_collected \|= SPECIAL_INSTRUCTION;

547 };	550 };

548	551

549 # Remove special instructions which are only allowed in special cases.	552 # Remove special instructions which are only allowed in special cases.

550 normal_instruction = one_instruction - special_instruction;	553 normal_instruction = one_instruction - special_instruction;

551	554

552 # Check if call is properly aligned.	555 # Ragel machine which checks if call is properly aligned.

553 #	556 #

554 # For direct call we explicitly encode all variations. For indirect call	557 # For direct call we explicitly encode all variations. For indirect call

555 # we accept all the special instructions which ends with register-addressed	558 # we accept all the special instructions which ends with register-addressed

556 # indirect call.	559 # indirect call.

557 call_alignment =	560 call_alignment =

558 ((normal_instruction &	561 ((normal_instruction &

559 # Direct call	562 # Direct call

560 ((data16 REX_RXB? 0xe8 rel16) \|	563 ((data16 REX_RXB? 0xe8 rel16) \|

561 (REX_WRXB? 0xe8 rel32) \|	564 (REX_WRXB? 0xe8 rel32) \|

562 (data16 REXW_RXB 0xe8 rel32))) \|	565 (data16 REXW_RXB 0xe8 rel32))) \|

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
623 * the ValidateChunkAMD64 function.	626 * the ValidateChunkAMD64 function.

624 *	627 *

625 * It does not affect the case which we really care about (when code	628 * It does not affect the case which we really care about (when code

626 * is validatable), but makes it possible to detect more errors in one	629 * is validatable), but makes it possible to detect more errors in one

627 * run in tools like ncval.	630 * run in tools like ncval.

628 */	631 */

629 continue;	632 continue;

630 }	633 }

631	634

632 # This is main ragel machine: it does 99% of validation work. There are only	635 # This is main ragel machine: it does 99% of validation work. There are only

633 # one thing to do with bundle if this machine accepts the bundle:	636 # one thing to do with bundle if this ragel machine accepts the bundle:

634 # * check for the state of the restricted_register at the end of the bundle.	637 # * check for the state of the restricted_register at the end of the bundle.

635 # It's an error if %rbp or %rsp is restricted at the end of the bundle.	638 # It's an error if %rbp or %rsp is restricted at the end of the bundle.

636 # Additionally if all the bundles are fine you need to check that direct jumps	639 # Additionally if all the bundles are fine you need to check that direct jumps

637 # are correct. This is done in the following way:	640 # are correct. This is done in the following way:

638 # * DFA fills two arrays: valid_targets and jump_dests.	641 # * DFA fills two arrays: valid_targets and jump_dests.

639 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".	642 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".

640 # All other checks are done here.	643 # All other checks are done here.

641	644

642 main := ((call_alignment \| normal_instruction \| special_instruction)	645 main := ((call_alignment \| normal_instruction \| special_instruction)

643 @end_of_instruction_cleanup)*	646 @end_of_instruction_cleanup)*

644 $!report_fatal_error;	647 $!report_fatal_error;

645	648

646 }%%	649 }%%

647	650

	651 /*

	652 * The "write data" statement causes Ragel to emit the constant static data

	653 * needed by the ragel machine.

	654 */

648 %% write data;	655 %% write data;

649	656

650 enum OperandKind {	657 enum OperandKind {

651 OPERAND_SANDBOX_IRRELEVANT = 0,	658 OPERAND_SANDBOX_IRRELEVANT = 0,

652 /*	659 /*

653 * Currently we do not distinguish 8bit and 16bit modifications from	660 * Currently we do not distinguish 8bit and 16bit modifications from

654 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator.	661 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator.

655 *	662 *

656 * 8bit operands must be distinguished from other types because the REX prefix	663 * 8bit operands must be distinguished from other types because the REX prefix

657 * regulates the choice between %ah and %spl, as well as %ch and %bpl.	664 * regulates the choice between %ah and %spl, as well as %ch and %bpl.

(...skipping 503 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1161 free(jump_dests);	1168 free(jump_dests);

1162 free(valid_targets);	1169 free(valid_targets);

1163 errno = ENOMEM;	1170 errno = ENOMEM;

1164 return FALSE;	1171 return FALSE;

1165 }	1172 }

1166 }	1173 }

1167	1174

1168 /*	1175 /*

1169 * This option is usually used in tests: we will process the whole chunk	1176 * This option is usually used in tests: we will process the whole chunk

1170 * in one pass. Usually each bundle is processed separately which means	1177 * in one pass. Usually each bundle is processed separately which means

1171 * instructions (and super-instructions) can not cross borders of the bundle.	1178 * instructions (and "superinstructions") can not cross borders of the bundle.

1172 */	1179 */

1173 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)	1180 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)

1174 end_of_bundle = data + size;	1181 end_of_bundle = data + size;

1175 else	1182 else

1176 end_of_bundle = data + kBundleSize;	1183 end_of_bundle = data + kBundleSize;

1177	1184

1178 /*	1185 /*

1179 * Main loop. Here we process the data array bundle-after-bundle.	1186 * Main loop. Here we process the data array bundle-after-bundle.

1180 * Ragel-produced DFA does all the checks with one exception: direct jumps.	1187 * Ragel-produced DFA does all the checks with one exception: direct jumps.

1181 * It collects the two arrays: valid_targets and jump_dests which are used	1188 * It collects the two arrays: valid_targets and jump_dests which are used

(...skipping 15 matching lines...) Expand all Loading...
1197 uint32_t operand_states = 0;	1204 uint32_t operand_states = 0;

1198 enum OperandName base = NO_REG;	1205 enum OperandName base = NO_REG;

1199 enum OperandName index = NO_REG;	1206 enum OperandName index = NO_REG;

1200 enum OperandName restricted_register =	1207 enum OperandName restricted_register =

1201 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);	1208 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);

1202 uint8_t rex_prefix = FALSE;	1209 uint8_t rex_prefix = FALSE;

1203 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */	1210 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */

1204 uint8_t vex_prefix2 = VEX_R \| VEX_X \| VEX_B;	1211 uint8_t vex_prefix2 = VEX_R \| VEX_X \| VEX_B;

1205 uint8_t vex_prefix3 = 0x00;	1212 uint8_t vex_prefix3 = 0x00;

1206	1213

	1214 /*

	1215 * The "write init" statement causes Ragel to emit initialization code.

	1216 * This should be executed once before the ragel machine is started.

	1217 */

1207 %% write init;	1218 %% write init;

	1219 /*

	1220 * The "write exec" statement causes Ragel to emit the ragel machine's

	1221 * execution code.

	1222 */

1208 %% write exec;	1223 %% write exec;

1209	1224

1210 /*	1225 /*

1211 * Ragel DFA accepted the bundle, but we still need to make sure the last	1226 * Ragel DFA accepted the bundle, but we still need to make sure the last

1212 * instruction haven't left %rbp or %rsp in restricted state.	1227 * instruction haven't left %rbp or %rsp in restricted state.

1213 */	1228 */

1214 if (restricted_register == REG_RBP)	1229 if (restricted_register == REG_RBP)

1215 result &= user_callback(end_of_bundle, end_of_bundle,	1230 result &= user_callback(end_of_bundle, end_of_bundle,

1216 RESTRICTED_RBP_UNPROCESSED \|	1231 RESTRICTED_RBP_UNPROCESSED \|

1217 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &	1232 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &

(...skipping 11 matching lines...) Expand all Loading...
1229 */	1244 */

1230 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,	1245 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,

1231 user_callback, callback_data);	1246 user_callback, callback_data);

1232	1247

1233 /* We only use malloc for a large code sequences */	1248 /* We only use malloc for a large code sequences */

1234 if (jump_dests != &jump_dests_small) free(jump_dests);	1249 if (jump_dests != &jump_dests_small) free(jump_dests);

1235 if (valid_targets != &valid_targets_small) free(valid_targets);	1250 if (valid_targets != &valid_targets_small) free(valid_targets);

1236 if (!result) errno = EINVAL;	1251 if (!result) errno = EINVAL;

1237 return result;	1252 return result;

1238 }	1253 }

OLD	NEW

« src/trusted/validator_ragel/validator_x86_32.rl ('K') | « src/trusted/validator_ragel/validator_x86_32.rl ('k') | no next file » | no next file with comments »