Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(196)

Side by Side Diff: src/trusted/validator_ragel/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. 2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be 3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file. 4 * found in the LICENSE file.
5 */ 5 */
6 6
7 /* 7 /*
8 * This is the core of amd64-mode validator. Please note that this file 8 * This is the core of amd64-mode validator. Please note that this file
9 * combines ragel machine description and C language actions. Please read 9 * combines ragel machine description and C language actions. Please read
10 * validator_internals.html first to understand how the whole thing is built: 10 * validator_internals.html first to understand how the whole thing is built:
11 * it explains how the byte sequences are constructed, what constructs like 11 * it explains how the byte sequences are constructed, what constructs like
12 * "@{}" or "REX_WRX?" mean, etc. 12 * "@{}" or "REX_WRX?" mean, etc.
13 */ 13 */
14 14
15 #include <assert.h> 15 #include <assert.h>
16 #include <errno.h> 16 #include <errno.h>
17 #include <stddef.h> 17 #include <stddef.h>
18 #include <stdio.h> 18 #include <stdio.h>
19 #include <stdlib.h> 19 #include <stdlib.h>
20 #include <string.h> 20 #include <string.h>
21 21
22 #include "native_client/src/trusted/validator_ragel/bitmap.h" 22 #include "native_client/src/trusted/validator_ragel/bitmap.h"
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" 23 #include "native_client/src/trusted/validator_ragel/validator_internal.h"
24 24
25 %%{ 25 %%{
26 machine x86_64_validator; 26 machine x86_64_validator;
27 alphtype unsigned char; 27 alphtype unsigned char;
28 variable p current_position; 28 variable p current_position;
29 variable pe end_of_bundle; 29 variable pe end_of_bundle;
30 variable eof end_of_bundle; 30 variable eof end_of_bundle;
31 variable cs current_state; 31 variable cs current_state;
32 32
33 include byte_machine "byte_machines.rl"; 33 include byte_machine "byte_machines.rl";
(...skipping 28 matching lines...) Expand all
62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; 62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
63 include cpuid_actions 63 include cpuid_actions
64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; 64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
65 65
66 action check_access { 66 action check_access {
67 CheckAccess(instruction_begin - data, base, index, restricted_register, 67 CheckAccess(instruction_begin - data, base, index, restricted_register,
68 valid_targets, &instruction_info_collected); 68 valid_targets, &instruction_info_collected);
69 } 69 }
70 70
71 # Action which marks last byte as not immediate. Most 3DNow! instructions, 71 # Action which marks last byte as not immediate. Most 3DNow! instructions,
72 # some AVX and XOP instructions have this proerty. It's referenced by 72 # some AVX and XOP instructions have this property.
73 # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl" 73 #
74 # file. 74 # This action is referenced by decode_x86_32 ragel machine in [autogenerated]
75 # "validator_x86_64_instruction.rl" file.
75 action last_byte_is_not_immediate { 76 action last_byte_is_not_immediate {
76 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; 77 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
77 } 78 }
78 79
79 action modifiable_instruction { 80 action modifiable_instruction {
80 instruction_info_collected |= MODIFIABLE_INSTRUCTION; 81 instruction_info_collected |= MODIFIABLE_INSTRUCTION;
81 } 82 }
82 83
83 action process_0_operands { 84 action process_0_operands {
84 Process0Operands(&restricted_register, &instruction_info_collected); 85 Process0Operands(&restricted_register, &instruction_info_collected);
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
129 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). 130 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
130 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp 131 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp
131 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp 132 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
132 # Note: restricted_register keeps the restricted register as explained in 133 # Note: restricted_register keeps the restricted register as explained in
133 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems 134 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
134 # 135 #
135 # "Normal" instructions can not be used in a place where %rbp is restricted. 136 # "Normal" instructions can not be used in a place where %rbp is restricted.
136 # But since these instructions are "second half" of the %rbp sandboxing they 137 # But since these instructions are "second half" of the %rbp sandboxing they
137 # can be used *only* when %rbp is restricted. 138 # can be used *only* when %rbp is restricted.
138 # 139 #
139 # That is (normal instruction): 140 # Compare:
140 # mov %eax,%ebp 141 # mov %eax,%ebp
141 # mov %esi,%edi <- Error: %ebp is restricted 142 # mov %esi,%edi <- Error: %ebp is restricted
142 # vs 143 # vs
143 # mov %esi,%edi 144 # mov %esi,%edi
144 # add %r15,%rbp <- Error: %ebp is *not* restricted 145 # add %r15,%rbp <- Error: %ebp is *not* restricted
145 # vs 146 # vs
146 # mov %eax,%ebp 147 # mov %eax,%ebp
147 # add %r15,%rbp <- Ok: %rbp is restricted as it should be 148 # add %r15,%rbp <- Ok: %rbp is restricted as it should be
148 # 149 #
149 # Check this precondition and mark the beginning of the instruction as 150 # Check this precondition and mark the beginning of the instruction as
150 # invalid jump for target. 151 # invalid jump for target.
151 @{ if (restricted_register == REG_RBP) 152 @{ if (restricted_register == REG_RBP)
153 /* RESTRICTED_REGISTER_USED is informational flag used in tests. */
152 instruction_info_collected |= RESTRICTED_REGISTER_USED; 154 instruction_info_collected |= RESTRICTED_REGISTER_USED;
153 else 155 else
156 /* UNRESTRICTED_RBP_PROCESSED is error flag used in production. */
154 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; 157 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
155 restricted_register = NO_REG; 158 restricted_register = NO_REG;
156 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); 159 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
157 }; 160 };
158 161
159 # Special %rsp modifications - the ones which don't need a sandboxing. 162 # Special %rsp modifications - the ones which don't need a sandboxing.
160 # 163 #
161 # Note that there are two different opcodes for "mov": in x86-64 there are two 164 # Note that there are two different opcodes for "mov": in x86-64 there are two
162 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move 165 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
163 # from REG field to RM or in the other direction thus there are two encodings 166 # from REG field to RM or in the other direction thus there are two encodings
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
212 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; 215 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
213 restricted_register = NO_REG; 216 restricted_register = NO_REG;
214 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); 217 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
215 }; 218 };
216 219
217 # naclcall or nacljmp. These are three-instruction indirection-jump sequences. 220 # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
218 # and $~0x1f, %eXX 221 # and $~0x1f, %eXX
219 # add RBASE, %rXX 222 # add RBASE, %rXX
220 # jmpq *%rXX (or: callq *%rXX) 223 # jmpq *%rXX (or: callq *%rXX)
221 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not 224 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
222 # just as part of the naclcall/nacljmp, but also as a standolene instruction). 225 # just as part of the naclcall/nacljmp, but also as a standalone instruction).
223 # 226 #
224 # This means that when naclcall_or_nacljmp ragel machine will be combined with 227 # This means that when naclcall_or_nacljmp ragel machine will be combined with
225 # "normal_instruction*" regular action process_1_operand_zero_extends will be 228 # "normal_instruction*" regular action process_1_operand_zero_extends will be
226 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64 229 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64
227 # instruction. This action will check if %rbp/%rsp is legally modified thus 230 # instruction. This action will check if %rbp/%rsp is legally modified thus
228 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine. 231 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
229 # 232 #
230 # There are number of variants present which differ by the REX prefix usage: 233 # There are number of variants present which differ by the REX prefix usage:
231 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq" 234 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq"
232 # or "callq" is the same register and it's much simpler to do if one single 235 # or "callq" is the same register and it's much simpler to do if one single
233 # action handles only fixed number of bytes. 236 # action handles only fixed number of bytes.
234 # 237 #
235 # Additional complication arises because x86-64 contains two different "add" 238 # Additional complication arises because x86-64 contains two different "add"
236 # instruction: with "0x01" and "0x03" opcode. They differ in the direction 239 # instruction: with "0x01" and "0x03" opcode. They differ in the direction
237 # used: both can encode "add %src_register, %dst_register", but the first one 240 # used: both can encode "add %src_register, %dst_register", but the first one
238 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M 241 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
239 # byte for the dst while last one uses field RM of the ModR/M byte for the src 242 # byte for the dst while last one uses field RM of the ModR/M byte for the src
240 # and field REG of the ModR/M byte for dst. Both should be allowed. 243 # and field REG of the ModR/M byte for dst. Both should be allowed.
241 # 244 #
242 # See AMD/Intel manual for clarification "add" instruction encoding. 245 # See AMD/Intel manual for clarification about "add" instruction encoding.
243 # 246 #
244 # REGISTER USAGE ABBREVIATIONS: 247 # REGISTER USAGE ABBREVIATIONS:
245 # E86: legacy ia32 registers (all eight: %eax to %edi) 248 # E86: legacy ia32 registers (all eight: %eax to %edi)
246 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) 249 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
247 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) 250 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
248 # R64: new amd64 registers (only seven: %r8 to %r14) 251 # R64: new amd64 registers (only seven: %r8 to %r14)
249 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) 252 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)
250 naclcall_or_nacljmp = 253 naclcall_or_nacljmp =
251 # This block encodes call and jump "superinstruction" of the following form: 254 # This block encodes call and jump "superinstruction" of the following form:
252 # 0: 83 e_ e0 and $~0x1f,E86 255 # 0: 83 e_ e0 and $~0x1f,E86
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after
542 # Mark the instruction as special - currently this information is used only 545 # Mark the instruction as special - currently this information is used only
543 # in tests, but in the future we may use it for dynamic code modification 546 # in tests, but in the future we may use it for dynamic code modification
544 # support. 547 # support.
545 @{ 548 @{
546 instruction_info_collected |= SPECIAL_INSTRUCTION; 549 instruction_info_collected |= SPECIAL_INSTRUCTION;
547 }; 550 };
548 551
549 # Remove special instructions which are only allowed in special cases. 552 # Remove special instructions which are only allowed in special cases.
550 normal_instruction = one_instruction - special_instruction; 553 normal_instruction = one_instruction - special_instruction;
551 554
552 # Check if call is properly aligned. 555 # Ragel machine which checks if call is properly aligned.
553 # 556 #
554 # For direct call we explicitly encode all variations. For indirect call 557 # For direct call we explicitly encode all variations. For indirect call
555 # we accept all the special instructions which ends with register-addressed 558 # we accept all the special instructions which ends with register-addressed
556 # indirect call. 559 # indirect call.
557 call_alignment = 560 call_alignment =
558 ((normal_instruction & 561 ((normal_instruction &
559 # Direct call 562 # Direct call
560 ((data16 REX_RXB? 0xe8 rel16) | 563 ((data16 REX_RXB? 0xe8 rel16) |
561 (REX_WRXB? 0xe8 rel32) | 564 (REX_WRXB? 0xe8 rel32) |
562 (data16 REXW_RXB 0xe8 rel32))) | 565 (data16 REXW_RXB 0xe8 rel32))) |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
623 * the ValidateChunkAMD64 function. 626 * the ValidateChunkAMD64 function.
624 * 627 *
625 * It does not affect the case which we really care about (when code 628 * It does not affect the case which we really care about (when code
626 * is validatable), but makes it possible to detect more errors in one 629 * is validatable), but makes it possible to detect more errors in one
627 * run in tools like ncval. 630 * run in tools like ncval.
628 */ 631 */
629 continue; 632 continue;
630 } 633 }
631 634
632 # This is main ragel machine: it does 99% of validation work. There is only 635 # This is main ragel machine: it does 99% of validation work. There is only
633 # one thing to do with bundle if this machine accepts the bundle: 636 # one thing to do with bundle if this ragel machine accepts the bundle:
634 # * check for the state of the restricted_register at the end of the bundle. 637 # * check for the state of the restricted_register at the end of the bundle.
635 # It's an error if %rbp or %rsp is restricted at the end of the bundle. 638 # It's an error if %rbp or %rsp is restricted at the end of the bundle.
636 # Additionally if all the bundles are fine you need to check that direct jumps 639 # Additionally if all the bundles are fine you need to check that direct jumps
637 # are correct. This is done in the following way: 640 # are correct. This is done in the following way:
638 # * DFA fills two arrays: valid_targets and jump_dests. 641 # * DFA fills two arrays: valid_targets and jump_dests.
639 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". 642 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".
640 # All other checks are done here. 643 # All other checks are done here.
641 644
642 main := ((call_alignment | normal_instruction | special_instruction) 645 main := ((call_alignment | normal_instruction | special_instruction)
643 @end_of_instruction_cleanup)* 646 @end_of_instruction_cleanup)*
644 $!report_fatal_error; 647 $!report_fatal_error;
645 648
646 }%% 649 }%%
647 650
651 /*
652 * The "write data" statement causes Ragel to emit the constant static data
653 * needed by the ragel machine.
654 */
648 %% write data; 655 %% write data;
649 656
650 enum OperandKind { 657 enum OperandKind {
651 OPERAND_SANDBOX_IRRELEVANT = 0, 658 OPERAND_SANDBOX_IRRELEVANT = 0,
652 /* 659 /*
653 * Currently we do not distinguish 8bit and 16bit modifications from 660 * Currently we do not distinguish 8bit and 16bit modifications from
654 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. 661 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator.
655 * 662 *
656 * 8bit operands must be distinguished from other types because the REX prefix 663 * 8bit operands must be distinguished from other types because the REX prefix
657 * regulates the choice between %ah and %spl, as well as %ch and %bpl. 664 * regulates the choice between %ah and %spl, as well as %ch and %bpl.
(...skipping 503 matching lines...) Expand 10 before | Expand all | Expand 10 after
1161 free(jump_dests); 1168 free(jump_dests);
1162 free(valid_targets); 1169 free(valid_targets);
1163 errno = ENOMEM; 1170 errno = ENOMEM;
1164 return FALSE; 1171 return FALSE;
1165 } 1172 }
1166 } 1173 }
1167 1174
1168 /* 1175 /*
1169 * This option is usually used in tests: we will process the whole chunk 1176 * This option is usually used in tests: we will process the whole chunk
1170 * in one pass. Usually each bundle is processed separately which means 1177 * in one pass. Usually each bundle is processed separately which means
1171 * instructions (and super-instructions) can not cross borders of the bundle. 1178 * instructions (and "superinstructions") can not cross borders of the bundle.
1172 */ 1179 */
1173 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) 1180 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
1174 end_of_bundle = data + size; 1181 end_of_bundle = data + size;
1175 else 1182 else
1176 end_of_bundle = data + kBundleSize; 1183 end_of_bundle = data + kBundleSize;
1177 1184
1178 /* 1185 /*
1179 * Main loop. Here we process the data array bundle-after-bundle. 1186 * Main loop. Here we process the data array bundle-after-bundle.
1180 * Ragel-produced DFA does all the checks with one exception: direct jumps. 1187 * Ragel-produced DFA does all the checks with one exception: direct jumps.
1181 * It collects the two arrays: valid_targets and jump_dests which are used 1188 * It collects the two arrays: valid_targets and jump_dests which are used
(...skipping 15 matching lines...) Expand all
1197 uint32_t operand_states = 0; 1204 uint32_t operand_states = 0;
1198 enum OperandName base = NO_REG; 1205 enum OperandName base = NO_REG;
1199 enum OperandName index = NO_REG; 1206 enum OperandName index = NO_REG;
1200 enum OperandName restricted_register = 1207 enum OperandName restricted_register =
1201 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options); 1208 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
1202 uint8_t rex_prefix = FALSE; 1209 uint8_t rex_prefix = FALSE;
1203 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ 1210 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
1204 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; 1211 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
1205 uint8_t vex_prefix3 = 0x00; 1212 uint8_t vex_prefix3 = 0x00;
1206 1213
1214 /*
1215 * The "write init" statement causes Ragel to emit initialization code.
1216 * This should be executed once before the ragel machine is started.
1217 */
1207 %% write init; 1218 %% write init;
1219 /*
1220 * The "write exec" statement causes Ragel to emit the ragel machine's
1221 * execution code.
1222 */
1208 %% write exec; 1223 %% write exec;
1209 1224
1210 /* 1225 /*
1211 * Ragel DFA accepted the bundle, but we still need to make sure the last 1226 * Ragel DFA accepted the bundle, but we still need to make sure the last
1212 * instruction hasn't left %rbp or %rsp in restricted state. 1227 * instruction hasn't left %rbp or %rsp in restricted state.
1213 */ 1228 */
1214 if (restricted_register == REG_RBP) 1229 if (restricted_register == REG_RBP)
1215 result &= user_callback(end_of_bundle, end_of_bundle, 1230 result &= user_callback(end_of_bundle, end_of_bundle,
1216 RESTRICTED_RBP_UNPROCESSED | 1231 RESTRICTED_RBP_UNPROCESSED |
1217 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) & 1232 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
(...skipping 11 matching lines...) Expand all
1229 */ 1244 */
1230 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, 1245 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
1231 user_callback, callback_data); 1246 user_callback, callback_data);
1232 1247
1233 /* We only use malloc for a large code sequences */ 1248 /* We only use malloc for a large code sequences */
1234 if (jump_dests != &jump_dests_small) free(jump_dests); 1249 if (jump_dests != &jump_dests_small) free(jump_dests);
1235 if (valid_targets != &valid_targets_small) free(valid_targets); 1250 if (valid_targets != &valid_targets_small) free(valid_targets);
1236 if (!result) errno = EINVAL; 1251 if (!result) errno = EINVAL;
1237 return result; 1252 return result;
1238 } 1253 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698