OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. | 2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. |
3 * Use of this source code is governed by a BSD-style license that can be | 3 * Use of this source code is governed by a BSD-style license that can be |
4 * found in the LICENSE file. | 4 * found in the LICENSE file. |
5 */ | 5 */ |
6 | 6 |
7 /* | 7 /* |
8 * This is the core of amd64-mode validator. Please note that this file | 8 * This is the core of amd64-mode validator. Please note that this file |
9 * combines ragel machine description and C language actions. Please read | 9 * combines ragel machine description and C language actions. Please read |
10 * validator_internals.html first to understand how the whole thing is built: | 10 * validator_internals.html first to understand how the whole thing is built: |
11 * it explains how the byte sequences are constructed, what constructs like | 11 * it explains how the byte sequences are constructed, what constructs like |
12 * "@{}" or "REX_WRX?" mean, etc. | 12 * "@{}" or "REX_WRX?" mean, etc. |
13 */ | 13 */ |
14 | 14 |
15 #include <assert.h> | 15 #include <assert.h> |
16 #include <errno.h> | 16 #include <errno.h> |
17 #include <stddef.h> | 17 #include <stddef.h> |
18 #include <stdio.h> | 18 #include <stdio.h> |
19 #include <stdlib.h> | 19 #include <stdlib.h> |
20 #include <string.h> | 20 #include <string.h> |
21 | 21 |
22 #include "native_client/src/trusted/validator_ragel/bitmap.h" | 22 #include "native_client/src/trusted/validator_ragel/bitmap.h" |
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" | 23 #include "native_client/src/trusted/validator_ragel/validator_internal.h" |
24 | 24 |
25 %%{ | 25 %%{ |
26 machine x86_64_validator; | 26 machine x86_64_validator; |
27 alphtype unsigned char; | 27 alphtype unsigned char; |
28 variable p current_position; | 28 variable p current_position; |
29 variable pe end_of_bundle; | 29 variable pe end_of_bundle; |
30 variable eof end_of_bundle; | 30 variable eof end_of_bundle; |
31 variable cs current_state; | 31 variable cs current_state; |
32 | 32 |
33 include byte_machine "byte_machines.rl"; | 33 include byte_machine "byte_machines.rl"; |
(...skipping 28 matching lines...) Expand all Loading... |
62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | 62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; |
63 include cpuid_actions | 63 include cpuid_actions |
64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | 64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; |
65 | 65 |
66 action check_access { | 66 action check_access { |
67 CheckAccess(instruction_begin - data, base, index, restricted_register, | 67 CheckAccess(instruction_begin - data, base, index, restricted_register, |
68 valid_targets, &instruction_info_collected); | 68 valid_targets, &instruction_info_collected); |
69 } | 69 } |
70 | 70 |
71 # Action which marks last byte as not immediate. Most 3DNow! instructions, | 71 # Action which marks last byte as not immediate. Most 3DNow! instructions, |
72 # some AVX and XOP instructions have this proerty. It's referenced by | 72 # some AVX and XOP instructions have this property. |
73 # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl" | 73 # |
74 # file. | 74 # This action is referenced by decode_x86_64 ragel machine in [autogenerated] |
| 75 # "validator_x86_64_instruction.rl" file. |
75 action last_byte_is_not_immediate { | 76 action last_byte_is_not_immediate { |
76 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; | 77 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; |
77 } | 78 } |
78 | 79 |
79 action modifiable_instruction { | 80 action modifiable_instruction { |
80 instruction_info_collected |= MODIFIABLE_INSTRUCTION; | 81 instruction_info_collected |= MODIFIABLE_INSTRUCTION; |
81 } | 82 } |
82 | 83 |
83 action process_0_operands { | 84 action process_0_operands { |
84 Process0Operands(&restricted_register, &instruction_info_collected); | 85 Process0Operands(&restricted_register, &instruction_info_collected); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
129 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). | 130 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). |
130 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp | 131 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp |
131 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp | 132 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp |
132 # Note: restricted_register keeps the restricted register as explained in | 133 # Note: restricted_register keeps the restricted register as explained in |
133 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems | 134 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems |
134 # | 135 # |
135 # "Normal" instructions can not be used in a place where %rbp is restricted. | 136 # "Normal" instructions can not be used in a place where %rbp is restricted. |
136 # But since these instructions are "second half" of the %rbp sandboxing they | 137 # But since these instructions are "second half" of the %rbp sandboxing they |
137 # can be used *only* when %rbp is restricted. | 138 # can be used *only* when %rbp is restricted. |
138 # | 139 # |
139 # That is (normal instruction): | 140 # Compare: |
140 # mov %eax,%ebp | 141 # mov %eax,%ebp |
141 # mov %esi,%edi <- Error: %ebp is restricted | 142 # mov %esi,%edi <- Error: %ebp is restricted |
142 # vs | 143 # vs |
143 # mov %esi,%edi | 144 # mov %esi,%edi |
144 # add %r15,%rbp <- Error: %ebp is *not* restricted | 145 # add %r15,%rbp <- Error: %ebp is *not* restricted |
145 # vs | 146 # vs |
146 # mov %eax,%ebp | 147 # mov %eax,%ebp |
147 # add %r15,%rbp <- Ok: %rbp is restricted as it should be | 148 # add %r15,%rbp <- Ok: %rbp is restricted as it should be |
148 # | 149 # |
149 # Check this precondition and mark the beginning of the instruction as | 150 # Check this precondition and mark the beginning of the instruction as |
150 # invalid jump for target. | 151 # an invalid jump target. |
151 @{ if (restricted_register == REG_RBP) | 152 @{ if (restricted_register == REG_RBP) |
| 153 /* RESTRICTED_REGISTER_USED is an informational flag used in tests. */ |
152 instruction_info_collected |= RESTRICTED_REGISTER_USED; | 154 instruction_info_collected |= RESTRICTED_REGISTER_USED; |
153 else | 155 else |
| 156 /* UNRESTRICTED_RBP_PROCESSED is an error flag used in production. */ |
154 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; | 157 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; |
155 restricted_register = NO_REG; | 158 restricted_register = NO_REG; |
156 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); | 159 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); |
157 }; | 160 }; |
158 | 161 |
159 # Special %rsp modifications - the ones which don't need a sandboxing. | 162 # Special %rsp modifications - the ones which don't need a sandboxing. |
160 # | 163 # |
161 # Note that there are two different opcodes for "mov": in x86-64 there are two | 164 # Note that there are two different opcodes for "mov": in x86-64 there are two |
162 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | 165 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move |
163 # from REG field to RM or in the other direction thus there are two encodings | 166 # from REG field to RM or in the other direction thus there are two encodings |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
212 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; | 215 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; |
213 restricted_register = NO_REG; | 216 restricted_register = NO_REG; |
214 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); | 217 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); |
215 }; | 218 }; |
216 | 219 |
217 # naclcall or nacljmp. These are three-instruction indirection-jump sequences. | 220 # naclcall or nacljmp. These are three-instruction indirection-jump sequences. |
218 # and $~0x1f, %eXX | 221 # and $~0x1f, %eXX |
219 # and RBASE, %rXX | 222 # add RBASE, %rXX |
220 # jmpq *%rXX (or: callq *%rXX) | 223 # jmpq *%rXX (or: callq *%rXX) |
221 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not | 224 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not |
222 # just as part of the naclcall/nacljmp, but also as a standolene instruction). | 225 # just as part of the naclcall/nacljmp, but also as a standalone instruction). |
223 # | 226 # |
224 # This means that when naclcall_or_nacljmp ragel machine will be combined with | 227 # This means that when naclcall_or_nacljmp ragel machine will be combined with |
225 # "normal_instruction*" regular action process_1_operand_zero_extends will be | 228 # "normal_instruction*" regular action process_1_operand_zero_extends will be |
226 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64 | 229 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64 |
227 # instruction. This action will check if %rbp/%rsp is legally modified thus | 230 # instruction. This action will check if %rbp/%rsp is legally modified thus |
228 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine. | 231 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine. |
229 # | 232 # |
230 # There are number of variants present which differ by the REX prefix usage: | 233 # There are a number of variants present which differ by the REX prefix usage: |
231 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%eXX" in "jmpq" | 234 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq" |
232 # or "callq" is the same register and it's much simpler to do if one single | 235 # or "callq" is the same register and it's much simpler to do if one single |
233 # action handles only fixed number of bytes. | 236 # action handles only fixed number of bytes. |
234 # | 237 # |
235 # Additional complication arises because x86-64 contains two different "add" | 238 # Additional complication arises because x86-64 contains two different "add" |
236 # instruction: with "0x01" and "0x03" opcode. They differ in the direction | 239 # instruction: with "0x01" and "0x03" opcode. They differ in the direction |
237 # used: both can encode "add %src_register, %dst_register", but the first one | 240 # used: both can encode "add %src_register, %dst_register", but the first one |
238 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M | 241 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M |
239 # byte for the dst while last one uses field RM of the ModR/M byte for the src | 242 # byte for the dst while last one uses field RM of the ModR/M byte for the src |
240 # and field REG of the ModR/M byte for dst. Both should be allowed. | 243 # and field REG of the ModR/M byte for dst. Both should be allowed. |
241 # | 244 # |
242 # See AMD/Intel manual for clarification "add" instruction encoding. | 245 # See AMD/Intel manual for clarification about "add" instruction encoding. |
243 # | 246 # |
244 # REGISTER USAGE ABBREVIATIONS: | 247 # REGISTER USAGE ABBREVIATIONS: |
245 # E86: legacy ia32 registers (all eight: %eax to %edi) | 248 # E86: legacy ia32 registers (all eight: %eax to %edi) |
246 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) | 249 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) |
247 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) | 250 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) |
248 # R64: new amd64 registers (only seven: %r8 to %r14) | 251 # R64: new amd64 registers (only seven: %r8 to %r14) |
249 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) | 252 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) |
250 naclcall_or_nacljmp = | 253 naclcall_or_nacljmp = |
251 # This block encodes call and jump "superinstruction" of the following form: | 254 # This block encodes call and jump "superinstruction" of the following form: |
252 # 0: 83 e_ e0 and $~0x1f,E86 | 255 # 0: 83 e_ e0 and $~0x1f,E86 |
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
542 # Mark the instruction as special - currently this information is used only | 545 # Mark the instruction as special - currently this information is used only |
543 # in tests, but in the future we may use it for dynamic code modification | 546 # in tests, but in the future we may use it for dynamic code modification |
544 # support. | 547 # support. |
545 @{ | 548 @{ |
546 instruction_info_collected |= SPECIAL_INSTRUCTION; | 549 instruction_info_collected |= SPECIAL_INSTRUCTION; |
547 }; | 550 }; |
548 | 551 |
549 # Remove special instructions which are only allowed in special cases. | 552 # Remove special instructions which are only allowed in special cases. |
550 normal_instruction = one_instruction - special_instruction; | 553 normal_instruction = one_instruction - special_instruction; |
551 | 554 |
552 # Check if call is properly aligned. | 555 # Ragel machine which checks if call is properly aligned. |
553 # | 556 # |
554 # For direct call we explicitly encode all variations. For indirect call | 557 # For direct call we explicitly encode all variations. For indirect call |
555 # we accept all the special instructions which ends with register-addressed | 558 # we accept all the special instructions which end with register-addressed |
556 # indirect call. | 559 # indirect call. |
557 call_alignment = | 560 call_alignment = |
558 ((normal_instruction & | 561 ((normal_instruction & |
559 # Direct call | 562 # Direct call |
560 ((data16 REX_RXB? 0xe8 rel16) | | 563 ((data16 REX_RXB? 0xe8 rel16) | |
561 (REX_WRXB? 0xe8 rel32) | | 564 (REX_WRXB? 0xe8 rel32) | |
562 (data16 REXW_RXB 0xe8 rel32))) | | 565 (data16 REXW_RXB 0xe8 rel32))) | |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
623 * the ValidateChunkAMD64 function. | 626 * the ValidateChunkAMD64 function. |
624 * | 627 * |
625 * It does not affect the case which we really care about (when code | 628 * It does not affect the case which we really care about (when code |
626 * is validatable), but makes it possible to detect more errors in one | 629 * is validatable), but makes it possible to detect more errors in one |
627 * run in tools like ncval. | 630 * run in tools like ncval. |
628 */ | 631 */ |
629 continue; | 632 continue; |
630 } | 633 } |
631 | 634 |
632 # This is main ragel machine: it does 99% of validation work. There are only | 635 # This is the main ragel machine: it does 99% of validation work. There is only |
633 # one thing to do with bundle if this machine accepts the bundle: | 636 # one thing to do with the bundle if this ragel machine accepts the bundle: |
634 # * check for the state of the restricted_register at the end of the bundle. | 637 # * check for the state of the restricted_register at the end of the bundle. |
635 # It's an error is %rbp or %rsp is restricted at the end of the bundle. | 638 # It's an error if %rbp or %rsp is restricted at the end of the bundle. |
636 # Additionally if all the bundles are fine you need to check that direct jumps | 639 # Additionally if all the bundles are fine you need to check that direct jumps |
637 # are corect. Thiis is done in the following way: | 640 # are correct. This is done in the following way: |
638 # * DFA fills two arrays: valid_targets and jump_dests. | 641 # * DFA fills two arrays: valid_targets and jump_dests. |
639 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". | 642 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". |
640 # All other checks are done here. | 643 # All other checks are done here. |
641 | 644 |
642 main := ((call_alignment | normal_instruction | special_instruction) | 645 main := ((call_alignment | normal_instruction | special_instruction) |
643 @end_of_instruction_cleanup)* | 646 @end_of_instruction_cleanup)* |
644 $!report_fatal_error; | 647 $!report_fatal_error; |
645 | 648 |
646 }%% | 649 }%% |
647 | 650 |
| 651 /* |
| 652 * The "write data" statement causes Ragel to emit the constant static data |
| 653 * needed by the ragel machine. |
| 654 */ |
648 %% write data; | 655 %% write data; |
649 | 656 |
650 enum OperandKind { | 657 enum OperandKind { |
651 OPERAND_SANDBOX_IRRELEVANT = 0, | 658 OPERAND_SANDBOX_IRRELEVANT = 0, |
652 /* | 659 /* |
653 * Currently we do not distinguish 8bit and 16bit modifications from | 660 * Currently we do not distinguish 8bit and 16bit modifications from |
654 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. | 661 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. |
655 * | 662 * |
656 * 8bit operands must be distinguished from other types because the REX prefix | 663 * 8bit operands must be distinguished from other types because the REX prefix |
657 * regulates the choice between %ah and %spl, as well as %ch and %bpl. | 664 * regulates the choice between %ah and %spl, as well as %ch and %bpl. |
(...skipping 503 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1161 free(jump_dests); | 1168 free(jump_dests); |
1162 free(valid_targets); | 1169 free(valid_targets); |
1163 errno = ENOMEM; | 1170 errno = ENOMEM; |
1164 return FALSE; | 1171 return FALSE; |
1165 } | 1172 } |
1166 } | 1173 } |
1167 | 1174 |
1168 /* | 1175 /* |
1169 * This option is usually used in tests: we will process the whole chunk | 1176 * This option is usually used in tests: we will process the whole chunk |
1170 * in one pass. Usually each bundle is processed separately which means | 1177 * in one pass. Usually each bundle is processed separately which means |
1171 * instructions (and super-instructions) can not cross borders of the bundle. | 1178 * instructions (and "superinstructions") can not cross borders of the bundle. |
1172 */ | 1179 */ |
1173 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) | 1180 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) |
1174 end_of_bundle = data + size; | 1181 end_of_bundle = data + size; |
1175 else | 1182 else |
1176 end_of_bundle = data + kBundleSize; | 1183 end_of_bundle = data + kBundleSize; |
1177 | 1184 |
1178 /* | 1185 /* |
1179 * Main loop. Here we process the data array bundle-after-bundle. | 1186 * Main loop. Here we process the data array bundle-after-bundle. |
1180 * Ragel-produced DFA does all the checks with one exception: direct jumps. | 1187 * Ragel-produced DFA does all the checks with one exception: direct jumps. |
1181 * It collects the two arrays: valid_targets and jump_dests which are used | 1188 * It collects the two arrays: valid_targets and jump_dests which are used |
(...skipping 15 matching lines...) Expand all Loading... |
1197 uint32_t operand_states = 0; | 1204 uint32_t operand_states = 0; |
1198 enum OperandName base = NO_REG; | 1205 enum OperandName base = NO_REG; |
1199 enum OperandName index = NO_REG; | 1206 enum OperandName index = NO_REG; |
1200 enum OperandName restricted_register = | 1207 enum OperandName restricted_register = |
1201 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options); | 1208 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options); |
1202 uint8_t rex_prefix = FALSE; | 1209 uint8_t rex_prefix = FALSE; |
1203 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ | 1210 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ |
1204 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; | 1211 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; |
1205 uint8_t vex_prefix3 = 0x00; | 1212 uint8_t vex_prefix3 = 0x00; |
1206 | 1213 |
| 1214 /* |
| 1215 * The "write init" statement causes Ragel to emit initialization code. |
| 1216 * This should be executed once before the ragel machine is started. |
| 1217 */ |
1207 %% write init; | 1218 %% write init; |
| 1219 /* |
| 1220 * The "write exec" statement causes Ragel to emit the ragel machine's |
| 1221 * execution code. |
| 1222 */ |
1208 %% write exec; | 1223 %% write exec; |
1209 | 1224 |
1210 /* | 1225 /* |
1211 * Ragel DFA accepted the bundle, but we still need to make sure the last | 1226 * Ragel DFA accepted the bundle, but we still need to make sure the last |
1212 * instruction haven't left %rbp or %rsp in restricted state. | 1227 * instruction hasn't left %rbp or %rsp in restricted state. |
1213 */ | 1228 */ |
1214 if (restricted_register == REG_RBP) | 1229 if (restricted_register == REG_RBP) |
1215 result &= user_callback(end_of_bundle, end_of_bundle, | 1230 result &= user_callback(end_of_bundle, end_of_bundle, |
1216 RESTRICTED_RBP_UNPROCESSED | | 1231 RESTRICTED_RBP_UNPROCESSED | |
1217 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) & | 1232 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) & |
(...skipping 11 matching lines...) Expand all Loading... |
1229 */ | 1244 */ |
1230 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, | 1245 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, |
1231 user_callback, callback_data); | 1246 user_callback, callback_data); |
1232 | 1247 |
1233 /* We only use malloc for a large code sequences */ | 1248 /* We only use malloc for large code sequences */ |
1234 if (jump_dests != &jump_dests_small) free(jump_dests); | 1249 if (jump_dests != &jump_dests_small) free(jump_dests); |
1235 if (valid_targets != &valid_targets_small) free(valid_targets); | 1250 if (valid_targets != &valid_targets_small) free(valid_targets); |
1236 if (!result) errno = EINVAL; | 1251 if (!result) errno = EINVAL; |
1237 return result; | 1252 return result; |
1238 } | 1253 } |
OLD | NEW |