OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. | 2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. |
3 * Use of this source code is governed by a BSD-style license that can be | 3 * Use of this source code is governed by a BSD-style license that can be |
4 * found in the LICENSE file. | 4 * found in the LICENSE file. |
5 */ | 5 */ |
6 | 6 |
7 /* | 7 /* |
8 * This is the core of ia32-mode validator. Please note that this file | 8 * This is the core of ia32-mode validator. Please note that this file |
9 * combines ragel machine description and C language actions. Please read | 9 * combines ragel machine description and C language actions. Please read |
10 * validator_internals.html first to understand how the whole thing is built: | 10 * validator_internals.html first to understand how the whole thing is built: |
11 * it explains how the byte sequences are constructed, what constructs like | 11 * it explains how the byte sequences are constructed, what constructs like |
12 * "@{}" or "REX_WRX?" mean, etc. | 12 * "@{}" or "REX_WRX?" mean, etc. |
13 */ | 13 */ |
14 | 14 |
15 #include <assert.h> | 15 #include <assert.h> |
16 #include <errno.h> | 16 #include <errno.h> |
17 #include <stddef.h> | 17 #include <stddef.h> |
18 #include <stdio.h> | 18 #include <stdio.h> |
19 #include <stdlib.h> | 19 #include <stdlib.h> |
20 #include <string.h> | 20 #include <string.h> |
21 | 21 |
22 #include "native_client/src/trusted/validator_ragel/bitmap.h" | 22 #include "native_client/src/trusted/validator_ragel/bitmap.h" |
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" | 23 #include "native_client/src/trusted/validator_ragel/validator_internal.h" |
24 | 24 |
25 /* Ignore this information: it's not used by security model in IA32 mode. */ | 25 /* Ignore this information: it's not used by security model in IA32 mode. */ |
| 26 /* TODO(khim): change gen_dfa to remove the need for these lines. */ |
26 #undef GET_VEX_PREFIX3 | 27 #undef GET_VEX_PREFIX3 |
27 #define GET_VEX_PREFIX3 0 | 28 #define GET_VEX_PREFIX3 0 |
28 #undef SET_VEX_PREFIX3 | 29 #undef SET_VEX_PREFIX3 |
29 #define SET_VEX_PREFIX3(P) | 30 #define SET_VEX_PREFIX3(PREFIX_BYTE) |
30 | 31 |
31 %%{ | 32 %%{ |
32 machine x86_32_validator; | 33 machine x86_32_validator; |
33 alphtype unsigned char; | 34 alphtype unsigned char; |
34 variable p current_position; | 35 variable p current_position; |
35 variable pe end_of_bundle; | 36 variable pe end_of_bundle; |
36 variable eof end_of_bundle; | 37 variable eof end_of_bundle; |
37 variable cs current_state; | 38 variable cs current_state; |
38 | 39 |
39 include byte_machine "byte_machines.rl"; | 40 include byte_machine "byte_machines.rl"; |
(...skipping 10 matching lines...) Expand all Loading... |
50 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | 51 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; |
51 include immediate_fields_parsing | 52 include immediate_fields_parsing |
52 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | 53 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; |
53 include relative_fields_validator_actions | 54 include relative_fields_validator_actions |
54 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | 55 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; |
55 include relative_fields_parsing | 56 include relative_fields_parsing |
56 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | 57 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; |
57 include cpuid_actions | 58 include cpuid_actions |
58 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; | 59 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; |
59 | 60 |
60 # Action which marks last byte as not immediate. Most 3DNow! instructions, | |
61 # some AVX and XOP instructions have this property. It's referenced by | |
62 # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl" | |
63 # file. | |
64 action last_byte_is_not_immediate { | |
65 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; | |
66 } | |
67 | |
68 include decode_x86_32 "validator_x86_32_instruction.rl"; | 61 include decode_x86_32 "validator_x86_32_instruction.rl"; |
69 | 62 |
70 special_instruction = | 63 special_instruction = |
71 # and $~0x1f, %eXX call %eXX | 64 # and $~0x1f, %eXX call %eXX |
72 # vvvvvvvvvv | 65 # vvvvvvvvvv |
73 (0x83 0xe0 0xe0 0xff (0xd0|0xe0) | # naclcall/jmp %eax | 66 (0x83 0xe0 0xe0 0xff (0xd0|0xe0) | # naclcall/jmp %eax |
74 0x83 0xe1 0xe0 0xff (0xd1|0xe1) | # naclcall/jmp %ecx | 67 0x83 0xe1 0xe0 0xff (0xd1|0xe1) | # naclcall/jmp %ecx |
75 0x83 0xe2 0xe0 0xff (0xd2|0xe2) | # naclcall/jmp %edx | 68 0x83 0xe2 0xe0 0xff (0xd2|0xe2) | # naclcall/jmp %edx |
76 0x83 0xe3 0xe0 0xff (0xd3|0xe3) | # naclcall/jmp %ebx | 69 0x83 0xe3 0xe0 0xff (0xd3|0xe3) | # naclcall/jmp %ebx |
77 0x83 0xe4 0xe0 0xff (0xd4|0xe4) | # naclcall/jmp %esp | 70 0x83 0xe4 0xe0 0xff (0xd4|0xe4) | # naclcall/jmp %esp |
78 0x83 0xe5 0xe0 0xff (0xd5|0xe5) | # naclcall/jmp %ebp | 71 0x83 0xe5 0xe0 0xff (0xd5|0xe5) | # naclcall/jmp %ebp |
79 0x83 0xe6 0xe0 0xff (0xd6|0xe6) | # naclcall/jmp %esi | 72 0x83 0xe6 0xe0 0xff (0xd6|0xe6) | # naclcall/jmp %esi |
80 0x83 0xe7 0xe0 0xff (0xd7|0xe7)) # naclcall/jmp %edi | 73 0x83 0xe7 0xe0 0xff (0xd7|0xe7)) # naclcall/jmp %edi |
81 # ^^^^ ^^^^ | 74 # ^^^^ ^^^^ |
82 # and $~0x1f, %eXX jmp %eXX | 75 # and $~0x1f, %eXX jmp %eXX |
83 @{ | 76 @{ |
84 UnmarkValidJumpTarget((current_position - data) - 1, valid_targets); | 77 UnmarkValidJumpTarget((current_position - codeblock) - 1, valid_targets); |
85 instruction_begin -= 3; | 78 instruction_begin -= 3; |
86 instruction_info_collected |= SPECIAL_INSTRUCTION; | 79 instruction_info_collected |= SPECIAL_INSTRUCTION; |
87 } | | 80 } | |
88 (0x65 0xa1 (0x00|0x04) 0x00 0x00 0x00 | # mov %gs:0x0/0x4,%eax | 81 (0x65 0xa1 (0x00|0x04) 0x00 0x00 0x00 | # mov %gs:0x0/0x4,%eax |
89 0x65 0x8b (0x05|0x0d|0x015|0x1d|0x25|0x2d|0x35|0x3d) | 82 0x65 0x8b (0x05|0x0d|0x015|0x1d|0x25|0x2d|0x35|0x3d) |
90 (0x00|0x04) 0x00 0x00 0x00); # mov %gs:0x0/0x4,%reg | 83 (0x00|0x04) 0x00 0x00 0x00); # mov %gs:0x0/0x4,%reg |
91 | 84 |
92 # Check if call is properly aligned | 85 # For direct call we explicitly encode all variations. |
93 # | 86 direct_call = (data16 0xe8 rel16) | (0xe8 rel32); |
94 # For direct call we explicitly encode all variations. For indirect call | 87 |
95 # we accept all the special instructions which ends with register-addressed | 88 # For indirect call we accept only near register-addressed indirect call. |
96 # indirect call. | 89 indirect_call_register = data16? 0xff (opcode_2 & modrm_registers); |
| 90 |
| 91 # Ragel machine that accepts one call instruction or call superinstruction and |
| 92 # checks if call is properly aligned. |
97 call_alignment = | 93 call_alignment = |
98 ((one_instruction & | 94 ((one_instruction & direct_call) | |
99 # Direct call | 95 # For indirect calls we accept all the special instructions which end with |
100 ((data16 0xe8 rel16) | | 96 # register-addressed indirect call. |
101 (0xe8 rel32))) | | 97 (special_instruction & (any* indirect_call_register))) |
102 (special_instruction & | |
103 # Indirect call | |
104 (any* data16? 0xff ((opcode_2 | opcode_3) any* & | |
105 modrm_registers)))) | |
106 # Call instruction must be aligned to the end of bundle. Previously this was | 98 # Call instruction must be aligned to the end of bundle. Previously this was |
107 # a strict requirement, today it's just a warning to aid with debugging. | 99 # a strict requirement, today it's just a warning to aid with debugging. |
108 @{ | 100 @{ |
109 if (((current_position - data) & kBundleMask) != kBundleMask) | 101 if (((current_position - codeblock) & kBundleMask) != kBundleMask) |
110 instruction_info_collected |= BAD_CALL_ALIGNMENT; | 102 instruction_info_collected |= BAD_CALL_ALIGNMENT; |
111 }; | 103 }; |
112 | 104 |
113 # This action calls user's callback (if needed) and cleans up validator's | 105 # This action calls user callback (if needed) and cleans up validator |
114 # internal state. | 106 # internal state. |
115 # | 107 # |
116 # We call the user callback if there are validation errors or if the | 108 # We call the user callback either on validation errors or on every |
117 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. | 109 # instruction, depending on the CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option. |
118 # | 110 # |
119 # After that we move instruction_begin and clean all the variables which | 111 # After that we move instruction_begin and clean all the variables which |
120 # only used in the processing of a single instruction (prefixes, operand | 112 # are only used in the processing of a single instruction (here it's just |
121 # states and instruction_info_collected). | 113 # instruction_info_collected; there is more state in the x86-64 case). |
122 action end_of_instruction_cleanup { | 114 action end_of_instruction_cleanup { |
123 /* Mark start of this instruction as a valid target for jump. */ | 115 /* Mark start of this instruction as a valid target for jump. */ |
124 MarkValidJumpTarget(instruction_begin - data, valid_targets); | 116 MarkValidJumpTarget(instruction_begin - codeblock, valid_targets); |
125 | 117 |
126 /* Call user-supplied callback. */ | 118 /* Call user-supplied callback. */ |
127 instruction_end = current_position + 1; | 119 instruction_end = current_position + 1; |
128 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) || | 120 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) || |
129 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) { | 121 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) { |
130 result &= user_callback(instruction_begin, instruction_end, | 122 result &= user_callback(instruction_begin, instruction_end, |
131 instruction_info_collected, callback_data); | 123 instruction_info_collected, callback_data); |
132 } | 124 } |
133 | 125 |
134 /* On successful match the instruction_begin must point to the next byte | 126 /* |
135 * to be able to report the new offset as the start of instruction | 127 * We may set instruction_begin at the first byte of the instruction instead |
136 * causing error. */ | 128 * of here but in the case of incorrect one byte instructions user callback |
| 129 * may be called before instruction_begin is set. |
| 130 */ |
137 instruction_begin = instruction_end; | 131 instruction_begin = instruction_end; |
138 | 132 |
139 /* Clear variables (well, one variable currently). */ | 133 /* Clear variables (well, one variable currently). */ |
140 instruction_info_collected = 0; | 134 instruction_info_collected = 0; |
141 } | 135 } |
142 | 136 |
143 # This action reports fatal error detected by DFA. | 137 # This action reports fatal error detected by DFA. |
144 action report_fatal_error { | 138 action report_fatal_error { |
145 result &= user_callback(instruction_begin, current_position, | 139 result &= user_callback(instruction_begin, current_position, |
146 UNRECOGNIZED_INSTRUCTION, callback_data); | 140 UNRECOGNIZED_INSTRUCTION, callback_data); |
147 /* | 141 /* |
148 * Process the next bundle: "continue" here is for the "for" cycle in | 142 * Process the next bundle: "continue" here is for the "for" cycle in |
149 * the ValidateChunkIA32 function. | 143 * the ValidateChunkIA32 function. |
150 * | 144 * |
151 * It does not affect the case which we really care about (when code | 145 * It does not affect the case which we really care about (when code |
152 * is validatable), but makes it possible to detect more errors in one | 146 * is validatable), but makes it possible to detect more errors in one |
153 * run in tools like ncval. | 147 * run in tools like ncval. |
154 */ | 148 */ |
155 continue; | 149 continue; |
156 } | 150 } |
157 | 151 |
158 # This is the main ragel machine: it does 99% of validation work. There is only | 152 # This is the main ragel machine: it does 99% of validation work. There is only |
159 # one thing to do if this machine accepts the bundles - check that direct | 153 # one thing to do if this ragel machine accepts the bundles - check that |
160 # jumps are correct. This is done in the following way: | 154 # direct jumps are correct. This is done in the following way: |
161 # * DFA fills two arrays: valid_targets and jump_dests. | 155 # * DFA fills two arrays: valid_targets and jump_dests. |
162 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". | 156 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". |
163 # All other checks are done here. | 157 # All other checks are done here. |
164 main := ((call_alignment | one_instruction | special_instruction) | 158 main := ((call_alignment | one_instruction | special_instruction) |
165 @end_of_instruction_cleanup)* | 159 @end_of_instruction_cleanup)* |
166 $!report_fatal_error; | 160 $!report_fatal_error; |
167 | 161 |
168 }%% | 162 }%% |
169 | 163 |
| 164 /* |
| 165 * The "write data" statement causes Ragel to emit the constant static data |
| 166 * needed by the ragel machine. |
| 167 */ |
170 %% write data; | 168 %% write data; |
171 | 169 |
172 | 170 Bool ValidateChunkIA32(const uint8_t codeblock[], |
173 Bool ValidateChunkIA32(const uint8_t *data, size_t size, | 171 size_t size, |
174 uint32_t options, | 172 uint32_t options, |
175 const NaClCPUFeaturesX86 *cpu_features, | 173 const NaClCPUFeaturesX86 *cpu_features, |
176 ValidationCallbackFunc user_callback, | 174 ValidationCallbackFunc user_callback, |
177 void *callback_data) { | 175 void *callback_data) { |
178 bitmap_word valid_targets_small; | 176 bitmap_word valid_targets_small; |
179 bitmap_word jump_dests_small; | 177 bitmap_word jump_dests_small; |
180 bitmap_word *valid_targets; | 178 bitmap_word *valid_targets; |
181 bitmap_word *jump_dests; | 179 bitmap_word *jump_dests; |
182 const uint8_t *current_position; | 180 const uint8_t *current_position; |
183 const uint8_t *end_of_bundle; | 181 const uint8_t *end_of_bundle; |
(...skipping 15 matching lines...) Expand all Loading... |
199 free(jump_dests); | 197 free(jump_dests); |
200 free(valid_targets); | 198 free(valid_targets); |
201 errno = ENOMEM; | 199 errno = ENOMEM; |
202 return FALSE; | 200 return FALSE; |
203 } | 201 } |
204 } | 202 } |
205 | 203 |
206 /* | 204 /* |
207 * This option is usually used in tests: we will process the whole chunk | 205 * This option is usually used in tests: we will process the whole chunk |
208 * in one pass. Usually each bundle is processed separately which means | 206 * in one pass. Usually each bundle is processed separately which means |
209 * instructions (and super-instructions) can not cross borders of the bundle. | 207 * instructions (and "superinstructions") can not cross borders of the bundle. |
210 */ | 208 */ |
211 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) | 209 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) |
212 end_of_bundle = data + size; | 210 end_of_bundle = codeblock + size; |
213 else | 211 else |
214 end_of_bundle = data + kBundleSize; | 212 end_of_bundle = codeblock + kBundleSize; |
215 | 213 |
216 /* | 214 /* |
217 * Main loop. Here we process the data array bundle-after-bundle. | 215 * Main loop. Here we process the data array bundle-after-bundle. |
218 * Ragel-produced DFA does all the checks with one exception: direct jumps. | 216 * Ragel-produced DFA does all the checks with one exception: direct jumps. |
219 * It collects the two arrays: valid_targets and jump_dests which are used | 217 * It collects the two arrays: valid_targets and jump_dests which are used |
220 * to test direct jumps later. | 218 * to test direct jumps later. |
221 */ | 219 */ |
222 for (current_position = data; | 220 for (current_position = codeblock; |
223 current_position < data + size; | 221 current_position < codeblock + size; |
224 current_position = end_of_bundle, | 222 current_position = end_of_bundle, |
225 end_of_bundle = current_position + kBundleSize) { | 223 end_of_bundle = current_position + kBundleSize) { |
226 /* Start of the instruction being processed. */ | 224 /* Start of the instruction being processed. */ |
227 const uint8_t *instruction_begin = current_position; | 225 const uint8_t *instruction_begin = current_position; |
228 /* Only used locally in the end_of_instruction_cleanup action. */ | 226 /* Only used locally in the end_of_instruction_cleanup action. */ |
229 const uint8_t *instruction_end; | 227 const uint8_t *instruction_end; |
230 uint32_t instruction_info_collected = 0; | 228 uint32_t instruction_info_collected = 0; |
231 int current_state; | 229 int current_state; |
232 | 230 |
| 231 /* |
| 232 * The "write init" statement causes Ragel to emit initialization code. |
| 233 * This should be executed once before the ragel machine is started. |
| 234 */ |
233 %% write init; | 235 %% write init; |
| 236 /* |
| 237 * The "write exec" statement causes Ragel to emit the ragel machine's |
| 238 * execution code. |
| 239 */ |
234 %% write exec; | 240 %% write exec; |
235 } | 241 } |
236 | 242 |
237 /* | 243 /* |
238 * Check the direct jumps. All the targets from jump_dests must be in | 244 * Check the direct jumps. All the targets from jump_dests must be in |
239 * valid_targets. | 245 * valid_targets. |
240 */ | 246 */ |
241 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, | 247 result &= ProcessInvalidJumpTargets(codeblock, |
242 user_callback, callback_data); | 248 size, |
| 249 valid_targets, |
| 250 jump_dests, |
| 251 user_callback, |
| 252 callback_data); |
243 | 253 |
244 /* We only use malloc for large code sequences */ | 254 /* We only use malloc for large code sequences */ |
245 if (jump_dests != &jump_dests_small) free(jump_dests); | 255 if (jump_dests != &jump_dests_small) free(jump_dests); |
246 if (valid_targets != &valid_targets_small) free(valid_targets); | 256 if (valid_targets != &valid_targets_small) free(valid_targets); |
247 if (!result) errno = EINVAL; | 257 if (!result) errno = EINVAL; |
248 return result; | 258 return result; |
249 } | 259 } |
OLD | NEW |