OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. | 2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. |
3 * Use of this source code is governed by a BSD-style license that can be | 3 * Use of this source code is governed by a BSD-style license that can be |
4 * found in the LICENSE file. | 4 * found in the LICENSE file. |
5 */ | 5 */ |
6 | 6 |
7 /* | 7 /* |
8 * This is the core of amd64-mode validator. Please note that this file | 8 * This is the core of amd64-mode validator. Please note that this file |
9 * combines ragel machine description and C language actions. Please read | 9 * combines ragel machine description and C language actions. Please read |
10 * validator_internals.html first to understand how the whole thing is built: | 10 * validator_internals.html first to understand how the whole thing is built: |
11 * it explains how the byte sequences are constructed, what constructs like | 11 * it explains how the byte sequences are constructed, what constructs like |
12 * "@{}" or "REX_WRX?" mean, etc. | 12 * "@{}" or "REX_WRX?" mean, etc. |
13 */ | 13 */ |
14 | 14 |
15 #include <assert.h> | 15 #include <assert.h> |
16 #include <errno.h> | 16 #include <errno.h> |
17 #include <stddef.h> | 17 #include <stddef.h> |
18 #include <stdio.h> | 18 #include <stdio.h> |
19 #include <stdlib.h> | 19 #include <stdlib.h> |
20 #include <string.h> | 20 #include <string.h> |
21 | 21 |
22 #include "native_client/src/trusted/validator_ragel/bitmap.h" | 22 #include "native_client/src/trusted/validator_ragel/bitmap.h" |
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" | 23 #include "native_client/src/trusted/validator_ragel/validator_internal.h" |
24 | 24 |
25 %%{ | 25 %%{ |
26 machine x86_64_validator; | 26 machine x86_64_validator; |
27 alphtype unsigned char; | 27 alphtype unsigned char; |
28 variable p current_position; | 28 variable p current_position; |
29 variable pe end_of_bundle; | 29 variable pe end_of_bundle; |
30 variable eof end_of_bundle; | 30 variable eof end_of_bundle; |
31 variable cs current_state; | 31 variable cs current_state; |
32 | 32 |
33 include byte_machine "byte_machines.rl"; | 33 include byte_machine "byte_machines.rl"; |
(...skipping 23 matching lines...)
57 include immediate_fields_parsing_amd64 | 57 include immediate_fields_parsing_amd64 |
58 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | 58 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; |
59 include relative_fields_validator_actions | 59 include relative_fields_validator_actions |
60 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | 60 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; |
61 include relative_fields_parsing | 61 include relative_fields_parsing |
62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | 62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; |
63 include cpuid_actions | 63 include cpuid_actions |
64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | 64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; |
65 | 65 |
66 action check_access { | 66 action check_access { |
67 CheckAccess(instruction_begin - data, base, index, restricted_register, | 67 CheckAccess(instruction_begin - codeblock, |
halyavin 2013/03/20 09:07:22: CheckMemoryAccess?
khim 2013/03/21 14:38:17: Done.
68 valid_targets, &instruction_info_collected); | 68 base, |
69 } | 69 index, |
70 | 70 restricted_register, |
71 # Action which marks last byte as not immediate. Most 3DNow! instructions, | 71 valid_targets, |
72 # some AVX and XOP instructions have this property. It's referenced by | 72 &instruction_info_collected); |
73 # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl" | |
74 # file. | |
75 action last_byte_is_not_immediate { | |
76 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; | |
77 } | 73 } |
78 | 74 |
79 action modifiable_instruction { | 75 action modifiable_instruction { |
80 instruction_info_collected |= MODIFIABLE_INSTRUCTION; | 76 instruction_info_collected |= MODIFIABLE_INSTRUCTION; |
81 } | 77 } |
82 | 78 |
83 action process_0_operands { | 79 action process_0_operands { |
84 Process0Operands(&restricted_register, &instruction_info_collected); | 80 Process0Operands(&restricted_register, &instruction_info_collected); |
85 } | 81 } |
86 action process_1_operand { | 82 action process_1_operand { |
(...skipping 42 matching lines...)
129 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). | 125 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). |
130 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp | 126 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp |
131 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp | 127 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp |
132 # Note: restricted_register keeps the restricted register as explained in | 128 # Note: restricted_register keeps the restricted register as explained in |
133 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems | 129 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems |
134 # | 130 # |
135 # "Normal" instructions can not be used in a place where %rbp is restricted. | 131 # "Normal" instructions can not be used in a place where %rbp is restricted. |
136 # But since these instructions are "second half" of the %rbp sandboxing they | 132 # But since these instructions are "second half" of the %rbp sandboxing they |
137 # can be used *only* when %rbp is restricted. | 133 # can be used *only* when %rbp is restricted. |
138 # | 134 # |
139 # That is (normal instruction): | 135 # Compare: |
140 # mov %eax,%ebp | 136 # mov %eax,%ebp |
141 # mov %esi,%edi <- Error: %ebp is restricted | 137 # mov %esi,%edi <- Error: %ebp is restricted |
142 # vs | 138 # vs |
143 # mov %esi,%edi | 139 # mov %esi,%edi |
144 # add %r15,%rbp <- Error: %ebp is *not* restricted | 140 # add %r15,%rbp <- Error: %ebp is *not* restricted |
145 # vs | 141 # vs |
146 # mov %eax,%ebp | 142 # mov %eax,%ebp |
147 # add %r15,%rbp <- Ok: %rbp is restricted as it should be | 143 # add %r15,%rbp <- Ok: %rbp is restricted as it should be |
148 # | 144 # |
149 # Check this precondition and mark the beginning of the instruction as | 145 # Check this precondition and mark the beginning of the instruction as |
150 # an invalid jump target. | 146 # an invalid jump target. |
151 @{ if (restricted_register == REG_RBP) | 147 @{ if (restricted_register == REG_RBP) |
148 /* RESTRICTED_REGISTER_USED is an informational flag used in tests. */ | |
152 instruction_info_collected |= RESTRICTED_REGISTER_USED; | 149 instruction_info_collected |= RESTRICTED_REGISTER_USED; |
153 else | 150 else |
151 /* UNRESTRICTED_RBP_PROCESSED is an error flag used in production. */ | |
154 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; | 152 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; |
155 restricted_register = NO_REG; | 153 restricted_register = NO_REG; |
156 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); | 154 UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets); |
157 }; | 155 }; |
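To make the guarantee behind this pairing concrete, here is a minimal standalone C model (an illustration under the usual NaCl assumptions, not validator code): the first instruction of the pair writes a 32-bit register, which zero-extends into the full 64-bit register, and the second one adds %r15, so the resulting %rbp always stays within the 4GB untrusted region based at %r15.

#include <stdint.h>

/*
 * Illustration only: models the effect of
 *   mov %eax,%ebp        (32-bit write, zero-extends into %rbp)
 *   add %r15,%rbp        (or lea 0x0(%rbp,%r15,1),%rbp)
 * The result is always in [r15, r15 + 0xffffffff].
 */
static uint64_t SandboxedRbpModel(uint64_t r15, uint64_t untrusted_value) {
  uint64_t rbp = (uint32_t) untrusted_value;  /* mov %eax,%ebp */
  rbp += r15;                                 /* add %r15,%rbp */
  return rbp;
}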
158 | 156 |
159 # Special %rsp modifications - the ones which don't need a sandboxing. | 157 # Special %rsp modifications - the ones which don't need a sandboxing. |
160 # | 158 # |
161 # Note that there are two different opcodes for "mov": in x86-64 there are two | 159 # Note that there are two different opcodes for "mov": in x86-64 there are two |
162 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | 160 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move |
163 # from REG field to RM or in the other direction thus there are two encodings | 161 # from REG field to RM or in the other direction thus there are two encodings |
164 # for the register-to-register move. | 162 # for the register-to-register move. |
165 rsp_modifications = | 163 rsp_modifications = |
166 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp | 164 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp |
(...skipping 37 matching lines...)
204 # mov %eax,%esp | 202 # mov %eax,%esp |
205 # add %r15,%rsp <- Ok: %rsp is restricted as it should be | 203 # add %r15,%rsp <- Ok: %rsp is restricted as it should be |
206 # | 204 # |
207 # Check this precondition and mark the beginning of the instruction as | 205 # Check this precondition and mark the beginning of the instruction as |
208 # an invalid jump target. | 206 # an invalid jump target. |
209 @{ if (restricted_register == REG_RSP) | 207 @{ if (restricted_register == REG_RSP) |
210 instruction_info_collected |= RESTRICTED_REGISTER_USED; | 208 instruction_info_collected |= RESTRICTED_REGISTER_USED; |
211 else | 209 else |
212 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; | 210 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; |
213 restricted_register = NO_REG; | 211 restricted_register = NO_REG; |
214 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); | 212 UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets); |
215 }; | 213 }; |
216 | 214 |
217 # naclcall or nacljmp. These are three-instruction indirection-jump sequences. | 215 # naclcall or nacljmp. These are three-instruction indirection-jump sequences. |
218 # and $~0x1f, %eXX | 216 # and $~0x1f, %eXX |
219 # and RBASE, %rXX | 217 # and RBASE, %rXX |
220 # jmpq *%rXX (or: callq *%rXX) | 218 # jmpq *%rXX (or: callq *%rXX) |
221 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not | 219 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not |
222 # just as part of the naclcall/nacljmp, but also as a standolene instruction). | 220 # just as part of the naclcall/nacljmp, but also as a standalene instruction). |
halyavin 2013/03/20 09:07:22: standalone
khim 2013/03/21 14:38:17: Done.
223 # | 221 # |
224 # This means that when the naclcall_or_nacljmp ragel machine is combined with | 222 # This means that when the naclcall_or_nacljmp ragel machine is combined with |
225 # "normal_instruction*", the regular action process_1_operand_zero_extends is | 223 # "normal_instruction*", the regular action process_1_operand_zero_extends is |
226 # triggered when the main ragel machine accepts the "and $~0x1f, %eXX" x86-64 | 224 # triggered when the main ragel machine accepts the "and $~0x1f, %eXX" x86-64 |
227 # instruction. That action checks whether %rbp/%rsp is legally modified, so | 225 # instruction. That action checks whether %rbp/%rsp is legally modified, so |
228 # we don't need to duplicate this logic in the naclcall_or_nacljmp machine. | 226 # we don't need to duplicate this logic in the naclcall_or_nacljmp machine. |
229 # | 227 # |
230 # There are a number of variants which differ in their use of the REX prefix: | 228 # There are a number of variants which differ in their use of the REX prefix: |
231 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%eXX" in "jmpq" | 229 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%eXX" in "jmpq" |
232 # or "callq" all name the same register, and that is much simpler to do if a | 230 # or "callq" all name the same register, and that is much simpler to do if a |
233 # single action handles only a fixed number of bytes. | 231 # single action handles only a fixed number of bytes. |
234 # | 232 # |
235 # Additional complication arises because x86-64 contains two different "add" | 233 # Additional complication arises because x86-64 contains two different "add" |
236 # instructions: with "0x01" and "0x03" opcodes. They differ in the direction | 234 # instructions: with "0x01" and "0x03" opcodes. They differ in the direction |
237 # used: both can encode "add %src_register, %dst_register", but the first one | 235 # used: both can encode "add %src_register, %dst_register", but the first one |
238 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M | 236 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M |
239 # byte for the dst while the last one uses field RM of the ModR/M byte for the src | 237 # byte for the dst while the last one uses field RM of the ModR/M byte for the src |
240 # and field REG of the ModR/M byte for dst. Both should be allowed. | 238 # and field REG of the ModR/M byte for dst. Both should be allowed. |
241 # | 239 # |
242 # See AMD/Intel manual for clarification "add" instruction encoding. | 240 # See AMD/Intel manual for clarification about “add” instruction encoding. |
halyavin 2013/03/20 09:07:22: why are you adding unicode quotes?
khim 2013/03/21 14:38:17: Fixed.
243 # | 241 # |
244 # REGISTER USAGE ABBREVIATIONS: | 242 # REGISTER USAGE ABBREVIATIONS: |
245 # E86: legacy ia32 registers (all eight: %eax to %edi) | 243 # E86: legacy ia32 registers (all eight: %eax to %edi) |
246 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) | 244 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) |
247 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) | 245 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) |
248 # R64: new amd64 registers (only seven: %r8 to %r14) | 246 # R64: new amd64 registers (only seven: %r8 to %r14) |
249 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) | 247 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) |
250 naclcall_or_nacljmp = | 248 naclcall_or_nacljmp = |
251 # This block encodes call and jump "superinstruction" of the following form: | 249 # This block encodes call and jump "superinstruction" of the following form: |
252 # 0: 83 e_ e0 and $~0x1f,E86 | 250 # 0: 83 e_ e0 and $~0x1f,E86 |
253 # 3: 4_ 01 f_ add RBASE,R86 | 251 # 3: 4_ 01 f_ add RBASE,R86 |
254 # 6: ff e_ jmpq *R86 | 252 # 6: ff e_ jmpq *R86 |
255 #### INSTRUCTION ONE (three bytes) | 253 #### INSTRUCTION ONE (three bytes) |
256 # and $~0x1f, E86 | 254 # and $~0x1f, E86 |
257 (0x83 b_11_100_xxx 0xe0 | 255 (0x83 b_11_100_xxx 0xe0 |
258 #### INSTRUCTION TWO (three bytes) | 256 #### INSTRUCTION TWO (three bytes) |
259 # add RBASE, R86 (0x01 opcode) | 257 # add RBASE, R86 (0x01 opcode) |
260 b_0100_11x0 0x01 b_11_111_xxx | 258 b_0100_11x0 0x01 b_11_111_xxx |
261 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | 259 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) |
262 # callq R86 | 260 # callq R86 |
263 ((REX_WRX? 0xff b_11_010_xxx) | | 261 ((REX_WRX? 0xff b_11_010_xxx) | |
264 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | 262 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) |
265 # jmpq R86 | 263 # jmpq R86 |
266 (REX_WRX? 0xff b_11_100_xxx))) | 264 (REX_WRX? 0xff b_11_100_xxx))) |
267 @{ | 265 @{ |
halyavin 2013/03/20 09:07:22: where instruction_begin points to in this action?
khim 2013/03/21 14:38:17: It points to the start of the last instruction, ob
268 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, | 266 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, |
269 &instruction_begin, current_position, | 267 &instruction_begin, |
270 data, valid_targets); | 268 current_position, |
269 codeblock, | |
270 valid_targets); | |
271 } | | 271 } | |
272 | 272 |
273 # This block encodes call and jump "superinstruction" of the following form: | 273 # This block encodes call and jump "superinstruction" of the following form: |
274 # 0: 83 e_ e0 and $~0x1f,E86 | 274 # 0: 83 e_ e0 and $~0x1f,E86 |
275 # 3: 4_ 03 _f add RBASE,R86 | 275 # 3: 4_ 03 _f add RBASE,R86 |
276 # 6: ff e_ jmpq *R86 | 276 # 6: ff e_ jmpq *R86 |
277 #### INSTRUCTION ONE (three bytes) | 277 #### INSTRUCTION ONE (three bytes) |
278 # and $~0x1f, E86 | 278 # and $~0x1f, E86 |
279 (0x83 b_11_100_xxx 0xe0 | 279 (0x83 b_11_100_xxx 0xe0 |
280 #### INSTRUCTION TWO (three bytes) | 280 #### INSTRUCTION TWO (three bytes) |
281 # add RBASE, R86 (0x03 opcode) | 281 # add RBASE, R86 (0x03 opcode) |
282 b_0100_10x1 0x03 b_11_xxx_111 | 282 b_0100_10x1 0x03 b_11_xxx_111 |
283 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | 283 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) |
284 # callq R86 | 284 # callq R86 |
285 ((REX_WRX? 0xff b_11_010_xxx) | | 285 ((REX_WRX? 0xff b_11_010_xxx) | |
286 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | 286 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) |
287 # jmpq R86 | 287 # jmpq R86 |
288 (REX_WRX? 0xff b_11_100_xxx))) | 288 (REX_WRX? 0xff b_11_100_xxx))) |
289 @{ | 289 @{ |
290 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, | 290 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, |
291 &instruction_begin, current_position, | 291 &instruction_begin, |
292 data, valid_targets); | 292 current_position, |
293 codeblock, | |
294 valid_targets); | |
293 } | | 295 } | |
294 | 296 |
295 # This block encodes call and jump "superinstruction" of the following form: | 297 # This block encodes call and jump "superinstruction" of the following form: |
296 # 0: 4_ 83 e_ e0 and $~0x1f,E86 | 298 # 0: 4_ 83 e_ e0 and $~0x1f,E86 |
297 # 4: 4_ 01 f_ add RBASE,R86 | 299 # 4: 4_ 01 f_ add RBASE,R86 |
298 # 7: ff e_ jmpq *R86 | 300 # 7: ff e_ jmpq *R86 |
299 #### INSTRUCTION ONE (four bytes) | 301 #### INSTRUCTION ONE (four bytes) |
300 # and $~0x1f, E86 | 302 # and $~0x1f, E86 |
301 ((REX_RX 0x83 b_11_100_xxx 0xe0 | 303 ((REX_RX 0x83 b_11_100_xxx 0xe0 |
302 #### INSTRUCTION TWO (three bytes) | 304 #### INSTRUCTION TWO (three bytes) |
(...skipping 17 matching lines...)
320 # add RBASE, R64 (0x01 opcode) | 322 # add RBASE, R64 (0x01 opcode) |
321 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111) | 323 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111) |
322 #### INSTRUCTION THREE: call (three bytes) | 324 #### INSTRUCTION THREE: call (three bytes) |
323 # callq R64 | 325 # callq R64 |
324 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | | 326 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | |
325 #### INSTRUCTION THREE: jmp (three bytes) | 327 #### INSTRUCTION THREE: jmp (three bytes) |
326 # jmpq R64 | 328 # jmpq R64 |
327 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) | 329 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) |
328 @{ | 330 @{ |
329 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, | 331 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, |
330 &instruction_begin, current_position, | 332 &instruction_begin, |
331 data, valid_targets); | 333 current_position, |
334 codeblock, | |
335 valid_targets); | |
332 } | | 336 } | |
333 | 337 |
334 # This block encodes call and jump "superinstruction" of the following form: | 338 # This block encodes call and jump "superinstruction" of the following form: |
335 # 0: 4_ 83 e_ e0 and $~0x1f,E86 | 339 # 0: 4_ 83 e_ e0 and $~0x1f,E86 |
336 # 4: 4_ 03 _f add RBASE,R86 | 340 # 4: 4_ 03 _f add RBASE,R86 |
337 # 7: ff e_ jmpq *R86 | 341 # 7: ff e_ jmpq *R86 |
338 #### INSTRUCTION ONE (four bytes) | 342 #### INSTRUCTION ONE (four bytes) |
339 # and $~0x1f, E86 | 343 # and $~0x1f, E86 |
340 ((REX_RX 0x83 b_11_100_xxx 0xe0 | 344 ((REX_RX 0x83 b_11_100_xxx 0xe0 |
341 #### INSTRUCTION TWO (three bytes) | 345 #### INSTRUCTION TWO (three bytes) |
(...skipping 17 matching lines...)
359 # add RBASE, R64 (0x03 opcode) | 363 # add RBASE, R64 (0x03 opcode) |
360 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111) | 364 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111) |
361 #### INSTRUCTION THREE: call (three bytes) | 365 #### INSTRUCTION THREE: call (three bytes) |
362 # callq R64 | 366 # callq R64 |
363 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | | 367 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | |
364 #### INSTRUCTION THREE: jmp (three bytes) | 368 #### INSTRUCTION THREE: jmp (three bytes) |
365 # jmpq R64 | 369 # jmpq R64 |
366 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) | 370 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) |
367 @{ | 371 @{ |
368 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, | 372 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, |
369 &instruction_begin, current_position, | 373 &instruction_begin, |
370 data, valid_targets); | 374 current_position, |
375 codeblock, | |
376 valid_targets); | |
371 }; | 377 }; |
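The register-consistency requirement described in the comments above can be illustrated with a small standalone check. This is a simplified sketch for the shortest variant only (no-REX "and", 0x01-opcode "add", no optional REX on the final jump/call), not the actual VerifyNaclCallOrJmpAddToRM code; the byte offsets follow the layout shown in the block comments.

#include <stdint.h>

/*
 * Sketch (for illustration): in the 8-byte sequence
 *   0: 83 eX e0      and  $~0x1f,%eXX
 *   3: 4c/4e 01 fX   add  %r15,%rXX
 *   6: ff eX         jmpq *%rXX     (or ff dX for callq)
 * the low three bits of each ModR/M byte name the register, so the
 * superinstruction is well-formed only if all three agree.
 */
static int NaclJmpUsesOneRegister(const uint8_t bytes[8]) {
  uint8_t and_reg = bytes[1] & 0x07;  /* ModR/M of "and":  11 100 reg */
  uint8_t add_reg = bytes[5] & 0x07;  /* ModR/M of "add":  11 111 reg */
  uint8_t jmp_reg = bytes[7] & 0x07;  /* ModR/M of "jmpq": 11 100 reg */
  return and_reg == add_reg && add_reg == jmp_reg;
}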
372 | 378 |
373 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand | 379 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand |
374 | 380 |
375 # maskmovq %mmX,%mmY (EMMX or SSE) | 381 # maskmovq %mmX,%mmY (EMMX or SSE) |
376 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers; | 382 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers; |
377 | 383 |
378 # maskmovdqu %xmmX, %xmmY (SSE2) | 384 # maskmovdqu %xmmX, %xmmY (SSE2) |
379 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers; | 385 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers; |
380 | 386 |
(...skipping 46 matching lines...)
427 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may | 433 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may |
428 # be used to move from REG field to RM or in the other direction thus there | 434 # be used to move from REG field to RM or in the other direction thus there |
429 # are two encodings for the register-to-register move (and since REG and RM | 435 # are two encodings for the register-to-register move (and since REG and RM |
430 # are identical here only opcode differs). | 436 # are identical here only opcode differs). |
431 sandbox_instruction_rsi_no_rdi = | 437 sandbox_instruction_rsi_no_rdi = |
432 (0x89 | 0x8b) 0xf6 # mov %esi,%esi | 438 (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
433 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | 439 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
434 string_instruction_rsi_no_rdi | 440 string_instruction_rsi_no_rdi |
435 @{ | 441 @{ |
436 ExpandSuperinstructionBySandboxingBytes( | 442 ExpandSuperinstructionBySandboxingBytes( |
437 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | 443 2 /* mov */ + 4 /* lea */, |
444 &instruction_begin, | |
445 codeblock, | |
446 valid_targets); | |
438 } | | 447 } | |
439 | 448 |
440 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi | 449 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
441 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | 450 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
442 string_instruction_rsi_no_rdi | 451 string_instruction_rsi_no_rdi |
443 @{ | 452 @{ |
444 ExpandSuperinstructionBySandboxingBytes( | 453 ExpandSuperinstructionBySandboxingBytes( |
445 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | 454 3 /* mov */ + 4 /* lea */, |
455 &instruction_begin, | |
456 codeblock, | |
457 valid_targets); | |
446 }; | 458 }; |
447 | 459 |
448 # "Superinstruction" which includes %rdi sandboxing. | 460 # "Superinstruction" which includes %rdi sandboxing. |
449 # | 461 # |
450 # There are two variants which handle spurious REX prefixes. | 462 # There are two variants which handle spurious REX prefixes. |
451 # | 463 # |
452 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 | 464 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 |
453 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may | 465 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may |
454 # be used to move from REG field to RM or in the other direction thus there | 466 # be used to move from REG field to RM or in the other direction thus there |
455 # are two encodings for the register-to-register move (and since REG and RM | 467 # are two encodings for the register-to-register move (and since REG and RM |
456 # are identical here only opcode differs). | 468 # are identical here only opcode differs). |
457 sandbox_instruction_rdi_no_rsi = | 469 sandbox_instruction_rdi_no_rsi = |
458 (0x89 | 0x8b) 0xff # mov %edi,%edi | 470 (0x89 | 0x8b) 0xff # mov %edi,%edi |
459 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi | 471 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
460 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) | 472 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
461 @{ | 473 @{ |
462 ExpandSuperinstructionBySandboxingBytes( | 474 ExpandSuperinstructionBySandboxingBytes( |
463 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | 475 2 /* mov */ + 4 /* lea */, |
476 &instruction_begin, | |
477 codeblock, | |
478 valid_targets); | |
464 } | | 479 } | |
465 | 480 |
466 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi | 481 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
halyavin 2013/03/20 09:07:22: what this dot means?
khim 2013/03/21 14:38:17: Concatenation. Removed since apparently it makes c
467 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi | 482 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
468 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) | 483 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) |
469 @{ | 484 @{ |
470 ExpandSuperinstructionBySandboxingBytes( | 485 ExpandSuperinstructionBySandboxingBytes( |
471 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | 486 3 /* mov */ + 4 /* lea */, |
487 &instruction_begin, | |
488 codeblock, | |
489 valid_targets); | |
472 }; | 490 }; |
473 | 491 |
474 | 492 |
475 # "Superinstruction" which includes both %rsi and %rdi sandboxing. | 493 # "Superinstruction" which includes both %rsi and %rdi sandboxing. |
476 # | 494 # |
477 # There are four variants which handle spurious REX prefixes. | 495 # There are four variants which handle spurious REX prefixes. |
478 # | 496 # |
479 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both | 497 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both |
480 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two | 498 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two |
481 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | 499 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move |
482 # from REG field to RM or in the other direction thus there are two encodings | 500 # from REG field to RM or in the other direction thus there are two encodings |
483 # for the register-to-register move (and since REG and RM are identical here | 501 # for the register-to-register move (and since REG and RM are identical here |
484 # only opcode differs). | 502 # only opcode differs). |
485 sandbox_instruction_rsi_rdi = | 503 sandbox_instruction_rsi_rdi = |
486 (0x89 | 0x8b) 0xf6 # mov %esi,%esi | 504 (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
487 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | 505 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
488 (0x89 | 0x8b) 0xff # mov %edi,%edi | 506 (0x89 | 0x8b) 0xff # mov %edi,%edi |
489 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi | 507 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi |
490 string_instruction_rsi_rdi | 508 string_instruction_rsi_rdi |
491 @{ | 509 @{ |
492 ExpandSuperinstructionBySandboxingBytes( | 510 ExpandSuperinstructionBySandboxingBytes( |
493 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, | 511 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, |
494 &instruction_begin, data, valid_targets); | 512 &instruction_begin, |
513 codeblock, | |
514 valid_targets); | |
495 } | | 515 } | |
496 | 516 |
497 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi | 517 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi |
498 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | 518 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
499 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi | 519 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi |
500 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi | 520 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi |
501 | 521 |
502 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi | 522 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi |
503 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | 523 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi |
504 (0x89 | 0x8b) 0xff # mov %edi,%edi | 524 (0x89 | 0x8b) 0xff # mov %edi,%edi |
505 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi | 525 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi |
506 string_instruction_rsi_rdi | 526 string_instruction_rsi_rdi |
507 @{ | 527 @{ |
508 ExpandSuperinstructionBySandboxingBytes( | 528 ExpandSuperinstructionBySandboxingBytes( |
509 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ | 529 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ |
510 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, | 530 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, |
511 &instruction_begin, data, valid_targets); | 531 &instruction_begin, |
532 codeblock, | |
533 valid_targets); | |
512 } | | 534 } | |
513 | 535 |
514 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi | 536 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi |
515 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi | 537 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi |
516 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi | 538 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi |
517 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi | 539 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi |
518 string_instruction_rsi_rdi | 540 string_instruction_rsi_rdi |
519 @{ | 541 @{ |
520 ExpandSuperinstructionBySandboxingBytes( | 542 ExpandSuperinstructionBySandboxingBytes( |
521 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, | 543 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, |
522 &instruction_begin, data, valid_targets); | 544 &instruction_begin, |
545 codeblock, | |
546 valid_targets); | |
523 }; | 547 }; |
524 | 548 |
525 # All the "special" instructions (== instructions which obey non-standard | 549 # All the "special" instructions (== instructions which obey non-standard |
526 # rules). Three groups: | 550 # rules). Three groups: |
527 # * %rbp/%rsp related instructions (these instructions are special because | 551 # * %rbp/%rsp related instructions (these instructions are special because |
528 # they must be in the range %r15...%r15+4294967295 except momentarily they | 552 # they must be in the range %r15...%r15+4294967295 except momentarily they |
halyavin 2013/03/20 09:07:22: they=instructions or registers?
khim 2013/03/21 14:38:17: Reformulated more clearly.
529 # can be in the range 0...4294967295) | 553 # can be in the range 0...4294967295) |
530 # * string instructions (which can not use %r15 as base and thus need special | 554 # * string instructions (which can not use %r15 as base and thus need special |
531 # handling both in compiler and validator) | 555 # handling both in compiler and validator) |
532 # * naclcall/nacljmp (indirect jumps need special care) | 556 # * naclcall/nacljmp (indirect jumps need special care) |
533 special_instruction = | 557 special_instruction = |
534 (rbp_modifications | | 558 (rbp_modifications | |
535 rsp_modifications | | 559 rsp_modifications | |
536 rbp_sandboxing | | 560 rbp_sandboxing | |
537 rsp_sandboxing | | 561 rsp_sandboxing | |
538 sandbox_instruction_rsi_no_rdi | | 562 sandbox_instruction_rsi_no_rdi | |
539 sandbox_instruction_rdi_no_rsi | | 563 sandbox_instruction_rdi_no_rsi | |
540 sandbox_instruction_rsi_rdi | | 564 sandbox_instruction_rsi_rdi | |
541 naclcall_or_nacljmp) | 565 naclcall_or_nacljmp) |
542 # Mark the instruction as special - currently this information is used only | 566 # Mark the instruction as special - currently this information is used only |
543 # in tests, but in the future we may use it for dynamic code modification | 567 # in tests, but in the future we may use it for dynamic code modification |
544 # support. | 568 # support. |
halyavin 2013/03/20 09:07:22: This comment is incorrect. We do use it for dynami
khim 2013/03/21 14:38:17: This comment is correct. We don't use this flag in
545 @{ | 569 @{ |
546 instruction_info_collected |= SPECIAL_INSTRUCTION; | 570 instruction_info_collected |= SPECIAL_INSTRUCTION; |
547 }; | 571 }; |
548 | 572 |
549 # Remove special instructions which are only allowed in special cases. | 573 # Remove special instructions which are only allowed in special cases. |
550 normal_instruction = one_instruction - special_instruction; | 574 normal_instruction = one_instruction - special_instruction; |
551 | 575 |
552 # Check if call is properly aligned. | 576 # For direct call we explicitly encode all variations. |
553 # | 577 direct_call = (data16 REX_RXB? 0xe8 rel16) | |
554 # For direct call we explicitly encode all variations. For indirect call | 578 (REX_WRXB? 0xe8 rel32) | |
555 # we accept all the special instructions which ends with register-addressed | 579 (data16 REXW_RXB 0xe8 rel32); |
556 # indirect call. | 580 |
581 # For indirect call we accept only near register-addressed indirect call. | |
582 indirect_call_register = data16? REX_WRXB? 0xff (opcode_2 & modrm_registers); | |
583 | |
584 # Ragel machine that accepts one call instruction or call superinstruction and | |
585 # checks if call is properly aligned. | |
557 call_alignment = | 586 call_alignment = |
558 ((normal_instruction & | 587 ((normal_instruction & direct_call) |
559 # Direct call | 588 # For indirect calls we accept all the special instructions which end with |
560 ((data16 REX_RXB? 0xe8 rel16) | | 589 # register-addressed indirect call. |
561 (REX_WRXB? 0xe8 rel32) | | 590 (special_instruction & (any* indirect_call_register))) |
562 (data16 REXW_RXB 0xe8 rel32))) | | |
563 (special_instruction & | |
564 # Indirect call | |
565 (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* & | |
566 modrm_registers)))) | |
567 # The call instruction must be aligned to the end of the bundle. Previously | 591 # The call instruction must be aligned to the end of the bundle. Previously |
568 # this was a strict requirement; today it's just a warning to aid debugging. | 592 # this was a strict requirement; today it's just a warning to aid debugging. |
569 @{ | 593 @{ |
570 if (((current_position - data) & kBundleMask) != kBundleMask) | 594 if (((current_position - data) & kBundleMask) != kBundleMask) |
571 instruction_info_collected |= BAD_CALL_ALIGNMENT; | 595 instruction_info_collected |= BAD_CALL_ALIGNMENT; |
572 }; | 596 }; |
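The test above can be read as follows (a sketch; the 32-byte bundle size and the derived mask are the usual NaCl constants and are assumed here): a call is aligned when its last byte is the last byte of a bundle, so that the return address lands on a bundle start, which is always a valid jump target.

#include <stddef.h>

/* Sketch, assuming the standard 32-byte NaCl bundle. */
enum { kSketchBundleSize = 32, kSketchBundleMask = kSketchBundleSize - 1 };

static int CallIsBundleAligned(size_t call_last_byte_offset) {
  /* Aligned when the call ends at offset 31, 63, 95, ... of the code block. */
  return (call_last_byte_offset & kSketchBundleMask) == kSketchBundleMask;
}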
573 | 597 |
574 # This action calls user's callback (if needed) and cleans up validator's | 598 # This action calls user's callback (if needed) and cleans up validator's |
575 # internal state. | 599 # internal state. |
576 # | 600 # |
577 # We call the user callback if there are validation errors or if the | 601 # We call the user callback if there are validation errors or if the |
578 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. | 602 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. |
579 # | 603 # |
580 # After that we move instruction_begin and clean all the variables which | 604 # After that we move instruction_begin and clean all the variables which |
581 # only used in the processing of a single instruction (prefixes, operand | 605 # only used in the processing of a single instruction (prefixes, operand |
582 # states and instruction_info_collected). | 606 # states and instruction_info_collected). |
halyavin 2013/03/20 09:07:22: Comment duplication.
khim 2013/03/21 14:38:17: Oops. Fixed.
607 # This action calls users callback (if needed) and cleans up validators | |
608 # internal state. | |
609 # | |
610 # We call the user callback either on validation errors or on every | |
611 # instruction, depending on CALL_USER_CALLBACK_ON_EACH_INSTRUTION option. | |
612 # | |
613 # After that we move instruction_begin and clean all the variables which | |
614 # are only used in the processing of a single instruction (prefixes, operand | |
615 # states and instruction_info_collected). | |
583 action end_of_instruction_cleanup { | 616 action end_of_instruction_cleanup { |
584 /* Call user-supplied callback. */ | 617 /* Call user-supplied callback. */ |
585 instruction_end = current_position + 1; | 618 instruction_end = current_position + 1; |
586 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) || | 619 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) || |
587 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) { | 620 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) { |
588 result &= user_callback( | 621 result &= user_callback( |
589 instruction_begin, instruction_end, | 622 instruction_begin, instruction_end, |
590 instruction_info_collected | | 623 instruction_info_collected | |
591 ((restricted_register << RESTRICTED_REGISTER_SHIFT) & | 624 ((restricted_register << RESTRICTED_REGISTER_SHIFT) & |
592 RESTRICTED_REGISTER_MASK), callback_data); | 625 RESTRICTED_REGISTER_MASK), callback_data); |
593 } | 626 } |
594 | 627 |
595 /* On successful match the instruction_begin must point to the next byte | 628 /* On successful match the instruction_begin must point to the next byte |
596 * to be able to report the new offset as the start of instruction | 629 * to be able to report the new offset as the start of instruction |
halyavin 2013/03/20 09:07:22: Change as in 32-bit validator.
khim 2013/03/21 14:38:17: Done.
597 * causing error. */ | 630 * causing error. */ |
598 instruction_begin = instruction_end; | 631 instruction_begin = instruction_end; |
599 | 632 |
600 /* Mark start of the next instruction as a valid target for jump. | 633 /* Mark start of the next instruction as a valid target for jump. |
601 * Note: we mark start of the next instruction here, not start of the | 634 * Note: we mark start of the next instruction here, not start of the |
602 * current one because memory access check should be able to clear this | 635 * current one because memory access check should be able to clear this |
603 * bit when restricted register is used. */ | 636 * bit when restricted register is used. */ |
604 MarkValidJumpTarget(instruction_begin - data, valid_targets); | 637 MarkValidJumpTarget(instruction_begin - codeblock, valid_targets); |
605 | 638 |
606 /* Clear variables. */ | 639 /* Clear variables. */ |
607 instruction_info_collected = 0; | 640 instruction_info_collected = 0; |
608 SET_REX_PREFIX(FALSE); | 641 SET_REX_PREFIX(FALSE); |
609 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ | 642 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ |
610 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B); | 643 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B); |
611 SET_VEX_PREFIX3(0x00); | 644 SET_VEX_PREFIX3(0x00); |
612 operand_states = 0; | 645 operand_states = 0; |
613 base = 0; | 646 base = 0; |
614 index = 0; | 647 index = 0; |
615 } | 648 } |
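For reference, a user callback compatible with the calls above might look like the sketch below. The signature is inferred from the user_callback(...) call sites in this file and is therefore an assumption, as is the int return type; the RESTRICTED_REGISTER_* and VALIDATION_ERRORS_MASK constants come from validator_internal.h.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical callback sketch: decodes the restricted register packed into
 * the info word and keeps validating unless a real error was reported. */
static int ExampleCallback(const uint8_t *instruction_begin,
                           const uint8_t *instruction_end,
                           uint32_t info,
                           void *callback_data) {
  uint32_t restricted =
      (info & RESTRICTED_REGISTER_MASK) >> RESTRICTED_REGISTER_SHIFT;
  (void) callback_data;
  fprintf(stderr, "%d-byte instruction, restricted register #%u\n",
          (int) (instruction_end - instruction_begin), restricted);
  return (info & VALIDATION_ERRORS_MASK) == 0;
}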
616 | 649 |
617 # This action reports fatal error detected by DFA. | 650 # This action reports fatal error detected by DFA. |
618 action report_fatal_error { | 651 action report_fatal_error { |
619 result &= user_callback(instruction_begin, current_position, | 652 result &= user_callback(instruction_begin, current_position, |
620 UNRECOGNIZED_INSTRUCTION, callback_data); | 653 UNRECOGNIZED_INSTRUCTION, callback_data); |
621 /* | 654 /* |
622 * Process the next bundle: "continue" here is for the "for" cycle in | 655 * Process the next bundle: "continue" here is for the "for" cycle in |
623 * the ValidateChunkAMD64 function. | 656 * the ValidateChunkAMD64 function. |
624 * | 657 * |
625 * It does not affect the case which we really care about (when code | 658 * It does not affect the case which we really care about (when code |
626 * is validatable), but makes it possible to detect more errors in one | 659 * is validatable), but makes it possible to detect more errors in one |
627 * run in tools like ncval. | 660 * run in tools like ncval. |
628 */ | 661 */ |
629 continue; | 662 continue; |
630 } | 663 } |
631 | 664 |
632 # This is the main ragel machine: it does 99% of the validation work. There is | 665 # This is the main ragel machine: it does 99% of the validation work. There is |
633 # only one thing to do with a bundle if this machine accepts the bundle: | 666 # only one thing to do with a bundle if this ragel machine accepts the bundle: |
634 # * check for the state of the restricted_register at the end of the bundle. | 667 # * check for the state of the restricted_register at the end of the bundle. |
635 # It's an error if %rbp or %rsp is restricted at the end of the bundle. | 668 # It's an error if %rbp or %rsp is restricted at the end of the bundle. |
636 # Additionally if all the bundles are fine you need to check that direct jumps | 669 # Additionally if all the bundles are fine you need to check that direct jumps |
637 # are correct. This is done in the following way: | 670 # are correct. This is done in the following way: |
638 # * DFA fills two arrays: valid_targets and jump_dests. | 671 # * DFA fills two arrays: valid_targets and jump_dests. |
639 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". | 672 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". |
640 # All other checks are done here. | 673 # All other checks are done here. |
641 | 674 |
642 main := ((call_alignment | normal_instruction | special_instruction) | 675 main := ((call_alignment | normal_instruction | special_instruction) |
643 @end_of_instruction_cleanup)* | 676 @end_of_instruction_cleanup)* |
644 $!report_fatal_error; | 677 $!report_fatal_error; |
645 | 678 |
646 }%% | 679 }%% |
647 | 680 |
681 /* | |
682 * The "write data" statement causes Ragel to emit the constant static data | |
683 * needed by the ragel machine. | |
684 */ | |
648 %% write data; | 685 %% write data; |
649 | 686 |
650 enum OperandKind { | 687 enum OperandKind { |
651 OPERAND_SANDBOX_IRRELEVANT = 0, | 688 OPERAND_SANDBOX_IRRELEVANT = 0, |
652 /* | 689 /* |
653 * Currently we do not distinguish 8bit and 16bit modifications from | 690 * Currently we do not distinguish 8bit and 16bit modifications from |
halyavin 2013/03/20 09:07:22: modifications of what?
halyavin 2013/03/20 15:19:19: 8bit->64bit
654 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. | 691 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. |
655 * | 692 * |
656 * 8bit operands must be distinguished from other types because the REX prefix | 693 * 8bit operands must be distinguished from other types because the REX prefix |
657 * regulates the choice between %ah and %spl, as well as %ch and %bpl. | 694 * regulates the choice between %ah and %spl, as well as %ch and %bpl. |
658 */ | 695 */ |
659 OPERAND_SANDBOX_8BIT, | 696 OPERAND_SANDBOX_8BIT, |
halyavin 2013/03/20 15:19:19: 8bit register that is modified by instruction.
khim 2013/03/21 14:38:17: Done.
660 OPERAND_SANDBOX_RESTRICTED, | 697 OPERAND_SANDBOX_RESTRICTED, |
halyavin 2013/03/20 15:19:19: 32-bit register that is modified by instruction. T
khim 2013/03/21 14:38:17: Done.
661 OPERAND_SANDBOX_UNRESTRICTED | 698 OPERAND_SANDBOX_UNRESTRICTED |
halyavin 2013/03/20 15:19:19: 64-bit or 16-bit register that is modified by inst
khim 2013/03/21 14:38:17: Done.
662 }; | 699 }; |
663 | 700 |
664 #define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3)) | 701 #define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3)) |
halyavin 2013/03/20 09:07:22: Replace N, S and T with more descriptive names or
khim 2013/03/21 14:38:17: Done.
665 #define SET_OPERAND_TYPE(N, T) SET_OPERAND_TYPE_ ## T(N) | 702 #define SET_OPERAND_TYPE(N, T) SET_OPERAND_TYPE_ ## T(N) |
666 #define SET_OPERAND_TYPE_OPERAND_TYPE_8_BIT(N) \ | 703 #define SET_OPERAND_TYPE_OPERAND_TYPE_8_BIT(N) \ |
667 operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3)) | 704 operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3)) |
668 #define SET_OPERAND_TYPE_OPERAND_TYPE_16_BIT(N) \ | 705 #define SET_OPERAND_TYPE_OPERAND_TYPE_16_BIT(N) \ |
669 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) | 706 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) |
670 #define SET_OPERAND_TYPE_OPERAND_TYPE_32_BIT(N) \ | 707 #define SET_OPERAND_TYPE_OPERAND_TYPE_32_BIT(N) \ |
671 operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3)) | 708 operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3)) |
672 #define SET_OPERAND_TYPE_OPERAND_TYPE_64_BIT(N) \ | 709 #define SET_OPERAND_TYPE_OPERAND_TYPE_64_BIT(N) \ |
673 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) | 710 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) |
674 #define CHECK_OPERAND(N, S, T) \ | 711 #define CHECK_OPERAND(N, S, T) \ |
(...skipping 15 matching lines...)
690 else | 727 else |
691 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER; | 728 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER; |
692 } else { | 729 } else { |
693 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER; | 730 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER; |
694 } | 731 } |
695 } | 732 } |
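The rule enforced by the memory-access check whose tail is visible above can be summarized with a rough standalone sketch. This is an assumption about its overall shape for illustration, not the real helper (which also updates valid_targets and reports RESTRICTED_REGISTER_USED), and REG_RIP is assumed to be part of enum OperandName like the other register names used in this file.

/* Rough sketch of the amd64 memory-access rule: the base must be %rip, %rbp,
 * %rsp or %r15, and an index is only allowed when it is the register that was
 * just restricted (zero-extended) by the previous instruction. */
static void CheckMemoryAccessSketch(enum OperandName base,
                                    enum OperandName index,
                                    enum OperandName restricted_register,
                                    uint32_t *instruction_info_collected) {
  if (base == REG_RIP || base == REG_RBP ||
      base == REG_RSP || base == REG_R15) {
    if (index == NO_REG || index == restricted_register)
      ;  /* The access stays within the sandbox. */
    else
      *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER;
  } else {
    *instruction_info_collected |= FORBIDDEN_BASE_REGISTER;
  }
}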
696 | 733 |
697 | 734 |
698 static INLINE void Process0Operands(enum OperandName *restricted_register, | 735 static INLINE void Process0Operands(enum OperandName *restricted_register, |
699 uint32_t *instruction_info_collected) { | 736 uint32_t *instruction_info_collected) { |
700 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | 737 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
halyavin 2013/03/20 09:07:22: Maybe extract to separate function.
khim 2013/03/21 14:38:17: Done.
701 * instruction, not with regular instruction. */ | 738 * instruction, not with regular instruction. */ |
702 if (*restricted_register == REG_RSP) { | 739 if (*restricted_register == REG_RSP) { |
703 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | 740 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
704 } else if (*restricted_register == REG_RBP) { | 741 } else if (*restricted_register == REG_RBP) { |
705 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | 742 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
706 } | 743 } |
707 *restricted_register = NO_REG; | 744 *restricted_register = NO_REG; |
halyavin 2013/03/20 09:07:22: /* Every instruction clears restricted register ev
khim 2013/03/21 14:38:17: Done.
708 } | 745 } |
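A small usage sketch of the semantics above (illustrative only): when a plain zero-operand instruction is reached while %rbp is still restricted, the restriction is reported as unprocessed and then dropped.

/* Illustration only: the expected effect of Process0Operands on a dangling
 * restricted %rbp. */
static void Process0OperandsExample(void) {
  enum OperandName restricted_register = REG_RBP;
  uint32_t instruction_info_collected = 0;
  Process0Operands(&restricted_register, &instruction_info_collected);
  /* Now restricted_register == NO_REG and
   * (instruction_info_collected & RESTRICTED_RBP_UNPROCESSED) != 0. */
}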
709 | 746 |
710 static INLINE void Process1Operand(enum OperandName *restricted_register, | 747 static INLINE void Process1Operand(enum OperandName *restricted_register, |
711 uint32_t *instruction_info_collected, | 748 uint32_t *instruction_info_collected, |
712 uint8_t rex_prefix, | 749 uint8_t rex_prefix, |
713 uint32_t operand_states) { | 750 uint32_t operand_states) { |
714 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | 751 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special |
715 * instruction, not with regular instruction. */ | 752 * instruction, not with regular instruction. */ |
716 if (*restricted_register == REG_RSP) { | 753 if (*restricted_register == REG_RSP) { |
717 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | 754 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; |
718 } else if (*restricted_register == REG_RBP) { | 755 } else if (*restricted_register == REG_RBP) { |
719 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | 756 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; |
720 } | 757 } |
721 *restricted_register = NO_REG; | 758 *restricted_register = NO_REG; |
722 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | 759 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || |
723 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | 760 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || |
724 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | 761 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { |
725 *instruction_info_collected |= R15_MODIFIED; | 762 *instruction_info_collected |= R15_MODIFIED; |
726 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | 763 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
727 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || | 764 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || |
728 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | 765 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { |
729 *instruction_info_collected |= BPL_MODIFIED; | 766 *instruction_info_collected |= BPL_MODIFIED; |
halyavin 2013/03/20 15:19:19: BPL_MODIFIED->BP_MODIFIED
khim 2013/03/21 14:38:17: Done.
730 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | 767 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || |
731 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || | 768 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || |
732 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | 769 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { |
733 *instruction_info_collected |= SPL_MODIFIED; | 770 *instruction_info_collected |= SPL_MODIFIED; |
734 } | 771 } |
735 } | 772 } |
736 | 773 |
737 static INLINE void Process1OperandZeroExtends( | 774 static INLINE void Process1OperandZeroExtends( |
738 enum OperandName *restricted_register, | 775 enum OperandName *restricted_register, |
739 uint32_t *instruction_info_collected, | 776 uint32_t *instruction_info_collected, |
(...skipping 106 matching lines...)
846 } | 883 } |
847 } | 884 } |
848 | 885 |
849 /* | 886 /* |
850 * This function merges "dangerous" instruction with sandboxing instructions to | 887 * This function merges "dangerous" instruction with sandboxing instructions to |
851 * get a "superinstruction" and unmarks in-between jump targets. | 888 * get a "superinstruction" and unmarks in-between jump targets. |
852 */ | 889 */ |
853 static INLINE void ExpandSuperinstructionBySandboxingBytes( | 890 static INLINE void ExpandSuperinstructionBySandboxingBytes( |
854 size_t sandbox_instructions_size, | 891 size_t sandbox_instructions_size, |
855 const uint8_t **instruction_begin, | 892 const uint8_t **instruction_begin, |
856 const uint8_t *data, | 893 const uint8_t codeblock[], |
857 bitmap_word *valid_targets) { | 894 bitmap_word *valid_targets) { |
858 *instruction_begin -= sandbox_instructions_size; | 895 *instruction_begin -= sandbox_instructions_size; |
859 /* | 896 /* |
860 * We need to unmark start of the "dangerous" instruction itself, too, but we | 897 * We need to unmark start of the "dangerous" instruction itself, too, but we |
861 * don't need to mark the beginning of the whole "superinstruction" - that's | 898 * don't need to mark the beginning of the whole "superinstruction" - that's |
862 * why we move start by one byte and don't change the length. | 899 * why we move start by one byte and don't change the length. |
863 */ | 900 */ |
864 UnmarkValidJumpTargets((*instruction_begin + 1 - data), | 901 UnmarkValidJumpTargets((*instruction_begin + 1 - codeblock), |
865 sandbox_instructions_size, | 902 sandbox_instructions_size, |
866 valid_targets); | 903 valid_targets); |
867 } | 904 } |
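A worked example of this bookkeeping (illustrative offsets only): suppose a string superinstruction occupies offsets 32..38 of the code block, with six sandboxing bytes (mov %esi,%esi at offset 32, lea (%r15,%rsi,1),%rsi at offset 34) followed by a one-byte string instruction at offset 38. The machine recognizes the pattern with instruction_begin pointing at offset 38; calling ExpandSuperinstructionBySandboxingBytes(6, ...) moves instruction_begin back to offset 32 and unmarks offsets 33..38 in valid_targets, so the start of the whole superinstruction remains the only valid jump target inside it.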
868 | 905 |
869 /* | 906 /* |
870 * Return TRUE if naclcall or nacljmp uses the same register in all three | 907 * Return TRUE if naclcall or nacljmp uses the same register in all three |
871 * instructions. | 908 * instructions. |
872 * | 909 * |
873 * This version is for the case where "add %src_register, %dst_register" with | 910 * This version is for the case where "add %src_register, %dst_register" with |
874 * dst in RM field and src in REG field of ModR/M byte is used. | 911 * dst in RM field and src in REG field of ModR/M byte is used. |
(...skipping 109 matching lines...)
984 * 0: 83 eX e0 and $~0x1f,E86 | 1021 * 0: 83 eX e0 and $~0x1f,E86 |
985 * 3: 4? 01 fX add RBASE,R86 | 1022 * 3: 4? 01 fX add RBASE,R86 |
986 * 6: 4? ff eX jmpq *R86 | 1023 * 6: 4? ff eX jmpq *R86 |
987 * ^ ^ | 1024 * ^ ^ |
988 * instruction_begin current_position | 1025 * instruction_begin current_position |
989 */ | 1026 */ |
990 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex( | 1027 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex( |
991 uint32_t *instruction_info_collected, | 1028 uint32_t *instruction_info_collected, |
992 const uint8_t **instruction_begin, | 1029 const uint8_t **instruction_begin, |
993 const uint8_t *current_position, | 1030 const uint8_t *current_position, |
994 const uint8_t *data, | 1031 const uint8_t codeblock[], |
995 bitmap_word *valid_targets) { | 1032 bitmap_word *valid_targets) { |
996 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) | 1033 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) |
997 ExpandSuperinstructionBySandboxingBytes( | 1034 ExpandSuperinstructionBySandboxingBytes( |
998 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | 1035 3 /* and */ + 3 /* add */, |
1036 instruction_begin, | |
1037 codeblock, | |
1038 valid_targets); | |
999 else | 1039 else |
1000 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | 1040 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
1001 } | 1041 } |
1002 | 1042 |
1003 /* | 1043 /* |
1004 * This function checks that naclcall or nacljmp are correct (that is: three | 1044 * This function checks that naclcall or nacljmp are correct (that is: three |
1005 * component instructions match) and if that is true then it merges call or jmp | 1045 * component instructions match) and if that is true then it merges call or jmp |
1006 * with a sandboxing to get a "superinstruction" and removes in-between jump | 1046 * with a sandboxing to get a "superinstruction" and removes in-between jump |
1007 * targets. If it's not true then it triggers "unrecognized instruction" error | 1047 * targets. If it's not true then it triggers "unrecognized instruction" error |
1008 * condition. | 1048 * condition. |
(...skipping 12 matching lines...)
1021 * 0: 83 eX e0 and $~0x1f,E86 | 1061 * 0: 83 eX e0 and $~0x1f,E86 |
1022 * 3: 4? 03 Xf add RBASE,R86 | 1062 * 3: 4? 03 Xf add RBASE,R86 |
1023 * 6: 4? ff eX jmpq *R86 | 1063 * 6: 4? ff eX jmpq *R86 |
1024 * ^ ^ | 1064 * ^ ^ |
1025 * instruction_begin current_position | 1065 * instruction_begin current_position |
1026 */ | 1066 */ |
1027 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex( | 1067 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex( |
1028 uint32_t *instruction_info_collected, | 1068 uint32_t *instruction_info_collected, |
1029 const uint8_t **instruction_begin, | 1069 const uint8_t **instruction_begin, |
1030 const uint8_t *current_position, | 1070 const uint8_t *current_position, |
1031 const uint8_t *data, | 1071 const uint8_t codeblock[], |
1032 bitmap_word *valid_targets) { | 1072 bitmap_word *valid_targets) { |
1033 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) | 1073 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) |
1034 ExpandSuperinstructionBySandboxingBytes( | 1074 ExpandSuperinstructionBySandboxingBytes( |
1035 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | 1075 3 /* and */ + 3 /* add */, |
1076 instruction_begin, | |
1077 codeblock, | |
1078 valid_targets); | |
1036 else | 1079 else |
1037 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | 1080 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
1038 } | 1081 } |
1039 | 1082 |
1040 /* | 1083 /* |
1041 * This function checks that naclcall or nacljmp are correct (that is: three | 1084 * This function checks that naclcall or nacljmp are correct (that is: three |
1042 * component instructions match) and if that is true then it merges call or jmp | 1085 * component instructions match) and if that is true then it merges call or jmp |
1043 * with a sandboxing to get a "superinstruction" and removes in-between jump | 1086 * with a sandboxing to get a "superinstruction" and removes in-between jump |
1044 * targets. If it's not true then it triggers "unrecognized instruction" error | 1087 * targets. If it's not true then it triggers "unrecognized instruction" error |
1045 * condition. | 1088 * condition. |
(...skipping 18 matching lines...)
1064 * 0: 4? 83 eX e0 and $~0x1f,E64 | 1107 * 0: 4? 83 eX e0 and $~0x1f,E64 |
1065 * 4: 4? 01 fX add RBASE,R64 | 1108 * 4: 4? 01 fX add RBASE,R64 |
1066 * 7: 4? ff eX jmpq *R64 | 1109 * 7: 4? ff eX jmpq *R64 |
1067 * ^ ^ | 1110 * ^ ^ |
1068 * instruction_begin current_position | 1111 * instruction_begin current_position |
1069 */ | 1112 */ |
1070 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex( | 1113 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex( |
1071 uint32_t *instruction_info_collected, | 1114 uint32_t *instruction_info_collected, |
1072 const uint8_t **instruction_begin, | 1115 const uint8_t **instruction_begin, |
1073 const uint8_t *current_position, | 1116 const uint8_t *current_position, |
1074 const uint8_t *data, | 1117 const uint8_t codeblock[], |
1075 bitmap_word *valid_targets) { | 1118 bitmap_word *valid_targets) { |
1076 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) | 1119 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) |
1077 ExpandSuperinstructionBySandboxingBytes( | 1120 ExpandSuperinstructionBySandboxingBytes( |
1078 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | 1121 4 /* and */ + 3 /* add */, |
1122 instruction_begin, | |
1123 codeblock, | |
1124 valid_targets); | |
1079 else | 1125 else |
1080 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | 1126 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
1081 } | 1127 } |
1082 | 1128 |
1083 /* | 1129 /* |
1084 * This function checks that a naclcall or nacljmp is correct (that is, its three | 1130 * This function checks that a naclcall or nacljmp is correct (that is, its three |
1085 * component instructions match); if so, it merges the call or jmp | 1131 * component instructions match); if so, it merges the call or jmp |
1086 * with the sandboxing instructions into a "superinstruction" and removes the in-between jump | 1132 * with the sandboxing instructions into a "superinstruction" and removes the in-between jump |
1087 * targets. Otherwise it triggers the "unrecognized instruction" error | 1133 * targets. Otherwise it triggers the "unrecognized instruction" error |
1088 * condition. | 1134 * condition. |
(...skipping 18 matching lines...) | |
1107 * 0: 4? 83 eX e0 and $~0x1f,E64 | 1153 * 0: 4? 83 eX e0 and $~0x1f,E64 |
1108 * 4: 4? 03 Xf add RBASE,R64 | 1154 * 4: 4? 03 Xf add RBASE,R64 |
1109 * 7: 4? ff eX jmpq *R64 | 1155 * 7: 4? ff eX jmpq *R64 |
1110 * ^ ^ | 1156 * ^ ^ |
1111 * instruction_begin current_position | 1157 * instruction_begin current_position |
1112 */ | 1158 */ |
1113 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex( | 1159 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex( |
1114 uint32_t *instruction_info_collected, | 1160 uint32_t *instruction_info_collected, |
1115 const uint8_t **instruction_begin, | 1161 const uint8_t **instruction_begin, |
1116 const uint8_t *current_position, | 1162 const uint8_t *current_position, |
1117 const uint8_t *data, | 1163 const uint8_t codeblock[], |
1118 bitmap_word *valid_targets) { | 1164 bitmap_word *valid_targets) { |
1119 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) | 1165 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) |
1120 ExpandSuperinstructionBySandboxingBytes( | 1166 ExpandSuperinstructionBySandboxingBytes( |
1121 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | 1167 4 /* and */ + 3 /* add */, |
1168 instruction_begin, | |
1169 codeblock, | |
1170 valid_targets); | |
1122 else | 1171 else |
1123 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | 1172 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; |
1124 } | 1173 } |
1125 | 1174 |
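Similarly, a hypothetical instance (reconstructed here, not quoted from the source) of the REX variant above, using %r11 as the target register; because the and itself now needs a REX prefix, 4 + 3 = 7 sandboxing bytes are merged:

    41 83 e3 e0   and    $0xffffffe0,%r11d  /* clear the low 5 bits of the target  */
    4d 03 df      add    %r15,%r11          /* add the sandbox base register RBASE */
    41 ff e3      jmpq   *%r11              /* the indirect jump itself            */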
1126 | 1175 |
1127 Bool ValidateChunkAMD64(const uint8_t *data, size_t size, | 1176 Bool ValidateChunkAMD64(const uint8_t codeblock[], |
1177 size_t size, | |
1128 uint32_t options, | 1178 uint32_t options, |
1129 const NaClCPUFeaturesX86 *cpu_features, | 1179 const NaClCPUFeaturesX86 *cpu_features, |
1130 ValidationCallbackFunc user_callback, | 1180 ValidationCallbackFunc user_callback, |
1131 void *callback_data) { | 1181 void *callback_data) { |
1132 bitmap_word valid_targets_small; | 1182 bitmap_word valid_targets_small; |
1133 bitmap_word jump_dests_small; | 1183 bitmap_word jump_dests_small; |
1134 bitmap_word *valid_targets; | 1184 bitmap_word *valid_targets; |
1135 bitmap_word *jump_dests; | 1185 bitmap_word *jump_dests; |
1136 const uint8_t *current_position; | 1186 const uint8_t *current_position; |
1137 const uint8_t *end_of_bundle; | 1187 const uint8_t *end_of_bundle; |
(...skipping 23 matching lines...) | |
1161 free(jump_dests); | 1211 free(jump_dests); |
1162 free(valid_targets); | 1212 free(valid_targets); |
1163 errno = ENOMEM; | 1213 errno = ENOMEM; |
1164 return FALSE; | 1214 return FALSE; |
1165 } | 1215 } |
1166 } | 1216 } |
1167 | 1217 |
1168 /* | 1218 /* |
1169 * This option is usually used in tests: we will process the whole chunk | 1219 * This option is usually used in tests: we will process the whole chunk |
1170 * in one pass. Usually each bundle is processed separately, which means | 1220 * in one pass. Usually each bundle is processed separately, which means |
1171 * instructions (and super-instructions) cannot cross bundle boundaries. | 1221 * instructions (and "superinstructions") cannot cross bundle boundaries. |
1172 */ | 1222 */ |
1173 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) | 1223 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) |
1174 end_of_bundle = data + size; | 1224 end_of_bundle = codeblock + size; |
1175 else | 1225 else |
1176 end_of_bundle = data + kBundleSize; | 1226 end_of_bundle = codeblock + kBundleSize; |
1177 | 1227 |
1178 /* | 1228 /* |
1179 * Main loop. Here we process the data array bundle-after-bundle. | 1229 * Main loop. Here we process the codeblock array bundle-after-bundle. |
1180 * Ragel-produced DFA does all the checks with one exception: direct jumps. | 1230 * Ragel-produced DFA does all the checks with one exception: direct jumps. |
1181 * It collects two arrays, valid_targets and jump_dests, which are used | 1231 * It collects two arrays, valid_targets and jump_dests, which are used |
1182 * to test direct jumps later. | 1232 * to test direct jumps later. |
1183 */ | 1233 */ |
1184 for (current_position = data; | 1234 for (current_position = codeblock; |
1185 current_position < data + size; | 1235 current_position < codeblock + size; |
1186 current_position = end_of_bundle, | 1236 current_position = end_of_bundle, |
1187 end_of_bundle = current_position + kBundleSize) { | 1237 end_of_bundle = current_position + kBundleSize) { |
1188 /* Start of the instruction being processed. */ | 1238 /* Start of the instruction being processed. */ |
1189 const uint8_t *instruction_begin = current_position; | 1239 const uint8_t *instruction_begin = current_position; |
1190 /* Only used locally in the end_of_instruction_cleanup action. */ | 1240 /* Only used locally in the end_of_instruction_cleanup action. */ |
1191 const uint8_t *instruction_end; | 1241 const uint8_t *instruction_end; |
1192 int current_state; | 1242 int current_state; |
1193 uint32_t instruction_info_collected = 0; | 1243 uint32_t instruction_info_collected = 0; |
1194 /* Keeps one byte of information per operand in the current instruction: | 1244 /* Keeps one byte of information per operand in the current instruction: |
1195 * 2 bits for register kinds, | 1245 * 2 bits for register kinds, |
1196 * 5 bits for register numbers (16 regs plus RIZ). */ | 1246 * 5 bits for register numbers (16 regs plus RIZ). */ |
1197 uint32_t operand_states = 0; | 1247 uint32_t operand_states = 0; |
1198 enum OperandName base = NO_REG; | 1248 enum OperandName base = NO_REG; |
1199 enum OperandName index = NO_REG; | 1249 enum OperandName index = NO_REG; |
1200 enum OperandName restricted_register = | 1250 enum OperandName restricted_register = |
1201 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options); | 1251 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options); |
1202 uint8_t rex_prefix = FALSE; | 1252 uint8_t rex_prefix = FALSE; |
1203 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ | 1253 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ |
1204 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; | 1254 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; |
1205 uint8_t vex_prefix3 = 0x00; | 1255 uint8_t vex_prefix3 = 0x00; |
1206 | 1256 |
1257 /* | |
1258 * The "write init" statement causes Ragel to emit initialization code. | |
1259 * This should be executed once before the ragel machine is started. | |
1260 */ | |
1207 %% write init; | 1261 %% write init; |
1262 /* | |
1263 * The "write exec" statement causes Ragel to emit the ragel machine's | |
1264 * execution code. | |
1265 */ | |
1208 %% write exec; | 1266 %% write exec; |
1209 | 1267 |
1210 /* | 1268 /* |
1211 * Ragel DFA accepted the bundle, but we still need to make sure the last | 1269 * Ragel DFA accepted the bundle, but we still need to make sure the last |
1212 * instruction hasn't left %rbp or %rsp in a restricted state. | 1270 * instruction hasn't left %rbp or %rsp in a restricted state. |
1213 */ | 1271 */ |
1214 if (restricted_register == REG_RBP) | 1272 if (restricted_register == REG_RBP) |
1215 result &= user_callback(end_of_bundle, end_of_bundle, | 1273 result &= user_callback(end_of_bundle, end_of_bundle, |
1216 RESTRICTED_RBP_UNPROCESSED | | 1274 RESTRICTED_RBP_UNPROCESSED | |
1217 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) & | 1275 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) & |
1218 RESTRICTED_REGISTER_MASK), callback_data); | 1276 RESTRICTED_REGISTER_MASK), callback_data); |
1219 else if (restricted_register == REG_RSP) | 1277 else if (restricted_register == REG_RSP) |
1220 result &= user_callback(end_of_bundle, end_of_bundle, | 1278 result &= user_callback(end_of_bundle, end_of_bundle, |
1221 RESTRICTED_RSP_UNPROCESSED | | 1279 RESTRICTED_RSP_UNPROCESSED | |
1222 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) & | 1280 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) & |
1223 RESTRICTED_REGISTER_MASK), callback_data); | 1281 RESTRICTED_REGISTER_MASK), callback_data); |
1224 } | 1282 } |
1225 | 1283 |
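As an illustration (assembly reconstructed here, not quoted from a test), the restriction works like this: a 32-bit write such as the mov below puts %rbp into the restricted state, and the fix-up add must follow immediately within the same bundle. If a bundle ends right after the mov, the loop above reports RESTRICTED_RBP_UNPROCESSED through the callback:

    89 c5         mov    %eax,%ebp          /* puts %rbp into the restricted state      */
    4c 01 fd      add    %r15,%rbp          /* must follow immediately, in same bundle  */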
1226 /* | 1284 /* |
1227 * Check the direct jumps. All the targets from jump_dests must be in | 1285 * Check the direct jumps. All the targets from jump_dests must be in |
1228 * valid_targets. | 1286 * valid_targets. |
1229 */ | 1287 */ |
1230 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, | 1288 result &= ProcessInvalidJumpTargets(codeblock, |
1231 user_callback, callback_data); | 1289 size, |
1290 valid_targets, | |
1291 jump_dests, | |
1292 user_callback, | |
1293 callback_data); | |
1232 | 1294 |
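ProcessInvalidJumpTargets itself lives elsewhere in the validator; conceptually it performs the check promised by the main-loop comment, roughly as in the sketch below. The helper and error names (BitmapIsBitSet, BAD_JUMP_TARGET) are assumptions, and the real code works on whole bitmap words rather than single bits:

    /* Sketch only: flag every direct-jump destination that is not a valid
     * instruction (or superinstruction) start.                             */
    static Bool SketchProcessInvalidJumpTargets(
        const uint8_t codeblock[], size_t size,
        const bitmap_word *valid_targets, const bitmap_word *jump_dests,
        ValidationCallbackFunc user_callback, void *callback_data) {
      Bool result = TRUE;
      size_t offset;
      for (offset = 0; offset < size; offset++)
        if (BitmapIsBitSet(jump_dests, offset) &&
            !BitmapIsBitSet(valid_targets, offset))
          result &= user_callback(codeblock + offset, codeblock + offset,
                                  BAD_JUMP_TARGET, callback_data);
      return result;
    }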
1233 /* We only use malloc for large code sequences. */ | 1295 /* We only use malloc for large code sequences. */ |
1234 if (jump_dests != &jump_dests_small) free(jump_dests); | 1296 if (jump_dests != &jump_dests_small) free(jump_dests); |
1235 if (valid_targets != &valid_targets_small) free(valid_targets); | 1297 if (valid_targets != &valid_targets_small) free(valid_targets); |
1236 if (!result) errno = EINVAL; | 1298 if (!result) errno = EINVAL; |
1237 return result; | 1299 return result; |
1238 } | 1300 } |
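A hypothetical caller (not part of this file; kFullCPUIDFeatures and the exact callback contract are assumptions based on the validator's public header) might drive this entry point as follows, using PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM to validate a test chunk in a single pass:

    /* Sketch only: count validation errors and reject the chunk on any error. */
    static Bool CountErrors(const uint8_t *begin, const uint8_t *end,
                            uint32_t info, void *data) {
      (void)begin; (void)end; (void)info;
      ++*(size_t *)data;
      return FALSE;  /* FALSE keeps the chunk invalid; TRUE would waive the error. */
    }

    static Bool ValidateTestChunk(const uint8_t *code, size_t code_size) {
      size_t errors = 0;
      return ValidateChunkAMD64(code, code_size,
                                PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM,
                                &kFullCPUIDFeatures,  /* assumed "all features" preset */
                                CountErrors, &errors);
    }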