Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(735)

Side by Side Diff: src/trusted/validator_ragel/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. 2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be 3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file. 4 * found in the LICENSE file.
5 */ 5 */
6 6
7 /* 7 /*
8 * This is the core of amd64-mode validator. Please note that this file 8 * This is the core of amd64-mode validator. Please note that this file
9 * combines ragel machine description and C language actions. Please read 9 * combines ragel machine description and C language actions. Please read
10 * validator_internals.html first to understand how the whole thing is built: 10 * validator_internals.html first to understand how the whole thing is built:
11 * it explains how the byte sequences are constructed, what constructs like 11 * it explains how the byte sequences are constructed, what constructs like
12 * "@{}" or "REX_WRX?" mean, etc. 12 * "@{}" or "REX_WRX?" mean, etc.
13 */ 13 */
14 14
15 #include <assert.h> 15 #include <assert.h>
16 #include <errno.h> 16 #include <errno.h>
17 #include <stddef.h> 17 #include <stddef.h>
18 #include <stdio.h> 18 #include <stdio.h>
19 #include <stdlib.h> 19 #include <stdlib.h>
20 #include <string.h> 20 #include <string.h>
21 21
22 #include "native_client/src/trusted/validator_ragel/bitmap.h" 22 #include "native_client/src/trusted/validator_ragel/bitmap.h"
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" 23 #include "native_client/src/trusted/validator_ragel/validator_internal.h"
24 24
25 %%{ 25 %%{
26 machine x86_64_validator; 26 machine x86_64_validator;
27 alphtype unsigned char; 27 alphtype unsigned char;
28 variable p current_position; 28 variable p current_position;
29 variable pe end_of_bundle; 29 variable pe end_of_bundle;
30 variable eof end_of_bundle; 30 variable eof end_of_bundle;
31 variable cs current_state; 31 variable cs current_state;
32 32
33 include byte_machine "byte_machines.rl"; 33 include byte_machine "byte_machines.rl";
(...skipping 20 matching lines...) Expand all
54 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 54 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
55 include immediate_fields_parsing 55 include immediate_fields_parsing
56 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 56 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
57 include relative_fields_validator_actions 57 include relative_fields_validator_actions
58 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 58 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
59 include relative_fields_parsing 59 include relative_fields_parsing
60 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 60 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
61 include cpuid_actions 61 include cpuid_actions
62 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 62 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
63 63
64 action check_access { 64 action check_memory_access {
65 CheckAccess(instruction_begin - data, base, index, restricted_register, 65 CheckMemoryAccess(instruction_begin - codeblock,
66 valid_targets, &instruction_info_collected); 66 base,
67 } 67 index,
68 68 restricted_register,
69 # Action which marks last byte as not immediate. Most 3DNow! instructions, 69 valid_targets,
70 # some AVX and XOP instructions have this property. It's referenced by 70 &instruction_info_collected);
71 # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl"
72 # file.
73 action last_byte_is_not_immediate {
74 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
75 } 71 }
76 72
77 action modifiable_instruction { 73 action modifiable_instruction {
78 instruction_info_collected |= MODIFIABLE_INSTRUCTION; 74 instruction_info_collected |= MODIFIABLE_INSTRUCTION;
79 } 75 }
80 76
81 action process_0_operands { 77 action process_0_operands {
82 Process0Operands(&restricted_register, &instruction_info_collected); 78 Process0Operands(&restricted_register, &instruction_info_collected);
83 } 79 }
84 action process_1_operand { 80 action process_1_operand {
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). 123 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
128 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp 124 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp
129 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp 125 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
130 # Note: restricted_register keeps the restricted register as explained in 126 # Note: restricted_register keeps the restricted register as explained in
131 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems 127 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
132 # 128 #
133 # "Normal" instructions can not be used in a place where %rbp is restricted. 129 # "Normal" instructions can not be used in a place where %rbp is restricted.
134 # But since these instructions are "second half" of the %rbp sandboxing they 130 # But since these instructions are "second half" of the %rbp sandboxing they
135 # can be used *only* when %rbp is restricted. 131 # can be used *only* when %rbp is restricted.
136 # 132 #
137 # That is (normal instruction): 133 # Compare:
138 # mov %eax,%ebp 134 # mov %eax,%ebp
139 # mov %esi,%edi <- Error: %ebp is restricted 135 # mov %esi,%edi <- Error: %ebp is restricted
140 # vs 136 # vs
141 # mov %esi,%edi 137 # mov %esi,%edi
142 # add %r15,%rbp <- Error: %ebp is *not* restricted 138 # add %r15,%rbp <- Error: %ebp is *not* restricted
143 # vs 139 # vs
144 # mov %eax,%ebp 140 # mov %eax,%ebp
145 # add %r15,%rbp <- Ok: %rbp is restricted as it should be 141 # add %r15,%rbp <- Ok: %rbp is restricted as it should be
146 # 142 #
147 # Check this precondition and mark the beginning of the instruction as 143 # Check this precondition and mark the beginning of the instruction as
148 # an invalid jump target. 144 # an invalid jump target.
149 @{ if (restricted_register == REG_RBP) 145 @{ if (restricted_register == REG_RBP)
146 /* RESTRICTED_REGISTER_USED is an informational flag used in tests. */
150 instruction_info_collected |= RESTRICTED_REGISTER_USED; 147 instruction_info_collected |= RESTRICTED_REGISTER_USED;
151 else 148 else
149 /* UNRESTRICTED_RBP_PROCESSED is an error flag used in production. */
152 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; 150 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
153 restricted_register = NO_REG; 151 restricted_register = NO_REG;
154 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); 152 UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets);
155 }; 153 };
156 154
157 # Special %rsp modifications - the ones which don't need a sandboxing. 155 # Special %rsp modifications - the ones which don't need a sandboxing.
158 # 156 #
159 # Note that there are two different opcodes for "mov": in x86-64 there are two 157 # Note that there are two different opcodes for "mov": in x86-64 there are two
160 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move 158 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
161 # from REG field to RM or in the other direction thus there are two encodings 159 # from REG field to RM or in the other direction thus there are two encodings
162 # for the register-to-register move. 160 # for the register-to-register move.
163 rsp_modifications = 161 rsp_modifications =
164 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp 162 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 # mov %eax,%esp 200 # mov %eax,%esp
203 # add %r15,%rsp <- Ok: %rsp is restricted as it should be 201 # add %r15,%rsp <- Ok: %rsp is restricted as it should be
204 # 202 #
205 # Check this precondition and mark the beginning of the instruction as 203 # Check this precondition and mark the beginning of the instruction as
206 # an invalid jump target. 204 # an invalid jump target.
207 @{ if (restricted_register == REG_RSP) 205 @{ if (restricted_register == REG_RSP)
208 instruction_info_collected |= RESTRICTED_REGISTER_USED; 206 instruction_info_collected |= RESTRICTED_REGISTER_USED;
209 else 207 else
210 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; 208 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
211 restricted_register = NO_REG; 209 restricted_register = NO_REG;
212 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); 210 UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets);
213 }; 211 };
214 212
215 # naclcall or nacljmp. These are three-instruction indirection-jump sequences. 213 # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
216 # and $~0x1f, %eXX 214 # and $~0x1f, %eXX
217 # add RBASE, %rXX 215 # add RBASE, %rXX
218 # jmpq *%rXX (or: callq *%rXX) 216 # jmpq *%rXX (or: callq *%rXX)
219 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not 217 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
220 # just as part of the naclcall/nacljmp, but also as a standolene instruction). 218 # just as part of the naclcall/nacljmp, but also as a standalone instruction).
221 # 219 #
222 # This means that when naclcall_or_nacljmp ragel machine will be combined with 220 # This means that when naclcall_or_nacljmp ragel machine will be combined with
223 # "normal_instruction*" regular action process_1_operand_zero_extends will be 221 # "normal_instruction*" regular action process_1_operand_zero_extends will be
224 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64 222 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64
225 # instruction. This action will check if %rbp/%rsp is legally modified thus 223 # instruction. This action will check if %rbp/%rsp is legally modified thus
226 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine. 224 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
227 # 225 #
228 # There are a number of variants present which differ by the REX prefix usage: 226 # There are a number of variants present which differ by the REX prefix usage:
229 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq" 227 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq"
230 # or "callq" is the same register and it's much simpler to do if one single 228 # or "callq" is the same register and it's much simpler to do if one single
231 # action handles only fixed number of bytes. 229 # action handles only fixed number of bytes.
232 # 230 #
233 # Additional complication arises because x86-64 contains two different "add" 231 # Additional complication arises because x86-64 contains two different "add"
234 # instruction: with "0x01" and "0x03" opcode. They differ in the direction 232 # instruction: with "0x01" and "0x03" opcode. They differ in the direction
235 # used: both can encode "add %src_register, %dst_register", but the first one 233 # used: both can encode "add %src_register, %dst_register", but the first one
236 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M 234 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
237 # byte for the dst while last one uses field RM of the ModR/M byte for the src 235 # byte for the dst while last one uses field RM of the ModR/M byte for the src
238 # and field REG of the ModR/M byte for dst. Both should be allowed. 236 # and field REG of the ModR/M byte for dst. Both should be allowed.
239 # 237 #
240 # See AMD/Intel manual for clarification "add" instruction encoding. 238 # See AMD/Intel manual for clarification about "add" instruction encoding.
241 # 239 #
242 # REGISTER USAGE ABBREVIATIONS: 240 # REGISTER USAGE ABBREVIATIONS:
243 # E86: legacy ia32 registers (all eight: %eax to %edi) 241 # E86: legacy ia32 registers (all eight: %eax to %edi)
244 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) 242 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
245 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) 243 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
246 # R64: new amd64 registers (only seven: %r8 to %r14) 244 # R64: new amd64 registers (only seven: %r8 to %r14)
247 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) 245 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)
246 #
247 # Note that in the actions below instruction_begin points to the start of the
248 # "call" or "jmp" instruction and current_position points to its end.
248 naclcall_or_nacljmp = 249 naclcall_or_nacljmp =
249 # This block encodes call and jump "superinstruction" of the following form: 250 # This block encodes call and jump "superinstruction" of the following form:
250 # 0: 83 e_ e0 and $~0x1f,E86 251 # 0: 83 e_ e0 and $~0x1f,E86
251 # 3: 4_ 01 f_ add RBASE,R86 252 # 3: 4_ 01 f_ add RBASE,R86
252 # 6: ff e_ jmpq *R86 253 # 6: ff e_ jmpq *R86
253 #### INSTRUCTION ONE (three bytes) 254 #### INSTRUCTION ONE (three bytes)
254 # and $~0x1f, E86 255 # and $~0x1f, E86
255 (0x83 b_11_100_xxx 0xe0 256 (0x83 b_11_100_xxx 0xe0
256 #### INSTRUCTION TWO (three bytes) 257 #### INSTRUCTION TWO (three bytes)
257 # add RBASE, R86 (0x01 opcode) 258 # add RBASE, R86 (0x01 opcode)
258 b_0100_11x0 0x01 b_11_111_xxx 259 b_0100_11x0 0x01 b_11_111_xxx
259 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) 260 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
260 # callq R86 261 # callq R86
261 ((REX_WRX? 0xff b_11_010_xxx) | 262 ((REX_WRX? 0xff b_11_010_xxx) |
262 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) 263 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
263 # jmpq R86 264 # jmpq R86
264 (REX_WRX? 0xff b_11_100_xxx))) 265 (REX_WRX? 0xff b_11_100_xxx)))
265 @{ 266 @{
266 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, 267 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
267 &instruction_begin, current_position, 268 &instruction_begin,
268 data, valid_targets); 269 current_position,
270 codeblock,
271 valid_targets);
269 } | 272 } |
270 273
271 # This block encodes call and jump "superinstruction" of the following form: 274 # This block encodes call and jump "superinstruction" of the following form:
272 # 0: 83 e_ e0 and $~0x1f,E86 275 # 0: 83 e_ e0 and $~0x1f,E86
273 # 3: 4_ 03 _f add RBASE,R86 276 # 3: 4_ 03 _f add RBASE,R86
274 # 6: ff e_ jmpq *R86 277 # 6: ff e_ jmpq *R86
275 #### INSTRUCTION ONE (three bytes) 278 #### INSTRUCTION ONE (three bytes)
276 # and $~0x1f, E86 279 # and $~0x1f, E86
277 (0x83 b_11_100_xxx 0xe0 280 (0x83 b_11_100_xxx 0xe0
278 #### INSTRUCTION TWO (three bytes) 281 #### INSTRUCTION TWO (three bytes)
279 # add RBASE, R86 (0x03 opcode) 282 # add RBASE, R86 (0x03 opcode)
280 b_0100_10x1 0x03 b_11_xxx_111 283 b_0100_10x1 0x03 b_11_xxx_111
281 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) 284 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
282 # callq R86 285 # callq R86
283 ((REX_WRX? 0xff b_11_010_xxx) | 286 ((REX_WRX? 0xff b_11_010_xxx) |
284 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) 287 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
285 # jmpq R86 288 # jmpq R86
286 (REX_WRX? 0xff b_11_100_xxx))) 289 (REX_WRX? 0xff b_11_100_xxx)))
287 @{ 290 @{
288 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, 291 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
289 &instruction_begin, current_position, 292 &instruction_begin,
290 data, valid_targets); 293 current_position,
294 codeblock,
295 valid_targets);
291 } | 296 } |
292 297
293 # This block encodes call and jump "superinstruction" of the following form: 298 # This block encodes call and jump "superinstruction" of the following form:
294 # 0: 4_ 83 e_ e0 and $~0x1f,E86 299 # 0: 4_ 83 e_ e0 and $~0x1f,E86
295 # 4: 4_ 01 f_ add RBASE,R86 300 # 4: 4_ 01 f_ add RBASE,R86
296 # 7: ff e_ jmpq *R86 301 # 7: ff e_ jmpq *R86
297 #### INSTRUCTION ONE (four bytes) 302 #### INSTRUCTION ONE (four bytes)
298 # and $~0x1f, E86 303 # and $~0x1f, E86
299 ((REX_RX 0x83 b_11_100_xxx 0xe0 304 ((REX_RX 0x83 b_11_100_xxx 0xe0
300 #### INSTRUCTION TWO (three bytes) 305 #### INSTRUCTION TWO (three bytes)
(...skipping 17 matching lines...) Expand all
318 # add RBASE, R64 (0x01 opcode) 323 # add RBASE, R64 (0x01 opcode)
319 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111) 324 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
320 #### INSTRUCTION THREE: call (three bytes) 325 #### INSTRUCTION THREE: call (three bytes)
321 # callq R64 326 # callq R64
322 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | 327 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
323 #### INSTRUCTION THREE: jmp (three bytes) 328 #### INSTRUCTION THREE: jmp (three bytes)
324 # jmpq R64 329 # jmpq R64
325 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) 330 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
326 @{ 331 @{
327 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, 332 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
328 &instruction_begin, current_position, 333 &instruction_begin,
329 data, valid_targets); 334 current_position,
335 codeblock,
336 valid_targets);
330 } | 337 } |
331 338
332 # This block encodes call and jump "superinstruction" of the following form: 339 # This block encodes call and jump "superinstruction" of the following form:
333 # 0: 4_ 83 e_ e0 and $~0x1f,E86 340 # 0: 4_ 83 e_ e0 and $~0x1f,E86
334 # 4: 4_ 03 _f add RBASE,R86 341 # 4: 4_ 03 _f add RBASE,R86
335 # 7: ff e_ jmpq *R86 342 # 7: ff e_ jmpq *R86
336 #### INSTRUCTION ONE (four bytes) 343 #### INSTRUCTION ONE (four bytes)
337 # and $~0x1f, E86 344 # and $~0x1f, E86
338 ((REX_RX 0x83 b_11_100_xxx 0xe0 345 ((REX_RX 0x83 b_11_100_xxx 0xe0
339 #### INSTRUCTION TWO (three bytes) 346 #### INSTRUCTION TWO (three bytes)
(...skipping 17 matching lines...) Expand all
357 # add RBASE, R64 (0x03 opcode) 364 # add RBASE, R64 (0x03 opcode)
358 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111) 365 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
359 #### INSTRUCTION THREE: call (three bytes) 366 #### INSTRUCTION THREE: call (three bytes)
360 # callq R64 367 # callq R64
361 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | 368 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
362 #### INSTRUCTION THREE: jmp (three bytes) 369 #### INSTRUCTION THREE: jmp (three bytes)
363 # jmpq R64 370 # jmpq R64
364 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) 371 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
365 @{ 372 @{
366 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, 373 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
367 &instruction_begin, current_position, 374 &instruction_begin,
368 data, valid_targets); 375 current_position,
376 codeblock,
377 valid_targets);
369 }; 378 };
370 379
371 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rdi) operand 380 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rdi) operand
372 381
373 # maskmovq %mmX,%mmY (EMMX or SSE) 382 # maskmovq %mmX,%mmY (EMMX or SSE)
374 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers; 383 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers;
375 384
376 # maskmovdqu %xmmX, %xmmY (SSE2) 385 # maskmovdqu %xmmX, %xmmY (SSE2)
377 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers; 386 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers;
378 387
379 # vmaskmovdqu %xmmX, %xmmY (AVX) 388 # vmaskmovdqu %xmmX, %xmmY (AVX)
380 vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) | 389 vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) |
381 (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers; 390 (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers;
382 391
383 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu; 392 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu;
384 393
385 # Temporary fix: for string instructions combination of data16 and rep(ne) 394 # Temporary fix: for string instructions combination of data16 and rep(ne)
386 # prefixes is disallowed to mimic old validator behavior. 395 # prefixes is disallowed to mimic old validator behavior.
387 # See http://code.google.com/p/nativeclient/issues/detail?id=1950 396 # See http://code.google.com/p/nativeclient/issues/detail?id=1950
388 397
389 # data16rep = (data16 | rep data16 | data16 rep); 398 # data16rep = (data16 | rep data16 | data16 rep);
390 # data16condrep = (data16 | condrep data16 | data16 condrep); 399 # data16condrep = (data16 | condrep data16 | data16 condrep);
391 data16rep = data16; 400 data16rep = data16;
392 data16condrep = data16; 401 data16condrep = data16;
393 402
394 # String instructions which use only %ds:(%rsi) 403 # String instructions which use only %ds:(%rsi)
395 string_instruction_rsi_no_rdi = 404 string_instruction_rsi_no_rdi =
396 (rep? 0xac | # lods %ds:(%rsi),%al 405 (rep? 0xac | # lods %ds:(%rsi),%al
397 data16rep 0xad | # lods %ds:(%rsi),%ax 406 data16rep 0xad | # lods %ds:(%rsi),%ax
398 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax 407 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax
399 408
400 # String instructions which use only %es:(%rdi) 409 # String instructions which use only %es:(%rdi)
401 string_instruction_rdi_no_rsi = 410 string_instruction_rdi_no_rsi =
402 condrep? 0xae | # scas %es:(%rdi),%al 411 condrep? 0xae | # scas %es:(%rdi),%al
403 data16condrep 0xaf | # scas %es:(%rdi),%ax 412 data16condrep 0xaf | # scas %es:(%rdi),%ax
404 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax 413 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax
405 414
406 rep? 0xaa | # stos %al,%es:(%rdi) 415 rep? 0xaa | # stos %al,%es:(%rdi)
407 data16rep 0xab | # stos %ax,%es:(%rdi) 416 data16rep 0xab | # stos %ax,%es:(%rdi)
408 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi) 417 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)
409 418
410 # String instructions which use both %ds:(%rsi) and %es:(%rdi) 419 # String instructions which use both %ds:(%rsi) and %es:(%rdi)
411 string_instruction_rsi_rdi = 420 string_instruction_rsi_rdi =
412 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi) 421 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi)
413 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi) 422 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi)
414 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi) 423 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi)
415 424
416 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi) 425 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi)
417 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi) 426 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi)
418 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi) 427 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi)
419 428
420 # "Superinstruction" which includes %rsi sandboxing. 429 # "Superinstruction" which includes %rsi sandboxing.
421 # 430 #
422 # There are two variants which handle spurious REX prefixes. 431 # There are two variants which handle spurious REX prefixes.
423 # 432 #
424 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64 433 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64
425 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may 434 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
426 # be used to move from REG field to RM or in the other direction thus there 435 # be used to move from REG field to RM or in the other direction thus there
427 # are two encodings for the register-to-register move (and since REG and RM 436 # are two encodings for the register-to-register move (and since REG and RM
428 # are identical here only opcode differs). 437 # are identical here only opcode differs).
429 sandbox_instruction_rsi_no_rdi = 438 sandbox_instruction_rsi_no_rdi =
430 (0x89 | 0x8b) 0xf6 # mov %esi,%esi 439 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
431 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 440 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
432 string_instruction_rsi_no_rdi 441 string_instruction_rsi_no_rdi
433 @{ 442 @{
434 ExpandSuperinstructionBySandboxingBytes( 443 ExpandSuperinstructionBySandboxingBytes(
435 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); 444 2 /* mov */ + 4 /* lea */,
445 &instruction_begin,
446 codeblock,
447 valid_targets);
436 } | 448 } |
437 449
438 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi 450 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
439 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 451 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
440 string_instruction_rsi_no_rdi 452 string_instruction_rsi_no_rdi
441 @{ 453 @{
442 ExpandSuperinstructionBySandboxingBytes( 454 ExpandSuperinstructionBySandboxingBytes(
443 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); 455 3 /* mov */ + 4 /* lea */,
456 &instruction_begin,
457 codeblock,
458 valid_targets);
444 }; 459 };
445 460
446 # "Superinstruction" which includes %rdi sandboxing. 461 # "Superinstruction" which includes %rdi sandboxing.
447 # 462 #
448 # There are two variants which handle spurious REX prefixes. 463 # There are two variants which handle spurious REX prefixes.
449 # 464 #
450 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 465 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64
451 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may 466 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
452 # be used to move from REG field to RM or in the other direction thus there 467 # be used to move from REG field to RM or in the other direction thus there
453 # are two encodings for the register-to-register move (and since REG and RM 468 # are two encodings for the register-to-register move (and since REG and RM
454 # are identical here only opcode differs). 469 # are identical here only opcode differs).
455 sandbox_instruction_rdi_no_rsi = 470 sandbox_instruction_rdi_no_rsi =
456 (0x89 | 0x8b) 0xff # mov %edi,%edi 471 (0x89 | 0x8b) 0xff # mov %edi,%edi
457 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi 472 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
458 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) 473 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
459 @{ 474 @{
460 ExpandSuperinstructionBySandboxingBytes( 475 ExpandSuperinstructionBySandboxingBytes(
461 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); 476 2 /* mov */ + 4 /* lea */,
477 &instruction_begin,
478 codeblock,
479 valid_targets);
462 } | 480 } |
463 481
464 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi 482 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
465 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi 483 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
466 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) 484 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
467 @{ 485 @{
468 ExpandSuperinstructionBySandboxingBytes( 486 ExpandSuperinstructionBySandboxingBytes(
469 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); 487 3 /* mov */ + 4 /* lea */,
488 &instruction_begin,
489 codeblock,
490 valid_targets);
470 }; 491 };
471 492
472 493
473 # "Superinstruction" which includes both %rsi and %rdi sandboxing. 494 # "Superinstruction" which includes both %rsi and %rdi sandboxing.
474 # 495 #
475 # There are four variants which handle spurious REX prefixes. 496 # There are four variants which handle spurious REX prefixes.
476 # 497 #
477 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both 498 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both
478 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two 499 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two
479 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move 500 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
480 # from REG field to RM or in the other direction thus there are two encodings 501 # from REG field to RM or in the other direction thus there are two encodings
481 # for the register-to-register move (and since REG and RM are identical here 502 # for the register-to-register move (and since REG and RM are identical here
482 # only opcode differs). 503 # only opcode differs).
483 sandbox_instruction_rsi_rdi = 504 sandbox_instruction_rsi_rdi =
484 (0x89 | 0x8b) 0xf6 # mov %esi,%esi 505 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
485 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 506 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
486 (0x89 | 0x8b) 0xff # mov %edi,%edi 507 (0x89 | 0x8b) 0xff # mov %edi,%edi
487 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi 508 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
488 string_instruction_rsi_rdi 509 string_instruction_rsi_rdi
489 @{ 510 @{
490 ExpandSuperinstructionBySandboxingBytes( 511 ExpandSuperinstructionBySandboxingBytes(
491 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, 512 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
492 &instruction_begin, data, valid_targets); 513 &instruction_begin,
514 codeblock,
515 valid_targets);
493 } | 516 } |
494 517
495 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi 518 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi
496 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 519 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
497 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi 520 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
498 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi 521 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi
499 522
500 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi 523 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
501 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 524 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
502 (0x89 | 0x8b) 0xff # mov %edi,%edi 525 (0x89 | 0x8b) 0xff # mov %edi,%edi
503 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi 526 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi
504 string_instruction_rsi_rdi 527 string_instruction_rsi_rdi
505 @{ 528 @{
506 ExpandSuperinstructionBySandboxingBytes( 529 ExpandSuperinstructionBySandboxingBytes(
507 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ 530 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
508 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, 531 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
509 &instruction_begin, data, valid_targets); 532 &instruction_begin,
533 codeblock,
534 valid_targets);
510 } | 535 } |
511 536
512 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi 537 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
513 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi 538 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
514 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi 539 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
515 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi 540 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
516 string_instruction_rsi_rdi 541 string_instruction_rsi_rdi
517 @{ 542 @{
518 ExpandSuperinstructionBySandboxingBytes( 543 ExpandSuperinstructionBySandboxingBytes(
519 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, 544 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
520 &instruction_begin, data, valid_targets); 545 &instruction_begin,
546 codeblock,
547 valid_targets);
521 }; 548 };
522 549
523 # All the "special" instructions (== instructions which obey non-standard 550 # All the "special" instructions (== instructions which obey non-standard
524 # rules). Three groups: 551 # rules). Three groups:
525 # * %rbp/%rsp related instructions (these instructions are special because 552 # * %rbp/%rsp related instructions (these registers and operations which
526 # they must be in the range %r15...%r15+4294967295 except momentarily they 553 # operate on them are special because registers must be in the range
527 # can be in the range 0...4294967295) 554 # %r15...%r15+4294967295 except momentarily they can be in the range
555 # 0...4294967295, but then the very next instruction MUST restore the
556 # status quo).
528 # * string instructions (which can not use %r15 as base and thus need special 557 # * string instructions (which can not use %r15 as base and thus need special
529 # handling both in compiler and validator) 558 # handling both in compiler and validator)
530 # * naclcall/nacljmp (indirect jumps need special care) 559 # * naclcall/nacljmp (indirect jumps need special care)
531 special_instruction = 560 special_instruction =
532 (rbp_modifications | 561 (rbp_modifications |
533 rsp_modifications | 562 rsp_modifications |
534 rbp_sandboxing | 563 rbp_sandboxing |
535 rsp_sandboxing | 564 rsp_sandboxing |
536 sandbox_instruction_rsi_no_rdi | 565 sandbox_instruction_rsi_no_rdi |
537 sandbox_instruction_rdi_no_rsi | 566 sandbox_instruction_rdi_no_rsi |
538 sandbox_instruction_rsi_rdi | 567 sandbox_instruction_rsi_rdi |
539 naclcall_or_nacljmp) 568 naclcall_or_nacljmp)
540 # Mark the instruction as special - currently this information is used only 569 # Mark the instruction as special - currently this information is used only
541 # in tests, but in the future we may use it for dynamic code modification 570 # in tests, but in the future we may use it for dynamic code modification
542 # support. 571 # support.
543 @{ 572 @{
544 instruction_info_collected |= SPECIAL_INSTRUCTION; 573 instruction_info_collected |= SPECIAL_INSTRUCTION;
545 }; 574 };
546 575
547 # Remove special instructions which are only allowed in special cases. 576 # Remove special instructions which are only allowed in special cases.
548 normal_instruction = one_instruction - special_instruction; 577 normal_instruction = one_instruction - special_instruction;
549 578
550 # Check if call is properly aligned. 579 # For direct call we explicitly encode all variations.
551 # 580 direct_call = (data16 REX_RXB? 0xe8 rel16) |
552 # For direct call we explicitly encode all variations. For indirect call 581 (REX_WRXB? 0xe8 rel32) |
553 # we accept all the special instructions which ends with register-addressed 582 (data16 REXW_RXB 0xe8 rel32);
554 # indirect call. 583
584 # For indirect call we accept only near register-addressed indirect call.
585 indirect_call_register = data16? REX_WRXB? 0xff (opcode_2 & modrm_registers);
586
587 # Ragel machine that accepts one call instruction or call superinstruction and
588 # checks if call is properly aligned.
555 call_alignment = 589 call_alignment =
556 ((normal_instruction & 590 ((normal_instruction & direct_call) |
557 # Direct call 591 # For indirect calls we accept all the special instructions which ends with
558 ((data16 REX_RXB? 0xe8 rel16) | 592 # register-addressed indirect call.
559 (REX_WRXB? 0xe8 rel32) | 593 (special_instruction & (any* indirect_call_register)))
560 (data16 REXW_RXB 0xe8 rel32))) |
561 (special_instruction &
562 # Indirect call
563 (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
564 modrm_registers))))
565 # Call instruction must be aligned to the end of bundle. Previously this was 594 # Call instruction must be aligned to the end of bundle. Previously this was
566 # a strict requirement; today it's just a warning to aid with debugging. 595 # a strict requirement; today it's just a warning to aid with debugging.
567 @{ 596 @{
568 if (((current_position - data) & kBundleMask) != kBundleMask) 597 if (((current_position - codeblock) & kBundleMask) != kBundleMask)
569 instruction_info_collected |= BAD_CALL_ALIGNMENT; 598 instruction_info_collected |= BAD_CALL_ALIGNMENT;
570 }; 599 };
571 600
572 # This action calls user's callback (if needed) and cleans up validator's 601 # This action calls user's callback (if needed) and cleans up validator's
573 # internal state. 602 # internal state.
574 # 603 #
575 # We call the user callback if there are validation errors or if the 604 # We call the user callback either on validation errors or on every
576 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. 605 # instruction, depending on CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option.
577 # 606 #
578 # After that we move instruction_begin and clean all the variables which 607 # After that we move instruction_begin and clean all the variables which
579 # only used in the processing of a single instruction (prefixes, operand 608 # are only used in the processing of a single instruction (prefixes, operand
580 # states and instruction_info_collected). 609 # states and instruction_info_collected).
581 action end_of_instruction_cleanup { 610 action end_of_instruction_cleanup {
582 /* Call user-supplied callback. */ 611 /* Call user-supplied callback. */
583 instruction_end = current_position + 1; 612 instruction_end = current_position + 1;
584 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) || 613 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
585 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) { 614 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
586 result &= user_callback( 615 result &= user_callback(
587 instruction_begin, instruction_end, 616 instruction_begin, instruction_end,
588 instruction_info_collected | 617 instruction_info_collected |
589 ((restricted_register << RESTRICTED_REGISTER_SHIFT) & 618 ((restricted_register << RESTRICTED_REGISTER_SHIFT) &
590 RESTRICTED_REGISTER_MASK), callback_data); 619 RESTRICTED_REGISTER_MASK), callback_data);
591 } 620 }
592 621
593 /* On successful match the instruction_begin must point to the next byte 622 /* On successful match the instruction_begin must point to the next byte
594 * to be able to report the new offset as the start of instruction 623 * to be able to report the new offset as the start of instruction
595 * causing error. */ 624 * causing error. */
596 instruction_begin = instruction_end; 625 instruction_begin = instruction_end;
597 626
598 /* Mark start of the next instruction as a valid target for jump. 627 /*
599 * Note: we mark start of the next instruction here, not start of the 628 * We may set instruction_begin at the first byte of the instruction instead
600 * current one because memory access check should be able to clear this 629 * of here but in the case of incorrect one byte instructions user callback
601 * bit when restricted register is used. */ 630 * may be called before instruction_begin is set.
602 MarkValidJumpTarget(instruction_begin - data, valid_targets); 631 */
632 MarkValidJumpTarget(instruction_begin - codeblock, valid_targets);
603 633
604 /* Clear variables. */ 634 /* Clear variables. */
605 instruction_info_collected = 0; 635 instruction_info_collected = 0;
606 SET_REX_PREFIX(FALSE); 636 SET_REX_PREFIX(FALSE);
607 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ 637 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
608 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B); 638 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B);
609 SET_VEX_PREFIX3(0x00); 639 SET_VEX_PREFIX3(0x00);
610 operand_states = 0; 640 operand_states = 0;
611 base = 0; 641 base = 0;
612 index = 0; 642 index = 0;
613 } 643 }
614 644
615 # This action reports fatal error detected by DFA. 645 # This action reports fatal error detected by DFA.
616 action report_fatal_error { 646 action report_fatal_error {
617 result &= user_callback(instruction_begin, current_position, 647 result &= user_callback(instruction_begin, current_position,
618 UNRECOGNIZED_INSTRUCTION, callback_data); 648 UNRECOGNIZED_INSTRUCTION, callback_data);
619 /* 649 /*
620 * Process the next bundle: "continue" here is for the "for" cycle in 650 * Process the next bundle: "continue" here is for the "for" cycle in
621 * the ValidateChunkAMD64 function. 651 * the ValidateChunkAMD64 function.
622 * 652 *
623 * It does not affect the case which we really care about (when code 653 * It does not affect the case which we really care about (when code
624 * is validatable), but makes it possible to detect more errors in one 654 * is validatable), but makes it possible to detect more errors in one
625 * run in tools like ncval. 655 * run in tools like ncval.
626 */ 656 */
627 continue; 657 continue;
628 } 658 }
629 659
630 # This is main ragel machine: it does 99% of validation work. There is only 660 # This is main ragel machine: it does 99% of validation work. There is only
631 # one thing to do with bundle if this machine accepts the bundle: 661 # one thing to do with bundle if this ragel machine accepts the bundle:
632 # * check for the state of the restricted_register at the end of the bundle. 662 # * check for the state of the restricted_register at the end of the bundle.
633 # It's an error if %rbp or %rsp is restricted at the end of the bundle. 663 # It's an error if %rbp or %rsp is restricted at the end of the bundle.
634 # Additionally if all the bundles are fine you need to check that direct jumps 664 # Additionally if all the bundles are fine you need to check that direct jumps
635 # are correct. This is done in the following way: 665 # are correct. This is done in the following way:
636 # * DFA fills two arrays: valid_targets and jump_dests. 666 # * DFA fills two arrays: valid_targets and jump_dests.
637 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". 667 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".
638 # All other checks are done here. 668 # All other checks are done here.
639 669
640 main := ((call_alignment | normal_instruction | special_instruction) 670 main := ((call_alignment | normal_instruction | special_instruction)
641 @end_of_instruction_cleanup)* 671 @end_of_instruction_cleanup)*
642 $!report_fatal_error; 672 $!report_fatal_error;
643 673
644 }%% 674 }%%
645 675
676 /*
677 * The "write data" statement causes Ragel to emit the constant static data
678 * needed by the ragel machine.
679 */
646 %% write data; 680 %% write data;
647 681
682 /*
683 * Operand's kind WRT sandboxing effect: no effect, can be used for sandboxing
684 * and will make register invalid if used.
685 *
686 * No effect is the "initial state", 32bit stores can be used for sandboxing (in
687 * that case the high 32 bits of the corresponding 64-bit register are set to
688 * zero) and we do not distinguish modifications of 16bit and 64bit registers to
689 * match the behavior of the old validator.
690 *
691 * 8bit operands must be distinguished from other types because the REX prefix
692 * regulates the choice between %ah and %spl, as well as %ch and %bpl.
693 */
648 enum OperandKind { 694 enum OperandKind {
649 OPERAND_SANDBOX_IRRELEVANT = 0, 695 OPERAND_SANDBOX_IRRELEVANT = 0,
696 /* 8bit register that is modified by instruction. */
697 OPERAND_SANDBOX_8BIT,
650 /* 698 /*
651 * Currently we do not distinguish 8bit and 16bit modifications from 699 * 32-bit register that is modified by instruction. The high 32-bit bits of
652 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. 700 * the corresponding 64-bit register are set to zero.
653 *
654 * 8bit operands must be distinguished from other types because the REX prefix
655 * regulates the choice between %ah and %spl, as well as %ch and %bpl.
656 */ 701 */
657 OPERAND_SANDBOX_8BIT,
658 OPERAND_SANDBOX_RESTRICTED, 702 OPERAND_SANDBOX_RESTRICTED,
703 /* 64-bit or 16-bit register that is modified by instruction. */
659 OPERAND_SANDBOX_UNRESTRICTED 704 OPERAND_SANDBOX_UNRESTRICTED
660 }; 705 };
661 706
662 #define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3)) 707 /*
663 #define SET_OPERAND_FORMAT(N, T) SET_OPERAND_FORMAT_ ## T(N) 708 * operand_states variable keeps one byte of information per operand in the
664 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(N) \ 709 * current instruction:
665 operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3)) 710 * * the first 5 bits (least significant ones) are for register numbers (16
666 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(N) \ 711 possible registers plus RIZ),
667 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) 712 * * the next 2 bits for register kinds.
668 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(N) \ 713 *
669 operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3)) 714 * The macros below are used to access this data.
670 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(N) \ 715 */
671 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) 716 #define SET_OPERAND_NAME(INDEX, REGISTER_NAME) \
672 #define CHECK_OPERAND(N, S, T) \ 717 operand_states |= ((REGISTER_NAME) << ((INDEX) << 3))
673 ((operand_states & (0xff << ((N) << 3))) == ((S | (T << 5)) << ((N) << 3))) 718 #define SET_OPERAND_FORMAT(INDEX, FORMAT) \
719 SET_OPERAND_FORMAT_ ## FORMAT(INDEX)
720 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(INDEX) \
721 operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((INDEX) << 3))
722 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(INDEX) \
723 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((INDEX) << 3))
724 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(INDEX) \
725 operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((INDEX) << 3))
726 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(INDEX) \
727 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((INDEX) << 3))
728 #define CHECK_OPERAND(INDEX, REGISTER_NAME, KIND) \
729 ((operand_states & (0xff << ((INDEX) << 3))) == \
730 ((((KIND) << 5) | (REGISTER_NAME)) << ((INDEX) << 3)))
731 #define CHECK_OPERAND_R15_MODIFIED(INDEX) \
732 (CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_8BIT) || \
733 CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_RESTRICTED) || \
734 CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_UNRESTRICTED))
735 /*
736 * Note that the macros below access operand_states variable and also rex_prefix
737 * variable. This is to distinguish %ah from %spl, as well as %ch from %bpl.
738 */
739 #define CHECK_OPERAND_BP_MODIFIED(INDEX) \
740 ((CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \
741 CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_RESTRICTED) || \
742 CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_UNRESTRICTED))
743 #define CHECK_OPERAND_SP_MODIFIED(INDEX) \
744 ((CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \
745 CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_RESTRICTED) || \
746 CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) \
747 /*
748 * This is for Process?OperandsZeroExtends functions: in this case %esp or %ebp
749 * can be written to, but %spl/%sp/%rsp or %bpl/%bp/%rbp can not be modified.
750 */
751 #define CHECK_OPERAND_BP_INVALID_MODIFICATION(INDEX) \
752 ((CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \
753 CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_UNRESTRICTED))
754 #define CHECK_OPERAND_SP_INVALID_MODIFICATION(INDEX) \
755 ((CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \
756 CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_UNRESTRICTED))
757 #define CHECK_OPERAND_RESTRICTED(INDEX) \
758 /* Take 2 bits of operand type from operand_states as *restricted_register */\
759 /* and also make sure operand_states denotes a register (4th bit == 0). */\
760 (operand_states & (0x70 << ((INDEX) << 3))) == \
761 (OPERAND_SANDBOX_RESTRICTED << (5 + ((INDEX) << 3)))
762 #define GET_OPERAND_NAME(INDEX) ((operand_states >> ((INDEX) << 3)) & 0x1f)
674 763
675 static INLINE void CheckAccess(ptrdiff_t instruction_begin, 764 static INLINE void CheckMemoryAccess(ptrdiff_t instruction_begin,
676 enum OperandName base, 765 enum OperandName base,
677 enum OperandName index, 766 enum OperandName index,
678 uint8_t restricted_register, 767 uint8_t restricted_register,
679 bitmap_word *valid_targets, 768 bitmap_word *valid_targets,
680 uint32_t *instruction_info_collected) { 769 uint32_t *instruction_info_collected) {
681 if ((base == REG_RIP) || (base == REG_R15) || 770 if ((base == REG_RIP) || (base == REG_R15) ||
682 (base == REG_RSP) || (base == REG_RBP)) { 771 (base == REG_RSP) || (base == REG_RBP)) {
683 if ((index == NO_REG) || (index == REG_RIZ)) 772 if ((index == NO_REG) || (index == REG_RIZ))
684 { /* do nothing. */ } 773 { /* do nothing. */ }
685 else if (index == restricted_register) 774 else if (index == restricted_register)
686 BitmapClearBit(valid_targets, instruction_begin), 775 BitmapClearBit(valid_targets, instruction_begin),
687 *instruction_info_collected |= RESTRICTED_REGISTER_USED; 776 *instruction_info_collected |= RESTRICTED_REGISTER_USED;
688 else 777 else
689 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER; 778 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER;
690 } else { 779 } else {
691 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER; 780 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER;
692 } 781 }
693 } 782 }
694 783
784 static FORCEINLINE uint32_t CheckValidityOfRegularInstruction(
785 enum OperandName restricted_register) {
786 /*
787 * Restricted %rsp or %rbp must be restored from
788 * zero-extension state by appropriate "special" instruction, not with
789 * regular instruction.
790 */
791 if (restricted_register == REG_RBP)
792 return RESTRICTED_RBP_UNPROCESSED;
793 if (restricted_register == REG_RSP)
794 return RESTRICTED_RSP_UNPROCESSED;
795 return 0;
796 }
695 797
696 static INLINE void Process0Operands(enum OperandName *restricted_register, 798 static INLINE void Process0Operands(enum OperandName *restricted_register,
697 uint32_t *instruction_info_collected) { 799 uint32_t *instruction_info_collected) {
698 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 800 *instruction_info_collected |=
699 * instruction, not with regular instruction. */ 801 CheckValidityOfRegularInstruction(*restricted_register);
700 if (*restricted_register == REG_RSP) { 802 /* Every instruction clears restricted register even if it is not modified. */
701 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
702 } else if (*restricted_register == REG_RBP) {
703 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
704 }
705 *restricted_register = NO_REG; 803 *restricted_register = NO_REG;
706 } 804 }
707 805
708 static INLINE void Process1Operand(enum OperandName *restricted_register, 806 static INLINE void Process1Operand(enum OperandName *restricted_register,
709 uint32_t *instruction_info_collected, 807 uint32_t *instruction_info_collected,
710 uint8_t rex_prefix, 808 uint8_t rex_prefix,
711 uint32_t operand_states) { 809 uint32_t operand_states) {
712 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 810 *instruction_info_collected |=
713 * instruction, not with regular instruction. */ 811 CheckValidityOfRegularInstruction(*restricted_register);
714 if (*restricted_register == REG_RSP) { 812 if (CHECK_OPERAND_R15_MODIFIED(0))
715 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; 813 *instruction_info_collected |= R15_MODIFIED;
716 } else if (*restricted_register == REG_RBP) { 814 if (CHECK_OPERAND_BP_MODIFIED(0))
717 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; 815 *instruction_info_collected |= BP_MODIFIED;
718 } 816 if (CHECK_OPERAND_SP_MODIFIED(0))
817 *instruction_info_collected |= SP_MODIFIED;
818 /* Every instruction clears restricted register even if it is not modified. */
719 *restricted_register = NO_REG; 819 *restricted_register = NO_REG;
720 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
721 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
722 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
723 *instruction_info_collected |= R15_MODIFIED;
724 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
725 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
726 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
727 *instruction_info_collected |= BPL_MODIFIED;
728 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
729 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
730 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
731 *instruction_info_collected |= SPL_MODIFIED;
732 }
733 } 820 }
734 821
735 static INLINE void Process1OperandZeroExtends( 822 static INLINE void Process1OperandZeroExtends(
736 enum OperandName *restricted_register, 823 enum OperandName *restricted_register,
737 uint32_t *instruction_info_collected, 824 uint32_t *instruction_info_collected,
738 uint8_t rex_prefix, 825 uint8_t rex_prefix,
739 uint32_t operand_states) { 826 uint32_t operand_states) {
740 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 827 *instruction_info_collected |=
741 * instruction, not with regular instruction. */ 828 CheckValidityOfRegularInstruction(*restricted_register);
742 if (*restricted_register == REG_RSP) { 829 /* Every instruction clears restricted register even if it is not modified. */
743 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
744 } else if (*restricted_register == REG_RBP) {
745 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
746 }
747 *restricted_register = NO_REG; 830 *restricted_register = NO_REG;
748 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || 831 if (CHECK_OPERAND_R15_MODIFIED(0))
749 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
750 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
751 *instruction_info_collected |= R15_MODIFIED; 832 *instruction_info_collected |= R15_MODIFIED;
752 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 833 if (CHECK_OPERAND_BP_INVALID_MODIFICATION(0))
753 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { 834 *instruction_info_collected |= BP_MODIFIED;
754 *instruction_info_collected |= BPL_MODIFIED; 835 if (CHECK_OPERAND_SP_INVALID_MODIFICATION(0))
755 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 836 *instruction_info_collected |= SP_MODIFIED;
756 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { 837 if (CHECK_OPERAND_RESTRICTED(0))
757 *instruction_info_collected |= SPL_MODIFIED; 838 *restricted_register = GET_OPERAND_NAME(0);
758 /* Take 2 bits of operand type from operand_states as *restricted_register,
759 * make sure operand_states denotes a register (4th bit == 0). */
760 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
761 *restricted_register = operand_states & 0x0f;
762 }
763 } 839 }
764 840
765 static INLINE void Process2Operands(enum OperandName *restricted_register, 841 static INLINE void Process2Operands(enum OperandName *restricted_register,
766 uint32_t *instruction_info_collected, 842 uint32_t *instruction_info_collected,
767 uint8_t rex_prefix, 843 uint8_t rex_prefix,
768 uint32_t operand_states) { 844 uint32_t operand_states) {
769 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 845 *instruction_info_collected |=
770 * instruction, not with regular instruction. */ 846 CheckValidityOfRegularInstruction(*restricted_register);
771 if (*restricted_register == REG_RSP) { 847 if (CHECK_OPERAND_R15_MODIFIED(0) || CHECK_OPERAND_R15_MODIFIED(1))
772 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; 848 *instruction_info_collected |= R15_MODIFIED;
773 } else if (*restricted_register == REG_RBP) { 849 if (CHECK_OPERAND_BP_MODIFIED(0) || CHECK_OPERAND_BP_MODIFIED(1))
774 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; 850 *instruction_info_collected |= BP_MODIFIED;
775 } 851 if (CHECK_OPERAND_SP_MODIFIED(0) || CHECK_OPERAND_SP_MODIFIED(1))
852 *instruction_info_collected |= SP_MODIFIED;
853 /* Every instruction clears restricted register even if it is not modified. */
776 *restricted_register = NO_REG; 854 *restricted_register = NO_REG;
777 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
778 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
779 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
780 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
781 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
782 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
783 *instruction_info_collected |= R15_MODIFIED;
784 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
785 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
786 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
787 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
788 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
789 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
790 *instruction_info_collected |= BPL_MODIFIED;
791 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
792 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
793 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
794 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
795 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
796 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
797 *instruction_info_collected |= SPL_MODIFIED;
798 }
799 } 855 }
800 856
801 static INLINE void Process2OperandsZeroExtends( 857 static INLINE void Process2OperandsZeroExtends(
802 enum OperandName *restricted_register, 858 enum OperandName *restricted_register,
803 uint32_t *instruction_info_collected, 859 uint32_t *instruction_info_collected,
804 uint8_t rex_prefix, 860 uint8_t rex_prefix,
805 uint32_t operand_states) { 861 uint32_t operand_states) {
806 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 862 *instruction_info_collected |=
807 * instruction, not with regular instruction. */ 863 CheckValidityOfRegularInstruction(*restricted_register);
808 if (*restricted_register == REG_RSP) { 864 /* Every instruction clears restricted register even if it is not modified. */
809 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
810 } else if (*restricted_register == REG_RBP) {
811 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
812 }
813 *restricted_register = NO_REG; 865 *restricted_register = NO_REG;
814 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || 866 if (CHECK_OPERAND_R15_MODIFIED(0) ||
815 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || 867 CHECK_OPERAND_R15_MODIFIED(1))
816 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
817 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
818 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
819 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
820 *instruction_info_collected |= R15_MODIFIED; 868 *instruction_info_collected |= R15_MODIFIED;
821 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 869 if (CHECK_OPERAND_BP_INVALID_MODIFICATION(0) ||
822 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || 870 CHECK_OPERAND_BP_INVALID_MODIFICATION(1))
823 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 871 *instruction_info_collected |= R15_MODIFIED;
halyavin 2013/03/25 14:18:36 BP_MODIFIED
khim 2013/03/25 14:24:45 Done.
824 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { 872 if (CHECK_OPERAND_SP_INVALID_MODIFICATION(0) ||
825 *instruction_info_collected |= BPL_MODIFIED; 873 CHECK_OPERAND_SP_INVALID_MODIFICATION(1))
826 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 874 *instruction_info_collected |= R15_MODIFIED;
halyavin 2013/03/25 14:18:36 SP_MODIFIED
khim 2013/03/25 14:24:45 Done.
827 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || 875 if (CHECK_OPERAND_RESTRICTED(0)) {
828 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 876 *restricted_register = GET_OPERAND_NAME(0);
829 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { 877 /*
830 *instruction_info_collected |= SPL_MODIFIED; 878 * If both operands are sandboxed, the second one doesn't count. We can't
831 /* Take 2 bits of operand type from operand_states as *restricted_register, 879 * ignore it completely though, since it can modify %rsp or %rbp which must
832 * make sure operand_states denotes a register (4th bit == 0). */ 880 * follow special rules. In this case NaCl forbids the instruction.
833 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { 881 */
834 *restricted_register = operand_states & 0x0f; 882 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED))
835 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) {
836 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; 883 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
837 } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) { 884 if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED))
838 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; 885 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
839 } 886 } else if (CHECK_OPERAND_RESTRICTED(1)) {
840 /* Take 2 bits of operand type from operand_states as *restricted_register, 887 *restricted_register = GET_OPERAND_NAME(1);
841 * make sure operand_states denotes a register (12th bit == 0). */
842 } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) {
843 *restricted_register = (operand_states & 0x0f00) >> 8;
844 } 888 }
845 } 889 }
846 890
847 /* 891 /*
848 * This function merges "dangerous" instruction with sandboxing instructions to 892 * This function merges "dangerous" instruction with sandboxing instructions to
849 * get a "superinstruction" and unmarks in-between jump targets. 893 * get a "superinstruction" and unmarks in-between jump targets.
850 */ 894 */
851 static INLINE void ExpandSuperinstructionBySandboxingBytes( 895 static INLINE void ExpandSuperinstructionBySandboxingBytes(
852 size_t sandbox_instructions_size, 896 size_t sandbox_instructions_size,
853 const uint8_t **instruction_begin, 897 const uint8_t **instruction_begin,
854 const uint8_t *data, 898 const uint8_t codeblock[],
855 bitmap_word *valid_targets) { 899 bitmap_word *valid_targets) {
856 *instruction_begin -= sandbox_instructions_size; 900 *instruction_begin -= sandbox_instructions_size;
857 /* 901 /*
858 * We need to unmark start of the "dangerous" instruction itself, too, but we 902 * We need to unmark start of the "dangerous" instruction itself, too, but we
859 * don't need to mark the beginning of the whole "superinstruction" - that's 903 * don't need to mark the beginning of the whole "superinstruction" - that's
860 * why we move start by one byte and don't change the length. 904 * why we move start by one byte and don't change the length.
861 */ 905 */
862 UnmarkValidJumpTargets((*instruction_begin + 1 - data), 906 UnmarkValidJumpTargets((*instruction_begin + 1 - codeblock),
863 sandbox_instructions_size, 907 sandbox_instructions_size,
864 valid_targets); 908 valid_targets);
865 } 909 }
866 910
867 /* 911 /*
868 * Return TRUE if naclcall or nacljmp uses the same register in all three 912 * Return TRUE if naclcall or nacljmp uses the same register in all three
869 * instructions. 913 * instructions.
870 * 914 *
871 * This version is for the case where "add %src_register, %dst_register" with 915 * This version is for the case where "add %src_register, %dst_register" with
872 * dst in RM field and src in REG field of ModR/M byte is used. 916 * dst in RM field and src in REG field of ModR/M byte is used.
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
982 * 0: 83 eX e0 and $~0x1f,E86 1026 * 0: 83 eX e0 and $~0x1f,E86
983 * 3: 4? 01 fX add RBASE,R86 1027 * 3: 4? 01 fX add RBASE,R86
984 * 6: 4? ff eX jmpq *R86 1028 * 6: 4? ff eX jmpq *R86
985 * ^ ^ 1029 * ^ ^
986 * instruction_begin current_position 1030 * instruction_begin current_position
987 */ 1031 */
988 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex( 1032 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex(
989 uint32_t *instruction_info_collected, 1033 uint32_t *instruction_info_collected,
990 const uint8_t **instruction_begin, 1034 const uint8_t **instruction_begin,
991 const uint8_t *current_position, 1035 const uint8_t *current_position,
992 const uint8_t *data, 1036 const uint8_t codeblock[],
993 bitmap_word *valid_targets) { 1037 bitmap_word *valid_targets) {
994 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) 1038 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
995 ExpandSuperinstructionBySandboxingBytes( 1039 ExpandSuperinstructionBySandboxingBytes(
996 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); 1040 3 /* and */ + 3 /* add */,
1041 instruction_begin,
1042 codeblock,
1043 valid_targets);
997 else 1044 else
998 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; 1045 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
999 } 1046 }
1000 1047
1001 /* 1048 /*
1002 * This function checks that naclcall or nacljmp are correct (that is: three 1049 * This function checks that naclcall or nacljmp are correct (that is: three
1003 * component instructions match) and if that is true then it merges call or jmp 1050 * component instructions match) and if that is true then it merges call or jmp
1004 * with a sandboxing to get a "superinstruction" and removes in-between jump 1051 * with a sandboxing to get a "superinstruction" and removes in-between jump
1005 * targets. If it's not true then it triggers "unrecognized instruction" error 1052 * targets. If it's not true then it triggers "unrecognized instruction" error
1006 * condition. 1053 * condition.
(...skipping 12 matching lines...) Expand all
1019 * 0: 83 eX e0 and $~0x1f,E86 1066 * 0: 83 eX e0 and $~0x1f,E86
1020 * 3: 4? 03 Xf add RBASE,R86 1067 * 3: 4? 03 Xf add RBASE,R86
1021 * 6: 4? ff eX jmpq *R86 1068 * 6: 4? ff eX jmpq *R86
1022 * ^ ^ 1069 * ^ ^
1023 * instruction_begin current_position 1070 * instruction_begin current_position
1024 */ 1071 */
1025 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex( 1072 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex(
1026 uint32_t *instruction_info_collected, 1073 uint32_t *instruction_info_collected,
1027 const uint8_t **instruction_begin, 1074 const uint8_t **instruction_begin,
1028 const uint8_t *current_position, 1075 const uint8_t *current_position,
1029 const uint8_t *data, 1076 const uint8_t codeblock[],
1030 bitmap_word *valid_targets) { 1077 bitmap_word *valid_targets) {
1031 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) 1078 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1032 ExpandSuperinstructionBySandboxingBytes( 1079 ExpandSuperinstructionBySandboxingBytes(
1033 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); 1080 3 /* and */ + 3 /* add */,
1081 instruction_begin,
1082 codeblock,
1083 valid_targets);
1034 else 1084 else
1035 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; 1085 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1036 } 1086 }
1037 1087
1038 /* 1088 /*
1039 * This function checks that naclcall or nacljmp are correct (that is: three 1089 * This function checks that naclcall or nacljmp are correct (that is: three
1040 * component instructions match) and if that is true then it merges call or jmp 1090 * component instructions match) and if that is true then it merges call or jmp
1041 * with a sandboxing to get a "superinstruction" and removes in-between jump 1091 * with a sandboxing to get a "superinstruction" and removes in-between jump
1042 * targets. If it's not true then it triggers "unrecognized instruction" error 1092 * targets. If it's not true then it triggers "unrecognized instruction" error
1043 * condition. 1093 * condition.
(...skipping 18 matching lines...) Expand all
1062 * 0: 4? 83 eX e0 and $~0x1f,E64 1112 * 0: 4? 83 eX e0 and $~0x1f,E64
1063 * 4: 4? 01 fX add RBASE,R64 1113 * 4: 4? 01 fX add RBASE,R64
1064 * 7: 4? ff eX jmpq *R64 1114 * 7: 4? ff eX jmpq *R64
1065 * ^ ^ 1115 * ^ ^
1066 * instruction_begin current_position 1116 * instruction_begin current_position
1067 */ 1117 */
1068 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex( 1118 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex(
1069 uint32_t *instruction_info_collected, 1119 uint32_t *instruction_info_collected,
1070 const uint8_t **instruction_begin, 1120 const uint8_t **instruction_begin,
1071 const uint8_t *current_position, 1121 const uint8_t *current_position,
1072 const uint8_t *data, 1122 const uint8_t codeblock[],
1073 bitmap_word *valid_targets) { 1123 bitmap_word *valid_targets) {
1074 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) 1124 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
1075 ExpandSuperinstructionBySandboxingBytes( 1125 ExpandSuperinstructionBySandboxingBytes(
1076 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); 1126 4 /* and */ + 3 /* add */,
1127 instruction_begin,
1128 codeblock,
1129 valid_targets);
1077 else 1130 else
1078 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; 1131 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1079 } 1132 }
1080 1133
1081 /* 1134 /*
1082 * This function checks that naclcall or nacljmp are correct (that is: three 1135 * This function checks that naclcall or nacljmp are correct (that is: three
1083 * component instructions match) and if that is true then it merges call or jmp 1136 * component instructions match) and if that is true then it merges call or jmp
1084 * with a sandboxing to get a "superinstruction" and removes in-between jump 1137 * with a sandboxing to get a "superinstruction" and removes in-between jump
1085 * targets. If it's not true then it triggers "unrecognized instruction" error 1138 * targets. If it's not true then it triggers "unrecognized instruction" error
1086 * condition. 1139 * condition.
(...skipping 18 matching lines...) Expand all
1105 * 0: 4? 83 eX e0 and $~0x1f,E64 1158 * 0: 4? 83 eX e0 and $~0x1f,E64
1106 * 4: 4? 03 Xf add RBASE,R64 1159 * 4: 4? 03 Xf add RBASE,R64
1107 * 7: 4? ff eX jmpq *R64 1160 * 7: 4? ff eX jmpq *R64
1108 * ^ ^ 1161 * ^ ^
1109 * instruction_begin current_position 1162 * instruction_begin current_position
1110 */ 1163 */
1111 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex( 1164 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex(
1112 uint32_t *instruction_info_collected, 1165 uint32_t *instruction_info_collected,
1113 const uint8_t **instruction_begin, 1166 const uint8_t **instruction_begin,
1114 const uint8_t *current_position, 1167 const uint8_t *current_position,
1115 const uint8_t *data, 1168 const uint8_t codeblock[],
1116 bitmap_word *valid_targets) { 1169 bitmap_word *valid_targets) {
1117 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) 1170 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1118 ExpandSuperinstructionBySandboxingBytes( 1171 ExpandSuperinstructionBySandboxingBytes(
1119 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); 1172 4 /* and */ + 3 /* add */,
1173 instruction_begin,
1174 codeblock,
1175 valid_targets);
1120 else 1176 else
1121 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; 1177 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1122 } 1178 }
1123 1179
1124 1180
1125 Bool ValidateChunkAMD64(const uint8_t *data, size_t size, 1181 Bool ValidateChunkAMD64(const uint8_t codeblock[],
1182 size_t size,
1126 uint32_t options, 1183 uint32_t options,
1127 const NaClCPUFeaturesX86 *cpu_features, 1184 const NaClCPUFeaturesX86 *cpu_features,
1128 ValidationCallbackFunc user_callback, 1185 ValidationCallbackFunc user_callback,
1129 void *callback_data) { 1186 void *callback_data) {
1130 bitmap_word valid_targets_small; 1187 bitmap_word valid_targets_small;
1131 bitmap_word jump_dests_small; 1188 bitmap_word jump_dests_small;
1132 bitmap_word *valid_targets; 1189 bitmap_word *valid_targets;
1133 bitmap_word *jump_dests; 1190 bitmap_word *jump_dests;
1134 const uint8_t *current_position; 1191 const uint8_t *current_position;
1135 const uint8_t *end_of_bundle; 1192 const uint8_t *end_of_bundle;
(...skipping 23 matching lines...) Expand all
1159 free(jump_dests); 1216 free(jump_dests);
1160 free(valid_targets); 1217 free(valid_targets);
1161 errno = ENOMEM; 1218 errno = ENOMEM;
1162 return FALSE; 1219 return FALSE;
1163 } 1220 }
1164 } 1221 }
1165 1222
1166 /* 1223 /*
1167 * This option is usually used in tests: we will process the whole chunk 1224 * This option is usually used in tests: we will process the whole chunk
1168 * in one pass. Usually each bundle is processed separately which means 1225 * in one pass. Usually each bundle is processed separately which means
1169 * instructions (and super-instructions) can not cross borders of the bundle. 1226 * instructions (and "superinstructions") can not cross borders of the bundle.
1170 */ 1227 */
1171 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) 1228 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
1172 end_of_bundle = data + size; 1229 end_of_bundle = codeblock + size;
1173 else 1230 else
1174 end_of_bundle = data + kBundleSize; 1231 end_of_bundle = codeblock + kBundleSize;
1175 1232
1176 /* 1233 /*
1177 * Main loop. Here we process the data array bundle-after-bundle. 1234 * Main loop. Here we process the codeblock array bundle-after-bundle.
1178 * Ragel-produced DFA does all the checks with one exception: direct jumps. 1235 * Ragel-produced DFA does all the checks with one exception: direct jumps.
1179 * It collects the two arrays: valid_targets and jump_dests which are used 1236 * It collects the two arrays: valid_targets and jump_dests which are used
1180 * to test direct jumps later. 1237 * to test direct jumps later.
1181 */ 1238 */
1182 for (current_position = data; 1239 for (current_position = codeblock;
1183 current_position < data + size; 1240 current_position < codeblock + size;
1184 current_position = end_of_bundle, 1241 current_position = end_of_bundle,
1185 end_of_bundle = current_position + kBundleSize) { 1242 end_of_bundle = current_position + kBundleSize) {
1186 /* Start of the instruction being processed. */ 1243 /* Start of the instruction being processed. */
1187 const uint8_t *instruction_begin = current_position; 1244 const uint8_t *instruction_begin = current_position;
1188 /* Only used locally in the end_of_instruction_cleanup action. */ 1245 /* Only used locally in the end_of_instruction_cleanup action. */
1189 const uint8_t *instruction_end; 1246 const uint8_t *instruction_end;
1190 int current_state; 1247 int current_state;
1191 uint32_t instruction_info_collected = 0; 1248 uint32_t instruction_info_collected = 0;
1192 /* Keeps one byte of information per operand in the current instruction: 1249 /*
1193 * 2 bits for register kinds, 1250 * Contains register number and type of register modification (see
1194 * 5 bits for register numbers (16 regs plus RIZ). */ 1251 * OperandKind enum) for each operand that is changed in the instruction.
1252 * Information about read-only and memory operands is not saved in 64-bit
1253 * mode.
1254 */
1195 uint32_t operand_states = 0; 1255 uint32_t operand_states = 0;
1196 enum OperandName base = NO_REG; 1256 enum OperandName base = NO_REG;
1197 enum OperandName index = NO_REG; 1257 enum OperandName index = NO_REG;
1198 enum OperandName restricted_register = 1258 enum OperandName restricted_register =
1199 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options); 1259 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
1200 uint8_t rex_prefix = FALSE; 1260 uint8_t rex_prefix = FALSE;
1201 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ 1261 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
1202 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; 1262 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
1203 uint8_t vex_prefix3 = 0x00; 1263 uint8_t vex_prefix3 = 0x00;
1204 1264
1265 /*
1266 * The "write init" statement causes Ragel to emit initialization code.
1267 * This should be executed once before the ragel machine is started.
1268 */
1205 %% write init; 1269 %% write init;
1270 /*
1271 * The "write exec" statement causes Ragel to emit the ragel machine's
1272 * execution code.
1273 */
1206 %% write exec; 1274 %% write exec;
1207 1275
1208 /* 1276 /*
1209 * Ragel DFA accepted the bundle, but we still need to make sure the last 1277 * Ragel DFA accepted the bundle, but we still need to make sure the last
1210 * instruction hasn't left %rbp or %rsp in restricted state. 1278 * instruction hasn't left %rbp or %rsp in restricted state.
1211 */ 1279 */
1212 if (restricted_register == REG_RBP) 1280 if (restricted_register == REG_RBP)
1213 result &= user_callback(end_of_bundle, end_of_bundle, 1281 result &= user_callback(end_of_bundle, end_of_bundle,
1214 RESTRICTED_RBP_UNPROCESSED | 1282 RESTRICTED_RBP_UNPROCESSED |
1215 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) & 1283 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
1216 RESTRICTED_REGISTER_MASK), callback_data); 1284 RESTRICTED_REGISTER_MASK), callback_data);
1217 else if (restricted_register == REG_RSP) 1285 else if (restricted_register == REG_RSP)
1218 result &= user_callback(end_of_bundle, end_of_bundle, 1286 result &= user_callback(end_of_bundle, end_of_bundle,
1219 RESTRICTED_RSP_UNPROCESSED | 1287 RESTRICTED_RSP_UNPROCESSED |
1220 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) & 1288 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
1221 RESTRICTED_REGISTER_MASK), callback_data); 1289 RESTRICTED_REGISTER_MASK), callback_data);
1222 } 1290 }
1223 1291
1224 /* 1292 /*
1225 * Check the direct jumps. All the targets from jump_dests must be in 1293 * Check the direct jumps. All the targets from jump_dests must be in
1226 * valid_targets. 1294 * valid_targets.
1227 */ 1295 */
1228 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, 1296 result &= ProcessInvalidJumpTargets(codeblock,
1229 user_callback, callback_data); 1297 size,
1298 valid_targets,
1299 jump_dests,
1300 user_callback,
1301 callback_data);
1230 1302
1231 /* We only use malloc for large code sequences */ 1303 /* We only use malloc for large code sequences */
1232 if (jump_dests != &jump_dests_small) free(jump_dests); 1304 if (jump_dests != &jump_dests_small) free(jump_dests);
1233 if (valid_targets != &valid_targets_small) free(valid_targets); 1305 if (valid_targets != &valid_targets_small) free(valid_targets);
1234 if (!result) errno = EINVAL; 1306 if (!result) errno = EINVAL;
1235 return result; 1307 return result;
1236 } 1308 }
OLDNEW
« no previous file with comments | « src/trusted/validator_ragel/validator_x86_32.rl ('k') | src/trusted/validator_x86/testdata/64/stack_regs.test » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698