Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(207)

Side by Side Diff: src/trusted/validator_ragel/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. 2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be 3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file. 4 * found in the LICENSE file.
5 */ 5 */
6 6
7 /* 7 /*
8 * This is the core of amd64-mode validator. Please note that this file 8 * This is the core of amd64-mode validator. Please note that this file
9 * combines ragel machine description and C language actions. Please read 9 * combines ragel machine description and C language actions. Please read
10 * validator_internals.html first to understand how the whole thing is built: 10 * validator_internals.html first to understand how the whole thing is built:
11 * it explains how the byte sequences are constructed, what constructs like 11 * it explains how the byte sequences are constructed, what constructs like
12 * "@{}" or "REX_WRX?" mean, etc. 12 * "@{}" or "REX_WRX?" mean, etc.
13 */ 13 */
14 14
15 #include <assert.h> 15 #include <assert.h>
16 #include <errno.h> 16 #include <errno.h>
17 #include <stddef.h> 17 #include <stddef.h>
18 #include <stdio.h> 18 #include <stdio.h>
19 #include <stdlib.h> 19 #include <stdlib.h>
20 #include <string.h> 20 #include <string.h>
21 21
22 #include "native_client/src/trusted/validator_ragel/bitmap.h" 22 #include "native_client/src/trusted/validator_ragel/bitmap.h"
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h" 23 #include "native_client/src/trusted/validator_ragel/validator_internal.h"
24 24
25 %%{ 25 %%{
26 machine x86_64_validator; 26 machine x86_64_validator;
27 alphtype unsigned char; 27 alphtype unsigned char;
28 variable p current_position; 28 variable p current_position;
29 variable pe end_of_bundle; 29 variable pe end_of_bundle;
30 variable eof end_of_bundle; 30 variable eof end_of_bundle;
31 variable cs current_state; 31 variable cs current_state;
32 32
33 include byte_machine "byte_machines.rl"; 33 include byte_machine "byte_machines.rl";
(...skipping 20 matching lines...) Expand all
54 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 54 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
55 include immediate_fields_parsing 55 include immediate_fields_parsing
56 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 56 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
57 include relative_fields_validator_actions 57 include relative_fields_validator_actions
58 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 58 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
59 include relative_fields_parsing 59 include relative_fields_parsing
60 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 60 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
61 include cpuid_actions 61 include cpuid_actions
62 "native_client/src/trusted/validator_ragel/parse_instruction.rl"; 62 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
63 63
64 action check_access { 64 action check_memory_access {
65 CheckAccess(instruction_begin - data, base, index, restricted_register, 65 CheckMemoryAccess(instruction_begin - codeblock,
66 valid_targets, &instruction_info_collected); 66 base,
67 } 67 index,
68 68 restricted_register,
69 # Action which marks last byte as not immediate. Most 3DNow! instructions, 69 valid_targets,
70 # some AVX and XOP instructions have this proerty. It's referenced by 70 &instruction_info_collected);
71 # decode_x86_32 machine in [autogenerated] "validator_x86_32_instruction.rl"
72 # file.
73 action last_byte_is_not_immediate {
74 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
75 } 71 }
76 72
77 action modifiable_instruction { 73 action modifiable_instruction {
78 instruction_info_collected |= MODIFIABLE_INSTRUCTION; 74 instruction_info_collected |= MODIFIABLE_INSTRUCTION;
79 } 75 }
80 76
81 action process_0_operands { 77 action process_0_operands {
82 Process0Operands(&restricted_register, &instruction_info_collected); 78 Process0Operands(&restricted_register, &instruction_info_collected);
83 } 79 }
84 action process_1_operand { 80 action process_1_operand {
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). 123 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
128 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp 124 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp
129 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp 125 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
130 # Note: restricted_register keeps the restricted register as explained in 126 # Note: restricted_register keeps the restricted register as explained in
131 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems 127 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
132 # 128 #
133 # "Normal" instructions can not be used in a place where %rbp is restricted. 129 # "Normal" instructions can not be used in a place where %rbp is restricted.
134 # But since these instructions are "second half" of the %rbp sandboxing they 130 # But since these instructions are "second half" of the %rbp sandboxing they
135 # can be used *only* when %rbp is restricted. 131 # can be used *only* when %rbp is restricted.
136 # 132 #
137 # That is (normal instruction): 133 # Compare:
138 # mov %eax,%ebp 134 # mov %eax,%ebp
139 # mov %esi,%edi <- Error: %ebp is restricted 135 # mov %esi,%edi <- Error: %ebp is restricted
140 # vs 136 # vs
141 # mov %esi,%edi 137 # mov %esi,%edi
142 # add %r15,%rbp <- Error: %ebp is *not* restricted 138 # add %r15,%rbp <- Error: %ebp is *not* restricted
143 # vs 139 # vs
144 # mov %eax,%ebp 140 # mov %eax,%ebp
145 # add %r15,%rbp <- Ok: %rbp is restricted as it should be 141 # add %r15,%rbp <- Ok: %rbp is restricted as it should be
146 # 142 #
147 # Check this precondition and mark the beginning of the instruction as 143 # Check this precondition and mark the beginning of the instruction as
148 # invalid jump target. 144 # invalid jump target.
149 @{ if (restricted_register == REG_RBP) 145 @{ if (restricted_register == REG_RBP)
146 /* RESTRICTED_REGISTER_USED is informational flag used in tests. */
150 instruction_info_collected |= RESTRICTED_REGISTER_USED; 147 instruction_info_collected |= RESTRICTED_REGISTER_USED;
151 else 148 else
149 /* UNRESTRICTED_RBP_PROCESSED is error flag used in production. */
152 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; 150 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
153 restricted_register = NO_REG; 151 restricted_register = NO_REG;
154 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); 152 UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets);
155 }; 153 };
156 154
157 # Special %rsp modifications - the ones which don't need a sandboxing. 155 # Special %rsp modifications - the ones which don't need a sandboxing.
158 # 156 #
159 # Note that there are two different opcodes for "mov": in x86-64 there are two 157 # Note that there are two different opcodes for "mov": in x86-64 there are two
160 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move 158 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
161 # from REG field to RM or in the other direction thus there are two encodings 159 # from REG field to RM or in the other direction thus there are two encodings
162 # for the register-to-register move. 160 # for the register-to-register move.
163 rsp_modifications = 161 rsp_modifications =
164 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp 162 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 # mov %eax,%esp 200 # mov %eax,%esp
203 # add %r15,%rsp <- Ok: %rsp is restricted as it should be 201 # add %r15,%rsp <- Ok: %rsp is restricted as it should be
204 # 202 #
205 # Check this precondition and mark the beginning of the instruction as 203 # Check this precondition and mark the beginning of the instruction as
206 # invalid jump target. 204 # invalid jump target.
207 @{ if (restricted_register == REG_RSP) 205 @{ if (restricted_register == REG_RSP)
208 instruction_info_collected |= RESTRICTED_REGISTER_USED; 206 instruction_info_collected |= RESTRICTED_REGISTER_USED;
209 else 207 else
210 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; 208 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
211 restricted_register = NO_REG; 209 restricted_register = NO_REG;
212 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); 210 UnmarkValidJumpTarget((instruction_begin - codeblock), valid_targets);
213 }; 211 };
214 212
215 # naclcall or nacljmp. These are three-instruction indirection-jump sequences. 213 # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
216 # and $~0x1f, %eXX 214 # and $~0x1f, %eXX
217 # and RBASE, %rXX 215 # and RBASE, %rXX
218 # jmpq *%rXX (or: callq *%rXX) 216 # jmpq *%rXX (or: callq *%rXX)
219 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not 217 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
220 # just as part of the naclcall/nacljmp, but also as a standolene instruction). 218 # just as part of the naclcall/nacljmp, but also as a standalone instruction).
221 # 219 #
222 # This means that when naclcall_or_nacljmp ragel machine will be combined with 220 # This means that when naclcall_or_nacljmp ragel machine will be combined with
223 # "normal_instruction*" regular action process_1_operand_zero_extends will be 221 # "normal_instruction*" regular action process_1_operand_zero_extends will be
224 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64 222 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64
225 # instruction. This action will check if %rbp/%rsp is legally modified thus 223 # instruction. This action will check if %rbp/%rsp is legally modified thus
226 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine. 224 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
227 # 225 #
228 # There are number of variants present which differ by the REX prefix usage: 226 # There are number of variants present which differ by the REX prefix usage:
229 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq" 227 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq"
230 # or "callq" is the same register and it's much simpler to do if one single 228 # or "callq" is the same register and it's much simpler to do if one single
231 # action handles only fixed number of bytes. 229 # action handles only fixed number of bytes.
232 # 230 #
233 # Additional complication arises because x86-64 contains two different "add" 231 # Additional complication arises because x86-64 contains two different "add"
234 # instruction: with "0x01" and "0x03" opcode. They differ in the direction 232 # instruction: with "0x01" and "0x03" opcode. They differ in the direction
235 # used: both can encode "add %src_register, %dst_register", but the first one 233 # used: both can encode "add %src_register, %dst_register", but the first one
236 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M 234 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
237 # byte for the dst while last one uses field RM of the ModR/M byte for the src 235 # byte for the dst while last one uses field RM of the ModR/M byte for the src
238 # and field REG of the ModR/M byte for dst. Both should be allowed. 236 # and field REG of the ModR/M byte for dst. Both should be allowed.
239 # 237 #
240 # See AMD/Intel manual for clarification "add" instruction encoding. 238 # See AMD/Intel manual for clarification about "add" instruction encoding.
241 # 239 #
242 # REGISTER USAGE ABBREVIATIONS: 240 # REGISTER USAGE ABBREVIATIONS:
243 # E86: legacy ia32 registers (all eight: %eax to %edi) 241 # E86: legacy ia32 registers (all eight: %eax to %edi)
244 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) 242 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
245 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) 243 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
246 # R64: new amd64 registers (only seven: %r8 to %r14) 244 # R64: new amd64 registers (only seven: %r8 to %r14)
247 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) 245 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)
246 #
247 # Note that in the actions below instruction_begin points to the start of the
248 # "call" or "jmp" instruction and current_position points to its end.
248 naclcall_or_nacljmp = 249 naclcall_or_nacljmp =
249 # This block encodes call and jump "superinstruction" of the following form: 250 # This block encodes call and jump "superinstruction" of the following form:
250 # 0: 83 e_ e0 and $~0x1f,E86 251 # 0: 83 e_ e0 and $~0x1f,E86
251 # 3: 4_ 01 f_ add RBASE,R86 252 # 3: 4_ 01 f_ add RBASE,R86
252 # 6: ff e_ jmpq *R86 253 # 6: ff e_ jmpq *R86
253 #### INSTRUCTION ONE (three bytes) 254 #### INSTRUCTION ONE (three bytes)
254 # and $~0x1f, E86 255 # and $~0x1f, E86
255 (0x83 b_11_100_xxx 0xe0 256 (0x83 b_11_100_xxx 0xe0
256 #### INSTRUCTION TWO (three bytes) 257 #### INSTRUCTION TWO (three bytes)
257 # add RBASE, R86 (0x01 opcode) 258 # add RBASE, R86 (0x01 opcode)
258 b_0100_11x0 0x01 b_11_111_xxx 259 b_0100_11x0 0x01 b_11_111_xxx
259 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) 260 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
260 # callq R86 261 # callq R86
261 ((REX_WRX? 0xff b_11_010_xxx) | 262 ((REX_WRX? 0xff b_11_010_xxx) |
262 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) 263 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
263 # jmpq R86 264 # jmpq R86
264 (REX_WRX? 0xff b_11_100_xxx))) 265 (REX_WRX? 0xff b_11_100_xxx)))
265 @{ 266 @{
266 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, 267 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
267 &instruction_begin, current_position, 268 &instruction_begin,
268 data, valid_targets); 269 current_position,
270 codeblock,
271 valid_targets);
269 } | 272 } |
270 273
271 # This block encodes call and jump "superinstruction" of the following form: 274 # This block encodes call and jump "superinstruction" of the following form:
272 # 0: 83 e_ e0 and $~0x1f,E86 275 # 0: 83 e_ e0 and $~0x1f,E86
273 # 3: 4_ 03 _f add RBASE,R86 276 # 3: 4_ 03 _f add RBASE,R86
274 # 6: ff e_ jmpq *R86 277 # 6: ff e_ jmpq *R86
275 #### INSTRUCTION ONE (three bytes) 278 #### INSTRUCTION ONE (three bytes)
276 # and $~0x1f, E86 279 # and $~0x1f, E86
277 (0x83 b_11_100_xxx 0xe0 280 (0x83 b_11_100_xxx 0xe0
278 #### INSTRUCTION TWO (three bytes) 281 #### INSTRUCTION TWO (three bytes)
279 # add RBASE, R86 (0x03 opcode) 282 # add RBASE, R86 (0x03 opcode)
280 b_0100_10x1 0x03 b_11_xxx_111 283 b_0100_10x1 0x03 b_11_xxx_111
281 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) 284 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
282 # callq R86 285 # callq R86
283 ((REX_WRX? 0xff b_11_010_xxx) | 286 ((REX_WRX? 0xff b_11_010_xxx) |
284 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) 287 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
285 # jmpq R86 288 # jmpq R86
286 (REX_WRX? 0xff b_11_100_xxx))) 289 (REX_WRX? 0xff b_11_100_xxx)))
287 @{ 290 @{
288 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, 291 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
289 &instruction_begin, current_position, 292 &instruction_begin,
290 data, valid_targets); 293 current_position,
294 codeblock,
295 valid_targets);
291 } | 296 } |
292 297
293 # This block encodes call and jump "superinstruction" of the following form: 298 # This block encodes call and jump "superinstruction" of the following form:
294 # 0: 4_ 83 e_ e0 and $~0x1f,E86 299 # 0: 4_ 83 e_ e0 and $~0x1f,E86
295 # 4: 4_ 01 f_ add RBASE,R86 300 # 4: 4_ 01 f_ add RBASE,R86
296 # 7: ff e_ jmpq *R86 301 # 7: ff e_ jmpq *R86
297 #### INSTRUCTION ONE (four bytes) 302 #### INSTRUCTION ONE (four bytes)
298 # and $~0x1f, E86 303 # and $~0x1f, E86
299 ((REX_RX 0x83 b_11_100_xxx 0xe0 304 ((REX_RX 0x83 b_11_100_xxx 0xe0
300 #### INSTRUCTION TWO (three bytes) 305 #### INSTRUCTION TWO (three bytes)
(...skipping 17 matching lines...) Expand all
318 # add RBASE, R64 (0x01 opcode) 323 # add RBASE, R64 (0x01 opcode)
319 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111) 324 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
320 #### INSTRUCTION THREE: call (three bytes) 325 #### INSTRUCTION THREE: call (three bytes)
321 # callq R64 326 # callq R64
322 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | 327 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
323 #### INSTRUCTION THREE: jmp (three bytes) 328 #### INSTRUCTION THREE: jmp (three bytes)
324 # jmpq R64 329 # jmpq R64
325 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) 330 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
326 @{ 331 @{
327 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, 332 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
328 &instruction_begin, current_position, 333 &instruction_begin,
329 data, valid_targets); 334 current_position,
335 codeblock,
336 valid_targets);
330 } | 337 } |
331 338
332 # This block encodes call and jump "superinstruction" of the following form: 339 # This block encodes call and jump "superinstruction" of the following form:
333 # 0: 4_ 83 e_ e0 and $~0x1f,E86 340 # 0: 4_ 83 e_ e0 and $~0x1f,E86
334 # 4: 4_ 03 _f add RBASE,R86 341 # 4: 4_ 03 _f add RBASE,R86
335 # 7: ff e_ jmpq *R86 342 # 7: ff e_ jmpq *R86
336 #### INSTRUCTION ONE (four bytes) 343 #### INSTRUCTION ONE (four bytes)
337 # and $~0x1f, E86 344 # and $~0x1f, E86
338 ((REX_RX 0x83 b_11_100_xxx 0xe0 345 ((REX_RX 0x83 b_11_100_xxx 0xe0
339 #### INSTRUCTION TWO (three bytes) 346 #### INSTRUCTION TWO (three bytes)
(...skipping 17 matching lines...) Expand all
357 # add RBASE, R64 (0x03 opcode) 364 # add RBASE, R64 (0x03 opcode)
358 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111) 365 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
359 #### INSTRUCTION THREE: call (three bytes) 366 #### INSTRUCTION THREE: call (three bytes)
360 # callq R64 367 # callq R64
361 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | 368 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
362 #### INSTRUCTION THREE: jmp (three bytes) 369 #### INSTRUCTION THREE: jmp (three bytes)
363 # jmpq R64 370 # jmpq R64
364 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) 371 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
365 @{ 372 @{
366 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, 373 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
367 &instruction_begin, current_position, 374 &instruction_begin,
368 data, valid_targets); 375 current_position,
376 codeblock,
377 valid_targets);
369 }; 378 };
370 379
371 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rdi) operand 380 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rdi) operand
372 381
373 # maskmovq %mmX,%mmY (EMMX or SSE) 382 # maskmovq %mmX,%mmY (EMMX or SSE)
374 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers; 383 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers;
375 384
376 # maskmovdqu %xmmX, %xmmY (SSE2) 385 # maskmovdqu %xmmX, %xmmY (SSE2)
377 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers; 386 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers;
378 387
379 # vmaskmovdqu %xmmX, %xmmY (AVX) 388 # vmaskmovdqu %xmmX, %xmmY (AVX)
380 vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) | 389 vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) |
381 (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers; 390 (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers;
382 391
383 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu; 392 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu;
384 393
385 # Temporary fix: for string instructions combination of data16 and rep(ne) 394 # Temporary fix: for string instructions combination of data16 and rep(ne)
386 # prefixes is disallowed to mimic old validator behavior. 395 # prefixes is disallowed to mimic old validator behavior.
387 # See http://code.google.com/p/nativeclient/issues/detail?id=1950 396 # See http://code.google.com/p/nativeclient/issues/detail?id=1950
388 397
389 # data16rep = (data16 | rep data16 | data16 rep); 398 # data16rep = (data16 | rep data16 | data16 rep);
390 # data16condrep = (data16 | condrep data16 | data16 condrep); 399 # data16condrep = (data16 | condrep data16 | data16 condrep);
391 data16rep = data16; 400 data16rep = data16;
392 data16condrep = data16; 401 data16condrep = data16;
393 402
394 # String instructions which use only %ds:(%rsi) 403 # String instructions which use only %ds:(%rsi)
395 string_instruction_rsi_no_rdi = 404 string_instruction_rsi_no_rdi =
396 (rep? 0xac | # lods %ds:(%rsi),%al 405 (rep? 0xac | # lods %ds:(%rsi),%al
397 data16rep 0xad | # lods %ds:(%rsi),%ax 406 data16rep 0xad | # lods %ds:(%rsi),%ax
398 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax 407 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax
399 408
400 # String instructions which use only %es:(%rdi) 409 # String instructions which use only %es:(%rdi)
401 string_instruction_rdi_no_rsi = 410 string_instruction_rdi_no_rsi =
402 condrep? 0xae | # scas %es:(%rdi),%al 411 condrep? 0xae | # scas %es:(%rdi),%al
403 data16condrep 0xaf | # scas %es:(%rdi),%ax 412 data16condrep 0xaf | # scas %es:(%rdi),%ax
404 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax 413 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax
405 414
406 rep? 0xaa | # stos %al,%es:(%rdi) 415 rep? 0xaa | # stos %al,%es:(%rdi)
407 data16rep 0xab | # stos %ax,%es:(%rdi) 416 data16rep 0xab | # stos %ax,%es:(%rdi)
408 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi) 417 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)
409 418
410 # String instructions which use both %ds:(%rsi) and %es:(%rdi) 419 # String instructions which use both %ds:(%rsi) and %es:(%rdi)
411 string_instruction_rsi_rdi = 420 string_instruction_rsi_rdi =
412 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi) 421 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi)
413 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi) 422 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi)
414 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi) 423 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi)
415 424
416 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi) 425 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi)
417 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi) 426 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi)
418 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi) 427 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi)
419 428
420 # "Superinstruction" which includes %rsi sandboxing. 429 # Sandboxing operations for %rsi. There are two versions: 6 bytes long and
421 # 430 # 7 bytes long (depending on presence of spurious REX prefix).
422 # There are two variants which handle spurious REX prefixes.
423 # 431 #
424 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64 432 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64
425 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may 433 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
426 # be used to move from REG field to RM or in the other direction thus there 434 # be used to move from REG field to RM or in the other direction thus there
427 # are two encodings for the register-to-register move (and since REG and RM 435 # are two encodings for the register-to-register move (and since REG and RM
428 # are identical here only opcode differs). 436 # are identical here only opcode differs).
429 sandbox_instruction_rsi_no_rdi = 437 sandbox_rsi_6_bytes =
430 (0x89 | 0x8b) 0xf6 # mov %esi,%esi 438 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
431 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 439 0x49 0x8d 0x34 0x37; # lea (%r15,%rsi,1),%rsi
432 string_instruction_rsi_no_rdi 440 sandbox_rsi_7_bytes =
433 @{ 441 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
434 ExpandSuperinstructionBySandboxingBytes( 442 0x49 0x8d 0x34 0x37; # lea (%r15,%rsi,1),%rsi
435 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
436 } |
437 443
438 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi 444 # Sandboxing operations for %rdi. There are two versions: 6 bytes long and
439 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 445 # 7 bytes long (depending on presence of spurious REX prefix).
440 string_instruction_rsi_no_rdi
441 @{
442 ExpandSuperinstructionBySandboxingBytes(
443 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
444 };
445
446 # "Superinstruction" which includes %rdi sandboxing.
447 #
448 # There are two variants which handle spurious REX prefixes.
449 # 446 #
450 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 447 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64
451 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may 448 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
452 # be used to move from REG field to RM or in the other direction thus there 449 # be used to move from REG field to RM or in the other direction thus there
453 # are two encodings for the register-to-register move (and since REG and RM 450 # are two encodings for the register-to-register move (and since REG and RM
454 # are identical here only opcode differs). 451 # are identical here only opcode differs).
452 sandbox_rdi_6_bytes =
453 (0x89 | 0x8b) 0xff # mov %edi,%edi
454 0x49 0x8d 0x3c 0x3f; # lea (%r15,%rdi,1),%rdi
455 sandbox_rdi_7_bytes =
456 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
457 0x49 0x8d 0x3c 0x3f; # lea (%r15,%rdi,1),%rdi
458
459 # "Superinstruction" which includes %rsi sandboxing.
460 #
461 # There are two variants which handle spurious REX prefixes.
462 sandbox_instruction_rsi_no_rdi =
463 sandbox_rsi_6_bytes
464 string_instruction_rsi_no_rdi
465 @{
466 ExpandSuperinstructionBySandboxingBytes(
467 6 /* sandbox_rsi_6_bytes */,
468 &instruction_begin,
469 codeblock,
470 valid_targets);
471 } |
472
473 sandbox_rsi_7_bytes
474 string_instruction_rsi_no_rdi
475 @{
476 ExpandSuperinstructionBySandboxingBytes(
477 7 /* sandbox_rsi_7_bytes */,
478 &instruction_begin,
479 codeblock,
480 valid_targets);
481 };
482
483 # "Superinstruction" which includes %rdi sandboxing.
484 #
485 # There are two variants which handle spurious REX prefixes.
455 sandbox_instruction_rdi_no_rsi = 486 sandbox_instruction_rdi_no_rsi =
456 (0x89 | 0x8b) 0xff # mov %edi,%edi 487 sandbox_rdi_6_bytes
457 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
458 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) 488 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
459 @{ 489 @{
460 ExpandSuperinstructionBySandboxingBytes( 490 ExpandSuperinstructionBySandboxingBytes(
461 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); 491 6 /* sandbox_rdi_6_bytes */,
492 &instruction_begin,
493 codeblock,
494 valid_targets);
462 } | 495 } |
463 496
464 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi 497 sandbox_rdi_7_bytes
465 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
466 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) 498 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
467 @{ 499 @{
468 ExpandSuperinstructionBySandboxingBytes( 500 ExpandSuperinstructionBySandboxingBytes(
469 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); 501 7 /* sandbox_rdi_7_bytes */,
502 &instruction_begin,
503 codeblock,
504 valid_targets);
470 }; 505 };
471 506
472 507
473 # "Superinstruction" which includes both %rsi and %rdi sandboxing. 508 # "Superinstruction" which includes both %rsi and %rdi sandboxing.
474 # 509 #
475 # There are four variants which handle spurious REX prefixes. 510 # There are four variants which handle spurious REX prefixes.
476 #
477 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both
478 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two
479 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
480 # from REG field to RM or in the other direction thus there are two encodings
481 # for the register-to-register move (and since REG and RM are identical here
482 # only opcode differs).
483 sandbox_instruction_rsi_rdi = 511 sandbox_instruction_rsi_rdi =
484 (0x89 | 0x8b) 0xf6 # mov %esi,%esi 512 sandbox_rsi_6_bytes
485 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 513 sandbox_rdi_6_bytes
486 (0x89 | 0x8b) 0xff # mov %edi,%edi
487 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
488 string_instruction_rsi_rdi 514 string_instruction_rsi_rdi
489 @{ 515 @{
490 ExpandSuperinstructionBySandboxingBytes( 516 ExpandSuperinstructionBySandboxingBytes(
491 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, 517 6 /* sandbox_rsi_6_bytes */ + 6 /* sandbox_rdi_6_bytes */,
492 &instruction_begin, data, valid_targets); 518 &instruction_begin,
519 codeblock,
520 valid_targets);
493 } | 521 } |
494 522
495 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi 523 ((sandbox_rsi_6_bytes
496 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 524 sandbox_rdi_7_bytes) |
497 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
498 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi
499 525
500 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi 526 (sandbox_rsi_7_bytes
501 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi 527 sandbox_rdi_6_bytes))
502 (0x89 | 0x8b) 0xff # mov %edi,%edi
503 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi
504 string_instruction_rsi_rdi 528 string_instruction_rsi_rdi
505 @{ 529 @{
506 ExpandSuperinstructionBySandboxingBytes( 530 ExpandSuperinstructionBySandboxingBytes(
507 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ 531 6 /* sandbox_rsi_6_bytes */ + 7 /* sandbox_rdi_7_bytes */
508 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, 532 /* == 7 (* sandbox_rsi_7_bytes *) + 6 (* sandbox_rdi_6_bytes *) */,
509 &instruction_begin, data, valid_targets); 533 &instruction_begin,
534 codeblock,
535 valid_targets);
510 } | 536 } |
511 537
512 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi 538 sandbox_rsi_7_bytes
513 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi 539 sandbox_rdi_7_bytes
514 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
515 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
516 string_instruction_rsi_rdi 540 string_instruction_rsi_rdi
517 @{ 541 @{
518 ExpandSuperinstructionBySandboxingBytes( 542 ExpandSuperinstructionBySandboxingBytes(
519 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, 543 7 /* sandbox_rsi_7_bytes */ + 7 /* sandbox_rdi_7_bytes */,
520 &instruction_begin, data, valid_targets); 544 &instruction_begin,
545 codeblock,
546 valid_targets);
521 }; 547 };
522 548
523 # All the "special" instructions (== instructions which obey non-standard 549 # All the "special" instructions (== instructions which obey non-standard
524 # rules). Three groups: 550 # rules). Three groups:
525 # * %rbp/%rsp related instructions (these instructions are special because 551 # * %rbp/%rsp related instructions (these registers and operations which
526 # they must be in the range %r15...%r15+4294967295 except momentarily they 552 # operate on them are special because registers must be in the range
527 # can be in the range 0...4294967295) 553 # %r15...%r15+4294967295 except momentarily they can be in the range
554 # 0...4294967295, but then the very next instruction MUST restore the
555 # status quo).
528 # * string instructions (which can not use %r15 as base and thus need special 556 # * string instructions (which can not use %r15 as base and thus need special
529 # handling both in compiler and validator) 557 # handling both in compiler and validator)
530 # * naclcall/nacljmp (indirect jumps need special care) 558 # * naclcall/nacljmp (indirect jumps need special care)
531 special_instruction = 559 special_instruction =
532 (rbp_modifications | 560 (rbp_modifications |
533 rsp_modifications | 561 rsp_modifications |
534 rbp_sandboxing | 562 rbp_sandboxing |
535 rsp_sandboxing | 563 rsp_sandboxing |
536 sandbox_instruction_rsi_no_rdi | 564 sandbox_instruction_rsi_no_rdi |
537 sandbox_instruction_rdi_no_rsi | 565 sandbox_instruction_rdi_no_rsi |
538 sandbox_instruction_rsi_rdi | 566 sandbox_instruction_rsi_rdi |
539 naclcall_or_nacljmp) 567 naclcall_or_nacljmp)
540 # Mark the instruction as special - currently this information is used only 568 # Mark the instruction as special - currently this information is used only
541 # in tests, but in the future we may use it for dynamic code modification 569 # in tests, but in the future we may use it for dynamic code modification
542 # support. 570 # support.
543 @{ 571 @{
544 instruction_info_collected |= SPECIAL_INSTRUCTION; 572 instruction_info_collected |= SPECIAL_INSTRUCTION;
545 }; 573 };
546 574
547 # Remove special instructions which are only allowed in special cases. 575 # Remove special instructions which are only allowed in special cases.
548 normal_instruction = one_instruction - special_instruction; 576 normal_instruction = one_instruction - special_instruction;
549 577
550 # Check if call is properly aligned. 578 # For direct call we explicitly encode all variations.
551 # 579 direct_call = (data16 REX_RXB? 0xe8 rel16) |
552 # For direct call we explicitly encode all variations. For indirect call 580 (REX_WRXB? 0xe8 rel32) |
553 # we accept all the special instructions which ends with register-addressed 581 (data16 REXW_RXB 0xe8 rel32);
554 # indirect call. 582
583 # For indirect call we accept only near register-addressed indirect call.
584 indirect_call_register = data16? REX_WRXB? 0xff (opcode_2 & modrm_registers);
585
586 # Ragel machine that accepts one call instruction or call superinstruction and
587 # checks if call is properly aligned.
555 call_alignment = 588 call_alignment =
556 ((normal_instruction & 589 ((normal_instruction & direct_call) |
557 # Direct call 590 # For indirect calls we accept all the special instructions which ends with
558 ((data16 REX_RXB? 0xe8 rel16) | 591 # register-addressed indirect call.
559 (REX_WRXB? 0xe8 rel32) | 592 (special_instruction & (any* indirect_call_register)))
560 (data16 REXW_RXB 0xe8 rel32))) |
561 (special_instruction &
562 # Indirect call
563 (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
564 modrm_registers))))
565 # Call instruction must be aligned to the end of bundle. Previously this was 593 # Call instruction must be aligned to the end of bundle. Previously this was
566 # a strict requirement, today it's just a warning to aid with debugging. 594 # a strict requirement, today it's just a warning to aid with debugging.
567 @{ 595 @{
568 if (((current_position - data) & kBundleMask) != kBundleMask) 596 if (((current_position - codeblock) & kBundleMask) != kBundleMask)
569 instruction_info_collected |= BAD_CALL_ALIGNMENT; 597 instruction_info_collected |= BAD_CALL_ALIGNMENT;
570 }; 598 };
571 599
572 # This action calls user's callback (if needed) and cleans up validator's 600 # This action calls user's callback (if needed) and cleans up validator's
573 # internal state. 601 # internal state.
574 # 602 #
575 # We call the user callback if there are validation errors or if the 603 # We call the user callback either on validation errors or on every
576 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. 604 # instruction, depending on the CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option.
577 # 605 #
578 # After that we move instruction_begin and clean all the variables which 606 # After that we move instruction_begin and clean all the variables which
579 # only used in the processing of a single instruction (prefixes, operand 607 # are only used in the processing of a single instruction (prefixes, operand
580 # states and instruction_info_collected). 608 # states and instruction_info_collected).
581 action end_of_instruction_cleanup { 609 action end_of_instruction_cleanup {
582 /* Call user-supplied callback. */ 610 /* Call user-supplied callback. */
583 instruction_end = current_position + 1; 611 instruction_end = current_position + 1;
584 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) || 612 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
585 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) { 613 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
586 result &= user_callback( 614 result &= user_callback(
587 instruction_begin, instruction_end, 615 instruction_begin, instruction_end,
588 instruction_info_collected | 616 instruction_info_collected |
589 ((restricted_register << RESTRICTED_REGISTER_SHIFT) & 617 ((restricted_register << RESTRICTED_REGISTER_SHIFT) &
590 RESTRICTED_REGISTER_MASK), callback_data); 618 RESTRICTED_REGISTER_MASK), callback_data);
591 } 619 }
592 620
593 /* On successful match the instruction_begin must point to the next byte 621 /* On successful match the instruction_begin must point to the next byte
594 * to be able to report the new offset as the start of instruction 622 * to be able to report the new offset as the start of instruction
595 * causing error. */ 623 * causing error. */
596 instruction_begin = instruction_end; 624 instruction_begin = instruction_end;
597 625
598 /* Mark start of the next instruction as a valid target for jump. 626 /*
599 * Note: we mark start of the next instruction here, not start of the 627 * We may set instruction_begin at the first byte of the instruction instead
600 * current one because memory access check should be able to clear this 628 * of here but in the case of incorrect one byte instructions user callback
601 * bit when restricted register is used. */ 629 * may be called before instruction_begin is set.
602 MarkValidJumpTarget(instruction_begin - data, valid_targets); 630 */
631 MarkValidJumpTarget(instruction_begin - codeblock, valid_targets);
603 632
604 /* Clear variables. */ 633 /* Clear variables. */
605 instruction_info_collected = 0; 634 instruction_info_collected = 0;
606 SET_REX_PREFIX(FALSE); 635 SET_REX_PREFIX(FALSE);
607 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ 636 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
608 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B); 637 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B);
609 SET_VEX_PREFIX3(0x00); 638 SET_VEX_PREFIX3(0x00);
610 operand_states = 0; 639 operand_states = 0;
611 base = 0; 640 base = 0;
612 index = 0; 641 index = 0;
613 } 642 }
614 643
615 # This action reports fatal error detected by DFA. 644 # This action reports fatal error detected by DFA.
616 action report_fatal_error { 645 action report_fatal_error {
617 result &= user_callback(instruction_begin, current_position, 646 result &= user_callback(instruction_begin, current_position,
618 UNRECOGNIZED_INSTRUCTION, callback_data); 647 UNRECOGNIZED_INSTRUCTION, callback_data);
619 /* 648 /*
620 * Process the next bundle: "continue" here is for the "for" cycle in 649 * Process the next bundle: "continue" here is for the "for" cycle in
621 * the ValidateChunkAMD64 function. 650 * the ValidateChunkAMD64 function.
622 * 651 *
623 * It does not affect the case which we really care about (when code 652 * It does not affect the case which we really care about (when code
624 * is validatable), but makes it possible to detect more errors in one 653 * is validatable), but makes it possible to detect more errors in one
625 * run in tools like ncval. 654 * run in tools like ncval.
626 */ 655 */
627 continue; 656 continue;
628 } 657 }
629 658
630 # This is the main ragel machine: it does 99% of validation work. There is only 659 # This is the main ragel machine: it does 99% of validation work. There is only
631 # one thing to do with bundle if this machine accepts the bundle: 660 # one thing to do with bundle if this ragel machine accepts the bundle:
632 # * check for the state of the restricted_register at the end of the bundle. 661 # * check for the state of the restricted_register at the end of the bundle.
633 # It's an error if %rbp or %rsp is restricted at the end of the bundle. 662 # It's an error if %rbp or %rsp is restricted at the end of the bundle.
634 # Additionally if all the bundles are fine you need to check that direct jumps 663 # Additionally if all the bundles are fine you need to check that direct jumps
635 # are correct. This is done in the following way: 664 # are correct. This is done in the following way:
636 # * DFA fills two arrays: valid_targets and jump_dests. 665 # * DFA fills two arrays: valid_targets and jump_dests.
637 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". 666 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".
638 # All other checks are done here. 667 # All other checks are done here.
639 668
640 main := ((call_alignment | normal_instruction | special_instruction) 669 main := ((call_alignment | normal_instruction | special_instruction)
641 @end_of_instruction_cleanup)* 670 @end_of_instruction_cleanup)*
642 $!report_fatal_error; 671 $!report_fatal_error;
643 672
644 }%% 673 }%%
645 674
675 /*
676 * The "write data" statement causes Ragel to emit the constant static data
677 * needed by the ragel machine.
678 */
646 %% write data; 679 %% write data;
647 680
681 /*
682 * Operand's kind WRT sandboxing effect: no effect, can be used for sandboxing
683 * and will make register invalid if used.
684 *
685 * No effect is the "initial state", 32bit stores can be used for sandboxing (in
686 * that case the high 32 bits of the corresponding 64-bit register are set to
687 * zero) and we do not distinguish modifications of 16bit and 64bit registers to
688 * match the behavior of the old validator.
689 *
690 * 8bit operands must be distinguished from other types because the REX prefix
691 * regulates the choice between %ah and %spl, as well as %ch and %bpl.
692 */
648 enum OperandKind { 693 enum OperandKind {
649 OPERAND_SANDBOX_IRRELEVANT = 0, 694 OPERAND_SANDBOX_IRRELEVANT = 0,
695 /* 8bit register that is modified by instruction. */
696 OPERAND_SANDBOX_8BIT,
650 /* 697 /*
651 * Currently we do not distinguish 8bit and 16bit modifications from 698 * 32-bit register that is modified by instruction. The high 32 bits of
652 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator. 699 * the corresponding 64-bit register are set to zero.
653 *
654 * 8bit operands must be distinguished from other types because the REX prefix
655 * regulates the choice between %ah and %spl, as well as %ch and %bpl.
656 */ 700 */
657 OPERAND_SANDBOX_8BIT,
658 OPERAND_SANDBOX_RESTRICTED, 701 OPERAND_SANDBOX_RESTRICTED,
702 /* 64-bit or 16-bit register that is modified by instruction. */
659 OPERAND_SANDBOX_UNRESTRICTED 703 OPERAND_SANDBOX_UNRESTRICTED
660 }; 704 };
661 705
662 #define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3)) 706 /*
663 #define SET_OPERAND_FORMAT(N, T) SET_OPERAND_FORMAT_ ## T(N) 707 * operand_states variable keeps one byte of information per operand in the
664 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(N) \ 708 * current instruction:
665 operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3)) 709 * * the first 5 bits (least significant ones) are for register numbers (16
666 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(N) \ 710 posible registers regs plus RIZ),
667 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) 711 * * the next 2 bits for register kinds.
668 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(N) \ 712 *
669 operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3)) 713 * Macros below are used to access this data.
670 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(N) \ 714 */
671 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3)) 715 #define SET_OPERAND_NAME(INDEX, REGISTER_NAME) \
672 #define CHECK_OPERAND(N, S, T) \ 716 operand_states |= ((REGISTER_NAME) << ((INDEX) << 3))
673 ((operand_states & (0xff << ((N) << 3))) == ((S | (T << 5)) << ((N) << 3))) 717 #define SET_OPERAND_FORMAT(INDEX, FORMAT) \
718 SET_OPERAND_FORMAT_ ## FORMAT(INDEX)
719 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(INDEX) \
720 operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((INDEX) << 3))
721 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(INDEX) \
722 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((INDEX) << 3))
723 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(INDEX) \
724 operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((INDEX) << 3))
725 #define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(INDEX) \
726 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((INDEX) << 3))
727 #define CHECK_OPERAND(INDEX, REGISTER_NAME, KIND) \
728 ((operand_states & (0xff << ((INDEX) << 3))) == \
729 ((((KIND) << 5) | (REGISTER_NAME)) << ((INDEX) << 3)))
730 #define CHECK_OPERAND_R15_MODIFIED(INDEX) \
731 (CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_8BIT) || \
732 CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_RESTRICTED) || \
733 CHECK_OPERAND((INDEX), REG_R15, OPERAND_SANDBOX_UNRESTRICTED))
734 /*
735 * Note that macros below access operand_states variable and also rex_prefix
736 * variable. This is to distinguish %ah from %spl, as well as %ch from %bpl.
737 */
738 #define CHECK_OPERAND_BP_MODIFIED(INDEX) \
739 ((CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \
740 CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_RESTRICTED) || \
741 CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_UNRESTRICTED))
742 #define CHECK_OPERAND_SP_MODIFIED(INDEX) \
743 ((CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \
744 CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_RESTRICTED) || \
745 CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) \
746 /*
747 * This is for Process?OperandsZeroExtends functions: in this case %esp or %ebp
748 * can be written to, but %spl/%sp/%rsp or %bpl/%bp/%rbp can not be modified.
749 */
750 #define CHECK_OPERAND_BP_INVALID_MODIFICATION(INDEX) \
751 ((CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \
752 CHECK_OPERAND((INDEX), REG_RBP, OPERAND_SANDBOX_UNRESTRICTED))
753 #define CHECK_OPERAND_SP_INVALID_MODIFICATION(INDEX) \
754 ((CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || \
755 CHECK_OPERAND((INDEX), REG_RSP, OPERAND_SANDBOX_UNRESTRICTED))
756 #define CHECK_OPERAND_RESTRICTED(INDEX) \
757 /* Take 2 bits of operand type from operand_states as *restricted_register */\
758 /* and also make sure operand_states denotes a register (4th bit == 0). */\
759 (operand_states & (0x70 << ((INDEX) << 3))) == \
760 (OPERAND_SANDBOX_RESTRICTED << (5 + ((INDEX) << 3)))
761 #define GET_OPERAND_NAME(INDEX) ((operand_states >> ((INDEX) << 3)) & 0x1f)
674 762
675 static INLINE void CheckAccess(ptrdiff_t instruction_begin, 763 static INLINE void CheckMemoryAccess(ptrdiff_t instruction_begin,
676 enum OperandName base, 764 enum OperandName base,
677 enum OperandName index, 765 enum OperandName index,
678 uint8_t restricted_register, 766 uint8_t restricted_register,
679 bitmap_word *valid_targets, 767 bitmap_word *valid_targets,
680 uint32_t *instruction_info_collected) { 768 uint32_t *instruction_info_collected) {
681 if ((base == REG_RIP) || (base == REG_R15) || 769 if ((base == REG_RIP) || (base == REG_R15) ||
682 (base == REG_RSP) || (base == REG_RBP)) { 770 (base == REG_RSP) || (base == REG_RBP)) {
683 if ((index == NO_REG) || (index == REG_RIZ)) 771 if ((index == NO_REG) || (index == REG_RIZ))
684 { /* do nothing. */ } 772 { /* do nothing. */ }
685 else if (index == restricted_register) 773 else if (index == restricted_register)
686 BitmapClearBit(valid_targets, instruction_begin), 774 BitmapClearBit(valid_targets, instruction_begin),
687 *instruction_info_collected |= RESTRICTED_REGISTER_USED; 775 *instruction_info_collected |= RESTRICTED_REGISTER_USED;
688 else 776 else
689 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER; 777 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER;
690 } else { 778 } else {
691 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER; 779 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER;
692 } 780 }
693 } 781 }
694 782
783 static FORCEINLINE uint32_t CheckValidityOfRegularInstruction(
784 enum OperandName restricted_register) {
785 /*
786 * Restricted %rsp or %rbp must be restored from
787 * zero-extension state by appropriate "special" instruction, not with
788 * regular instruction.
789 */
790 if (restricted_register == REG_RBP)
791 return RESTRICTED_RBP_UNPROCESSED;
792 if (restricted_register == REG_RSP)
793 return RESTRICTED_RSP_UNPROCESSED;
794 return 0;
795 }
695 796
696 static INLINE void Process0Operands(enum OperandName *restricted_register, 797 static INLINE void Process0Operands(enum OperandName *restricted_register,
697 uint32_t *instruction_info_collected) { 798 uint32_t *instruction_info_collected) {
698 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 799 *instruction_info_collected |=
699 * instruction, not with regular instruction. */ 800 CheckValidityOfRegularInstruction(*restricted_register);
700 if (*restricted_register == REG_RSP) { 801 /* Every instruction clears restricted register even if it is not modified. */
701 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
702 } else if (*restricted_register == REG_RBP) {
703 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
704 }
705 *restricted_register = NO_REG; 802 *restricted_register = NO_REG;
706 } 803 }
707 804
708 static INLINE void Process1Operand(enum OperandName *restricted_register, 805 static INLINE void Process1Operand(enum OperandName *restricted_register,
709 uint32_t *instruction_info_collected, 806 uint32_t *instruction_info_collected,
710 uint8_t rex_prefix, 807 uint8_t rex_prefix,
711 uint32_t operand_states) { 808 uint32_t operand_states) {
712 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 809 *instruction_info_collected |=
713 * instruction, not with regular instruction. */ 810 CheckValidityOfRegularInstruction(*restricted_register);
714 if (*restricted_register == REG_RSP) { 811 if (CHECK_OPERAND_R15_MODIFIED(0))
715 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; 812 *instruction_info_collected |= R15_MODIFIED;
716 } else if (*restricted_register == REG_RBP) { 813 if (CHECK_OPERAND_BP_MODIFIED(0))
717 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; 814 *instruction_info_collected |= BP_MODIFIED;
718 } 815 if (CHECK_OPERAND_SP_MODIFIED(0))
816 *instruction_info_collected |= SP_MODIFIED;
817 /* Every instruction clears restricted register even if it is not modified. */
719 *restricted_register = NO_REG; 818 *restricted_register = NO_REG;
720 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
721 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
722 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
723 *instruction_info_collected |= R15_MODIFIED;
724 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
725 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
726 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
727 *instruction_info_collected |= BPL_MODIFIED;
728 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
729 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
730 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
731 *instruction_info_collected |= SPL_MODIFIED;
732 }
733 } 819 }
734 820
735 static INLINE void Process1OperandZeroExtends( 821 static INLINE void Process1OperandZeroExtends(
736 enum OperandName *restricted_register, 822 enum OperandName *restricted_register,
737 uint32_t *instruction_info_collected, 823 uint32_t *instruction_info_collected,
738 uint8_t rex_prefix, 824 uint8_t rex_prefix,
739 uint32_t operand_states) { 825 uint32_t operand_states) {
740 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 826 *instruction_info_collected |=
741 * instruction, not with regular instruction. */ 827 CheckValidityOfRegularInstruction(*restricted_register);
742 if (*restricted_register == REG_RSP) { 828 /* Every instruction clears restricted register even if it is not modified. */
743 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
744 } else if (*restricted_register == REG_RBP) {
745 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
746 }
747 *restricted_register = NO_REG; 829 *restricted_register = NO_REG;
748 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || 830 if (CHECK_OPERAND_R15_MODIFIED(0))
749 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
750 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
751 *instruction_info_collected |= R15_MODIFIED; 831 *instruction_info_collected |= R15_MODIFIED;
752 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 832 if (CHECK_OPERAND_BP_INVALID_MODIFICATION(0))
753 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { 833 *instruction_info_collected |= BP_MODIFIED;
754 *instruction_info_collected |= BPL_MODIFIED; 834 if (CHECK_OPERAND_SP_INVALID_MODIFICATION(0))
755 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 835 *instruction_info_collected |= SP_MODIFIED;
756 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { 836 if (CHECK_OPERAND_RESTRICTED(0))
757 *instruction_info_collected |= SPL_MODIFIED; 837 *restricted_register = GET_OPERAND_NAME(0);
758 /* Take 2 bits of operand type from operand_states as *restricted_register,
759 * make sure operand_states denotes a register (4th bit == 0). */
760 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
761 *restricted_register = operand_states & 0x0f;
762 }
763 } 838 }
764 839
765 static INLINE void Process2Operands(enum OperandName *restricted_register, 840 static INLINE void Process2Operands(enum OperandName *restricted_register,
766 uint32_t *instruction_info_collected, 841 uint32_t *instruction_info_collected,
767 uint8_t rex_prefix, 842 uint8_t rex_prefix,
768 uint32_t operand_states) { 843 uint32_t operand_states) {
769 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 844 *instruction_info_collected |=
770 * instruction, not with regular instruction. */ 845 CheckValidityOfRegularInstruction(*restricted_register);
771 if (*restricted_register == REG_RSP) { 846 if (CHECK_OPERAND_R15_MODIFIED(0) || CHECK_OPERAND_R15_MODIFIED(1))
772 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; 847 *instruction_info_collected |= R15_MODIFIED;
773 } else if (*restricted_register == REG_RBP) { 848 if (CHECK_OPERAND_BP_MODIFIED(0) || CHECK_OPERAND_BP_MODIFIED(1))
774 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; 849 *instruction_info_collected |= BP_MODIFIED;
775 } 850 if (CHECK_OPERAND_SP_MODIFIED(0) || CHECK_OPERAND_SP_MODIFIED(1))
851 *instruction_info_collected |= SP_MODIFIED;
852 /* Every instruction clears restricted register even if it is not modified. */
776 *restricted_register = NO_REG; 853 *restricted_register = NO_REG;
777 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
778 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
779 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
780 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
781 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
782 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
783 *instruction_info_collected |= R15_MODIFIED;
784 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
785 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
786 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
787 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
788 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
789 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
790 *instruction_info_collected |= BPL_MODIFIED;
791 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
792 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
793 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
794 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
795 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
796 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
797 *instruction_info_collected |= SPL_MODIFIED;
798 }
799 } 854 }
800 855
801 static INLINE void Process2OperandsZeroExtends( 856 static INLINE void Process2OperandsZeroExtends(
802 enum OperandName *restricted_register, 857 enum OperandName *restricted_register,
803 uint32_t *instruction_info_collected, 858 uint32_t *instruction_info_collected,
804 uint8_t rex_prefix, 859 uint8_t rex_prefix,
805 uint32_t operand_states) { 860 uint32_t operand_states) {
806 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special 861 *instruction_info_collected |=
807 * instruction, not with regular instruction. */ 862 CheckValidityOfRegularInstruction(*restricted_register);
808 if (*restricted_register == REG_RSP) { 863 /* Every instruction clears restricted register even if it is not modified. */
809 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
810 } else if (*restricted_register == REG_RBP) {
811 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
812 }
813 *restricted_register = NO_REG; 864 *restricted_register = NO_REG;
814 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || 865 if (CHECK_OPERAND_R15_MODIFIED(0) ||
815 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || 866 CHECK_OPERAND_R15_MODIFIED(1))
816 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
817 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
818 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
819 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
820 *instruction_info_collected |= R15_MODIFIED; 867 *instruction_info_collected |= R15_MODIFIED;
821 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 868 if (CHECK_OPERAND_BP_INVALID_MODIFICATION(0) ||
822 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || 869 CHECK_OPERAND_BP_INVALID_MODIFICATION(1))
823 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 870 *instruction_info_collected |= BP_MODIFIED;
824 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { 871 if (CHECK_OPERAND_SP_INVALID_MODIFICATION(0) ||
825 *instruction_info_collected |= BPL_MODIFIED; 872 CHECK_OPERAND_SP_INVALID_MODIFICATION(1))
826 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 873 *instruction_info_collected |= SP_MODIFIED;
827 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || 874 if (CHECK_OPERAND_RESTRICTED(0)) {
828 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || 875 *restricted_register = GET_OPERAND_NAME(0);
829 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { 876 /*
830 *instruction_info_collected |= SPL_MODIFIED; 877 * If both operands are sandboxed, the second one doesn't count. We can't
831 /* Take 2 bits of operand type from operand_states as *restricted_register, 878 * ignore it completely though, since it can modify %rsp or %rbp which must
832 * make sure operand_states denotes a register (4th bit == 0). */ 879 * follow special rules. In this case NaCl forbids the instruction.
833 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { 880 */
834 *restricted_register = operand_states & 0x0f; 881 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED))
835 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) {
836 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; 882 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
837 } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) { 883 if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED))
838 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; 884 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
839 } 885 } else if (CHECK_OPERAND_RESTRICTED(1)) {
840 /* Take 2 bits of operand type from operand_states as *restricted_register, 886 *restricted_register = GET_OPERAND_NAME(1);
841 * make sure operand_states denotes a register (12th bit == 0). */
842 } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) {
843 *restricted_register = (operand_states & 0x0f00) >> 8;
844 } 887 }
845 } 888 }
846 889
847 /* 890 /*
848 * This function merges "dangerous" instruction with sandboxing instructions to 891 * This function merges "dangerous" instruction with sandboxing instructions to
849 * get a "superinstruction" and unmarks in-between jump targets. 892 * get a "superinstruction" and unmarks in-between jump targets.
850 */ 893 */
851 static INLINE void ExpandSuperinstructionBySandboxingBytes( 894 static INLINE void ExpandSuperinstructionBySandboxingBytes(
852 size_t sandbox_instructions_size, 895 size_t sandbox_instructions_size,
853 const uint8_t **instruction_begin, 896 const uint8_t **instruction_begin,
854 const uint8_t *data, 897 const uint8_t codeblock[],
855 bitmap_word *valid_targets) { 898 bitmap_word *valid_targets) {
856 *instruction_begin -= sandbox_instructions_size; 899 *instruction_begin -= sandbox_instructions_size;
857 /* 900 /*
858 * We need to unmark start of the "dangerous" instruction itself, too, but we 901 * We need to unmark start of the "dangerous" instruction itself, too, but we
859 * don't need to mark the beginning of the whole "superinstruction" - that's 902 * don't need to mark the beginning of the whole "superinstruction" - that's
860 * why we move start by one byte and don't change the length. 903 * why we move start by one byte and don't change the length.
861 */ 904 */
862 UnmarkValidJumpTargets((*instruction_begin + 1 - data), 905 UnmarkValidJumpTargets((*instruction_begin + 1 - codeblock),
863 sandbox_instructions_size, 906 sandbox_instructions_size,
864 valid_targets); 907 valid_targets);
865 } 908 }
866 909
867 /* 910 /*
868 * Return TRUE if naclcall or nacljmp uses the same register in all three 911 * Return TRUE if naclcall or nacljmp uses the same register in all three
869 * instructions. 912 * instructions.
870 * 913 *
871 * This version is for the case where "add %src_register, %dst_register" with 914 * This version is for the case where "add %src_register, %dst_register" with
872 * dst in RM field and src in REG field of ModR/M byte is used. 915 * dst in RM field and src in REG field of ModR/M byte is used.
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
982 * 0: 83 eX e0 and $~0x1f,E86 1025 * 0: 83 eX e0 and $~0x1f,E86
983 * 3: 4? 01 fX add RBASE,R86 1026 * 3: 4? 01 fX add RBASE,R86
984 * 6: 4? ff eX jmpq *R86 1027 * 6: 4? ff eX jmpq *R86
985 * ^ ^ 1028 * ^ ^
986 * instruction_begin current_position 1029 * instruction_begin current_position
987 */ 1030 */
988 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex( 1031 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex(
989 uint32_t *instruction_info_collected, 1032 uint32_t *instruction_info_collected,
990 const uint8_t **instruction_begin, 1033 const uint8_t **instruction_begin,
991 const uint8_t *current_position, 1034 const uint8_t *current_position,
992 const uint8_t *data, 1035 const uint8_t codeblock[],
993 bitmap_word *valid_targets) { 1036 bitmap_word *valid_targets) {
994 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) 1037 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
995 ExpandSuperinstructionBySandboxingBytes( 1038 ExpandSuperinstructionBySandboxingBytes(
996 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); 1039 3 /* and */ + 3 /* add */,
1040 instruction_begin,
1041 codeblock,
1042 valid_targets);
997 else 1043 else
998 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; 1044 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
999 } 1045 }
1000 1046
1001 /* 1047 /*
1002 * This function checks that naclcall or nacljmp are correct (that is: three 1048 * This function checks that naclcall or nacljmp are correct (that is: three
1003 * component instructions match) and if that is true then it merges call or jmp 1049 * component instructions match) and if that is true then it merges call or jmp
1004 * with a sandboxing to get a "superinstruction" and removes in-between jump 1050 * with a sandboxing to get a "superinstruction" and removes in-between jump
1005 * targets. If it's not true then it triggers "unrecognized instruction" error 1051 * targets. If it's not true then it triggers "unrecognized instruction" error
1006 * condition. 1052 * condition.
(...skipping 12 matching lines...) Expand all
1019 * 0: 83 eX e0 and $~0x1f,E86 1065 * 0: 83 eX e0 and $~0x1f,E86
1020 * 3: 4? 03 Xf add RBASE,R86 1066 * 3: 4? 03 Xf add RBASE,R86
1021 * 6: 4? ff eX jmpq *R86 1067 * 6: 4? ff eX jmpq *R86
1022 * ^ ^ 1068 * ^ ^
1023 * instruction_begin current_position 1069 * instruction_begin current_position
1024 */ 1070 */
1025 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex( 1071 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex(
1026 uint32_t *instruction_info_collected, 1072 uint32_t *instruction_info_collected,
1027 const uint8_t **instruction_begin, 1073 const uint8_t **instruction_begin,
1028 const uint8_t *current_position, 1074 const uint8_t *current_position,
1029 const uint8_t *data, 1075 const uint8_t codeblock[],
1030 bitmap_word *valid_targets) { 1076 bitmap_word *valid_targets) {
1031 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) 1077 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1032 ExpandSuperinstructionBySandboxingBytes( 1078 ExpandSuperinstructionBySandboxingBytes(
1033 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); 1079 3 /* and */ + 3 /* add */,
1080 instruction_begin,
1081 codeblock,
1082 valid_targets);
1034 else 1083 else
1035 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; 1084 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1036 } 1085 }
1037 1086
1038 /* 1087 /*
1039 * This function checks that naclcall or nacljmp are correct (that is: three 1088 * This function checks that naclcall or nacljmp are correct (that is: three
1040 * component instructions match) and if that is true then it merges call or jmp 1089 * component instructions match) and if that is true then it merges call or jmp
1041 * with a sandboxing to get a "superinstruction" and removes in-between jump 1090 * with a sandboxing to get a "superinstruction" and removes in-between jump
1042 * targets. If it's not true then it triggers "unrecognized instruction" error 1091 * targets. If it's not true then it triggers "unrecognized instruction" error
1043 * condition. 1092 * condition.
(...skipping 18 matching lines...) Expand all
1062 * 0: 4? 83 eX e0 and $~0x1f,E64 1111 * 0: 4? 83 eX e0 and $~0x1f,E64
1063 * 4: 4? 01 fX add RBASE,R64 1112 * 4: 4? 01 fX add RBASE,R64
1064 * 7: 4? ff eX jmpq *R64 1113 * 7: 4? ff eX jmpq *R64
1065 * ^ ^ 1114 * ^ ^
1066 * instruction_begin current_position 1115 * instruction_begin current_position
1067 */ 1116 */
1068 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex( 1117 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex(
1069 uint32_t *instruction_info_collected, 1118 uint32_t *instruction_info_collected,
1070 const uint8_t **instruction_begin, 1119 const uint8_t **instruction_begin,
1071 const uint8_t *current_position, 1120 const uint8_t *current_position,
1072 const uint8_t *data, 1121 const uint8_t codeblock[],
1073 bitmap_word *valid_targets) { 1122 bitmap_word *valid_targets) {
1074 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) 1123 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
1075 ExpandSuperinstructionBySandboxingBytes( 1124 ExpandSuperinstructionBySandboxingBytes(
1076 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); 1125 4 /* and */ + 3 /* add */,
1126 instruction_begin,
1127 codeblock,
1128 valid_targets);
1077 else 1129 else
1078 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; 1130 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1079 } 1131 }
1080 1132
1081 /* 1133 /*
1082 * This function checks that naclcall or nacljmp are correct (that is: three 1134 * This function checks that naclcall or nacljmp are correct (that is: three
1083 * component instructions match) and if that is true then it merges call or jmp 1135 * component instructions match) and if that is true then it merges call or jmp
1084 * with a sandboxing to get a "superinstruction" and removes in-between jump 1136 * with a sandboxing to get a "superinstruction" and removes in-between jump
1085 * targets. If it's not true then it triggers "unrecognized instruction" error 1137 * targets. If it's not true then it triggers "unrecognized instruction" error
1086 * condition. 1138 * condition.
(...skipping 18 matching lines...) Expand all
1105 * 0: 4? 83 eX e0 and $~0x1f,E64 1157 * 0: 4? 83 eX e0 and $~0x1f,E64
1106 * 4: 4? 03 Xf add RBASE,R64 1158 * 4: 4? 03 Xf add RBASE,R64
1107 * 7: 4? ff eX jmpq *R64 1159 * 7: 4? ff eX jmpq *R64
1108 * ^ ^ 1160 * ^ ^
1109 * instruction_begin current_position 1161 * instruction_begin current_position
1110 */ 1162 */
1111 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex( 1163 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex(
1112 uint32_t *instruction_info_collected, 1164 uint32_t *instruction_info_collected,
1113 const uint8_t **instruction_begin, 1165 const uint8_t **instruction_begin,
1114 const uint8_t *current_position, 1166 const uint8_t *current_position,
1115 const uint8_t *data, 1167 const uint8_t codeblock[],
1116 bitmap_word *valid_targets) { 1168 bitmap_word *valid_targets) {
1117 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) 1169 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1118 ExpandSuperinstructionBySandboxingBytes( 1170 ExpandSuperinstructionBySandboxingBytes(
1119 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); 1171 4 /* and */ + 3 /* add */,
1172 instruction_begin,
1173 codeblock,
1174 valid_targets);
1120 else 1175 else
1121 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; 1176 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1122 } 1177 }
1123 1178
1124 1179
1125 Bool ValidateChunkAMD64(const uint8_t *data, size_t size, 1180 Bool ValidateChunkAMD64(const uint8_t codeblock[],
1181 size_t size,
1126 uint32_t options, 1182 uint32_t options,
1127 const NaClCPUFeaturesX86 *cpu_features, 1183 const NaClCPUFeaturesX86 *cpu_features,
1128 ValidationCallbackFunc user_callback, 1184 ValidationCallbackFunc user_callback,
1129 void *callback_data) { 1185 void *callback_data) {
1130 bitmap_word valid_targets_small; 1186 bitmap_word valid_targets_small;
1131 bitmap_word jump_dests_small; 1187 bitmap_word jump_dests_small;
1132 bitmap_word *valid_targets; 1188 bitmap_word *valid_targets;
1133 bitmap_word *jump_dests; 1189 bitmap_word *jump_dests;
1134 const uint8_t *current_position; 1190 const uint8_t *current_position;
1135 const uint8_t *end_of_bundle; 1191 const uint8_t *end_of_bundle;
(...skipping 23 matching lines...) Expand all
1159 free(jump_dests); 1215 free(jump_dests);
1160 free(valid_targets); 1216 free(valid_targets);
1161 errno = ENOMEM; 1217 errno = ENOMEM;
1162 return FALSE; 1218 return FALSE;
1163 } 1219 }
1164 } 1220 }
1165 1221
1166 /* 1222 /*
1167 * This option is usually used in tests: we will process the whole chunk 1223 * This option is usually used in tests: we will process the whole chunk
1168 * in one pass. Usually each bundle is processed separately which means 1224 * in one pass. Usually each bundle is processed separately which means
1169 * instructions (and super-instructions) can not cross borders of the bundle. 1225 * instructions (and "superinstructions") can not cross borders of the bundle.
1170 */ 1226 */
1171 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) 1227 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
1172 end_of_bundle = data + size; 1228 end_of_bundle = codeblock + size;
1173 else 1229 else
1174 end_of_bundle = data + kBundleSize; 1230 end_of_bundle = codeblock + kBundleSize;
1175 1231
1176 /* 1232 /*
1177 * Main loop. Here we process the data array bundle-after-bundle. 1233 * Main loop. Here we process the codeblock array bundle-after-bundle.
1178 * Ragel-produced DFA does all the checks with one exception: direct jumps. 1234 * Ragel-produced DFA does all the checks with one exception: direct jumps.
1179 * It collects the two arrays: valid_targets and jump_dests which are used 1235 * It collects the two arrays: valid_targets and jump_dests which are used
1180 * to test direct jumps later. 1236 * to test direct jumps later.
1181 */ 1237 */
1182 for (current_position = data; 1238 for (current_position = codeblock;
1183 current_position < data + size; 1239 current_position < codeblock + size;
1184 current_position = end_of_bundle, 1240 current_position = end_of_bundle,
1185 end_of_bundle = current_position + kBundleSize) { 1241 end_of_bundle = current_position + kBundleSize) {
1186 /* Start of the instruction being processed. */ 1242 /* Start of the instruction being processed. */
1187 const uint8_t *instruction_begin = current_position; 1243 const uint8_t *instruction_begin = current_position;
1188 /* Only used locally in the end_of_instruction_cleanup action. */ 1244 /* Only used locally in the end_of_instruction_cleanup action. */
1189 const uint8_t *instruction_end; 1245 const uint8_t *instruction_end;
1190 int current_state; 1246 int current_state;
1191 uint32_t instruction_info_collected = 0; 1247 uint32_t instruction_info_collected = 0;
1192 /* Keeps one byte of information per operand in the current instruction: 1248 /*
1193 * 2 bits for register kinds, 1249 * Contains register number and type of register modification (see
1194 * 5 bits for register numbers (16 regs plus RIZ). */ 1250 * OperandKind enum) for each operand that is changed in the instruction.
1251 * Information about read-only and memory operands is not saved in 64-bit
1252 * mode.
1253 */
1195 uint32_t operand_states = 0; 1254 uint32_t operand_states = 0;
1196 enum OperandName base = NO_REG; 1255 enum OperandName base = NO_REG;
1197 enum OperandName index = NO_REG; 1256 enum OperandName index = NO_REG;
1198 enum OperandName restricted_register = 1257 enum OperandName restricted_register =
1199 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options); 1258 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
1200 uint8_t rex_prefix = FALSE; 1259 uint8_t rex_prefix = FALSE;
1201 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ 1260 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
1202 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; 1261 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
1203 uint8_t vex_prefix3 = 0x00; 1262 uint8_t vex_prefix3 = 0x00;
1204 1263
1264 /*
1265 * The "write init" statement causes Ragel to emit initialization code.
1266 * This should be executed once before the ragel machine is started.
1267 */
1205 %% write init; 1268 %% write init;
1269 /*
1270 * The "write exec" statement causes Ragel to emit the ragel machine's
1271 * execution code.
1272 */
1206 %% write exec; 1273 %% write exec;
1207 1274
1208 /* 1275 /*
1209 * Ragel DFA accepted the bundle, but we still need to make sure the last 1276 * Ragel DFA accepted the bundle, but we still need to make sure the last
1210 * instruction hasn't left %rbp or %rsp in restricted state. 1277 * instruction hasn't left %rbp or %rsp in restricted state.
1211 */ 1278 */
1212 if (restricted_register == REG_RBP) 1279 if (restricted_register == REG_RBP)
1213 result &= user_callback(end_of_bundle, end_of_bundle, 1280 result &= user_callback(end_of_bundle, end_of_bundle,
1214 RESTRICTED_RBP_UNPROCESSED | 1281 RESTRICTED_RBP_UNPROCESSED |
1215 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) & 1282 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
1216 RESTRICTED_REGISTER_MASK), callback_data); 1283 RESTRICTED_REGISTER_MASK), callback_data);
1217 else if (restricted_register == REG_RSP) 1284 else if (restricted_register == REG_RSP)
1218 result &= user_callback(end_of_bundle, end_of_bundle, 1285 result &= user_callback(end_of_bundle, end_of_bundle,
1219 RESTRICTED_RSP_UNPROCESSED | 1286 RESTRICTED_RSP_UNPROCESSED |
1220 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) & 1287 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
1221 RESTRICTED_REGISTER_MASK), callback_data); 1288 RESTRICTED_REGISTER_MASK), callback_data);
1222 } 1289 }
1223 1290
1224 /* 1291 /*
1225 * Check the direct jumps. All the targets from jump_dests must be in 1292 * Check the direct jumps. All the targets from jump_dests must be in
1226 * valid_targets. 1293 * valid_targets.
1227 */ 1294 */
1228 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, 1295 result &= ProcessInvalidJumpTargets(codeblock,
1229 user_callback, callback_data); 1296 size,
1297 valid_targets,
1298 jump_dests,
1299 user_callback,
1300 callback_data);
1230 1301
1231 /* We only use malloc for large code sequences */ 1302 /* We only use malloc for large code sequences */
1232 if (jump_dests != &jump_dests_small) free(jump_dests); 1303 if (jump_dests != &jump_dests_small) free(jump_dests);
1233 if (valid_targets != &valid_targets_small) free(valid_targets); 1304 if (valid_targets != &valid_targets_small) free(valid_targets);
1234 if (!result) errno = EINVAL; 1305 if (!result) errno = EINVAL;
1235 return result; 1306 return result;
1236 } 1307 }
OLDNEW
« no previous file with comments | « src/trusted/validator_ragel/validator_x86_32.rl ('k') | src/trusted/validator_x86/testdata/64/stack_regs.test » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698