Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/trusted/validator_ragel/unreviewed/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 /*
8 * This is the core of amd64-mode validator. Please note that this file
9 * combines ragel machine description and C language actions. Please read
10 * validator_internals.html first to understand how the whole thing is built:
11 * it explains how the byte sequences are constructed, what constructs like
12 * "@{}" or "REX_WRX?" mean, etc.
13 */
14
15 #include <assert.h>
16 #include <errno.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "native_client/src/trusted/validator_ragel/bitmap.h"
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_interna l.h"
24
25 %%{
26 machine x86_64_validator;
27 alphtype unsigned char;
28 variable p current_position;
29 variable pe end_of_bundle;
30 variable eof end_of_bundle;
31 variable cs current_state;
32
33 include byte_machine "byte_machines.rl";
34
35 include prefixes_parsing_validator
36 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
37 include rex_actions
38 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
39 include rex_parsing
40 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
41 include vex_actions_amd64
42 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
43 include vex_parsing_amd64
44 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
45 include displacement_fields_parsing
46 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
47 include modrm_actions_amd64
48 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
49 include modrm_parsing
50 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
51 include operand_format_actions
52 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
53 include operand_source_actions_amd64
54 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
55 include immediate_fields_parsing
56 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
57 include relative_fields_validator_actions
58 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
59 include relative_fields_parsing
60 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
61 include cpuid_actions
62 "native_client/src/trusted/validator_ragel/parse_instruction.rl";
63
64 action check_access {
65 CheckAccess(instruction_begin - data, base, index, restricted_register,
66 valid_targets, &instruction_info_collected);
67 }
68
69 # Action which marks last byte as not immediate. Most 3DNow! instructions,
70 # some AVX and XOP instructions have this property. It's referenced by
71 # decode_x86_64 machine in [autogenerated] "validator_x86_64_instruction.rl"
72 # file.
73 action last_byte_is_not_immediate {
74 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
75 }
76
77 action modifiable_instruction {
78 instruction_info_collected |= MODIFIABLE_INSTRUCTION;
79 }
80
81 action process_0_operands {
82 Process0Operands(&restricted_register, &instruction_info_collected);
83 }
84 action process_1_operand {
85 Process1Operand(&restricted_register, &instruction_info_collected,
86 rex_prefix, operand_states);
87 }
88 action process_1_operand_zero_extends {
89 Process1OperandZeroExtends(&restricted_register,
90 &instruction_info_collected, rex_prefix,
91 operand_states);
92 }
93 action process_2_operands {
94 Process2Operands(&restricted_register, &instruction_info_collected,
95 rex_prefix, operand_states);
96 }
97 action process_2_operands_zero_extends {
98 Process2OperandsZeroExtends(&restricted_register,
99 &instruction_info_collected, rex_prefix,
100 operand_states);
101 }
102
103 include decode_x86_64 "validator_x86_64_instruction.rl";
104
105 # Special %rbp modifications - the ones which don't need a sandboxing.
106 #
107 # Note that there are two different opcodes for "mov": in x86-64 there are two
108 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
109 # from REG field to RM or in the other direction thus there are two encodings
110 # for the register-to-register move.
111 rbp_modifications =
112 (b_0100_10x0 0x89 0xe5 | # mov %rsp,%rbp
113 b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp
114 @process_0_operands;
115
116 # Special instructions used for %rbp sandboxing.
117 #
118 # This is the "second half" of the %rbp sandboxing. Any zero-extending
119 # instruction which stores the data in %ebp can be first half, but unlike
120 # the situation with other "normal" registers you can not just write to
121 # %ebp and continue: such activity MUST restore the status quo immediately
122 # via one of these instructions.
123 rbp_sandboxing =
124 (b_0100_11x0 0x01 0xfd | # add %r15,%rbp
125 b_0100_10x1 0x03 0xef | # add %r15,%rbp
126 # Note that unlike %rsp case, there is no 'lea (%rbp,%r15,1),%rbp'
127 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
128 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp
129 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
130 # Note: restricted_register keeps the restricted register as explained in
131 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
132 #
133 # "Normal" instructions can not be used in a place where %rbp is restricted.
134 # But since these instructions are "second half" of the %rbp sandboxing they
135 # can be used *only* when %rbp is restricted.
136 #
137 # That is (normal instruction):
138 # mov %eax,%ebp
139 # mov %esi,%edi <- Error: %ebp is restricted
140 # vs
141 # mov %esi,%edi
142 # add %r15,%rbp <- Error: %ebp is *not* restricted
143 # vs
144 # mov %eax,%ebp
145 # add %r15,%rbp <- Ok: %rbp is restricted as it should be
146 #
147 # Check this precondition and mark the beginning of the instruction as
148 # an invalid jump target.
149 @{ if (restricted_register == REG_RBP)
150 instruction_info_collected |= RESTRICTED_REGISTER_USED;
151 else
152 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
153 restricted_register = NO_REG;
154 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
155 };
156
157 # Special %rsp modifications - the ones which don't need a sandboxing.
158 #
159 # Note that there are two different opcodes for "mov": in x86-64 there are two
160 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
161 # from REG field to RM or in the other direction thus there are two encodings
162 # for the register-to-register move.
163 rsp_modifications =
164 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp
165 b_0100_10x0 0x8b 0xe5 | # mov %rbp,%rsp
166 # Superfluous bits are not supported:
167 # http://code.google.com/p/nativeclient/issues/detail?id=3012
168 b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp
169 @process_0_operands;
170
171 # Special instructions used for %rsp sandboxing.
172 #
173 # This is the "second half" of the %rsp sandboxing. Any zero-extending
174 # instruction which stores the data in %esp can be first half, but unlike
175 # the situation with other "normal" registers you can not just write to
176 # %esp and continue: such activity MUST restore the status quo immediately
177 # via one of these instructions.
178 rsp_sandboxing =
179 (b_0100_11x0 0x01 0xfc | # add %r15,%rsp
180 b_0100_10x1 0x03 0xe7 | # add %r15,%rsp
181 # OR can be used as well, see
182 # http://code.google.com/p/nativeclient/issues/detail?id=3070
183 b_0100_11x0 0x09 0xfc | # or %r15,%rsp
184 b_0100_10x1 0x0b 0xe7 | # or %r15,%rsp
185 0x4a 0x8d 0x24 0x3c | # lea (%rsp,%r15,1),%rsp
186 0x4a 0x8d 0x64 0x3c 0x00 | # lea 0x00(%rsp,%r15,1),%rsp
187 0x4a 0x8d 0xa4 0x3c 0x00 0x00 0x00 0x00) # lea 0x00000000(%rsp,%r15,1),%rsp
188 # Note: restricted_register keeps the restricted register as explained in
189 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
190 #
191 # "Normal" instructions can not be used in a place where %rsp is restricted.
192 # But since these instructions are "second half" of the %rsp sandboxing they
193 # can be used *only* when %rsp is restricted.
194 #
195 # That is (normal instruction):
196 # mov %eax,%esp
197 # mov %esi,%edi <- Error: %esp is restricted
198 # vs
199 # mov %esi,%edi
200 # add %r15,%rsp <- Error: %esp is *not* restricted
201 # vs
202 # mov %eax,%esp
203 # add %r15,%rsp <- Ok: %rsp is restricted as it should be
204 #
205 # Check this precondition and mark the beginning of the instruction as
206 # an invalid jump target.
207 @{ if (restricted_register == REG_RSP)
208 instruction_info_collected |= RESTRICTED_REGISTER_USED;
209 else
210 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
211 restricted_register = NO_REG;
212 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
213 };
214
215 # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
216 # and $~0x1f, %eXX
217 # add RBASE, %rXX
218 # jmpq *%rXX (or: callq *%rXX)
219 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
220 # just as part of the naclcall/nacljmp, but also as a standalone instruction).
221 #
222 # This means that when naclcall_or_nacljmp ragel machine will be combined with
223 # "normal_instruction*" regular action process_1_operand_zero_extends will be
224 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64
225 # instruction. This action will check if %rbp/%rsp is legally modified thus
226 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
227 #
228 # There are a number of variants present which differ by the REX prefix usage:
229 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq"
230 # or "callq" is the same register and it's much simpler to do if one single
231 # action handles only a fixed number of bytes.
232 #
233 # An additional complication arises because x86-64 contains two different "add"
234 # instructions: with "0x01" and "0x03" opcodes. They differ in the direction
235 # used: both can encode "add %src_register, %dst_register", but the first one
236 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
237 # byte for the dst while the last one uses field RM of the ModR/M byte for the
238 # src and field REG of the ModR/M byte for the dst. Both should be allowed.
239 #
240 # See AMD/Intel manual for clarification of the "add" instruction encoding.
241 #
242 # REGISTER USAGE ABBREVIATIONS:
243 # E86: legacy ia32 registers (all eight: %eax to %edi)
244 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
245 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
246 # R64: new amd64 registers (only seven: %r8 to %r14)
247 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)
248 naclcall_or_nacljmp =
249 # This block encodes call and jump "superinstruction" of the following form:
250 # 0: 83 e_ e0 and $~0x1f,E86
251 # 3: 4_ 01 f_ add RBASE,R86
252 # 6: ff e_ jmpq *R86
253 #### INSTRUCTION ONE (three bytes)
254 # and $~0x1f, E86
255 (0x83 b_11_100_xxx 0xe0
256 #### INSTRUCTION TWO (three bytes)
257 # add RBASE, R86 (0x01 opcode)
258 b_0100_11x0 0x01 b_11_111_xxx
259 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
260 # callq R86
261 ((REX_WRX? 0xff b_11_010_xxx) |
262 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
263 # jmpq R86
264 (REX_WRX? 0xff b_11_100_xxx)))
265 @{
266 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
267 &instruction_begin, current_position,
268 data, valid_targets);
269 } |
270
271 # This block encodes call and jump "superinstruction" of the following form:
272 # 0: 83 e_ e0 and $~0x1f,E86
273 # 3: 4_ 03 _f add RBASE,R86
274 # 6: ff e_ jmpq *R86
275 #### INSTRUCTION ONE (three bytes)
276 # and $~0x1f, E86
277 (0x83 b_11_100_xxx 0xe0
278 #### INSTRUCTION TWO (three bytes)
279 # add RBASE, R86 (0x03 opcode)
280 b_0100_10x1 0x03 b_11_xxx_111
281 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
282 # callq R86
283 ((REX_WRX? 0xff b_11_010_xxx) |
284 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
285 # jmpq R86
286 (REX_WRX? 0xff b_11_100_xxx)))
287 @{
288 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
289 &instruction_begin, current_position,
290 data, valid_targets);
291 } |
292
293 # This block encodes call and jump "superinstruction" of the following form:
294 # 0: 4_ 83 e_ e0 and $~0x1f,E86
295 # 4: 4_ 01 f_ add RBASE,R86
296 # 7: ff e_ jmpq *R86
297 #### INSTRUCTION ONE (four bytes)
298 # and $~0x1f, E86
299 ((REX_RX 0x83 b_11_100_xxx 0xe0
300 #### INSTRUCTION TWO (three bytes)
301 # add RBASE, R86 (0x01 opcode)
302 b_0100_11x0 0x01 b_11_111_xxx
303 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
304 # callq R86
305 ((REX_WRX? 0xff b_11_010_xxx) |
306 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
307 # jmpq R86
308 (REX_WRX? 0xff b_11_100_xxx))) |
309
310 # This block encodes call and jump "superinstruction" of the following form:
311 # 0: 4_ 83 e_ e0 and $~0x1f,E64
312 # 4: 4_ 01 f_ add RBASE,R64
313 # 7: 4_ ff e_ jmpq *R64
314 #### INSTRUCTION ONE (four bytes)
315 # and $~0x1f, E64
316 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
317 #### INSTRUCTION TWO (three bytes)
318 # add RBASE, R64 (0x01 opcode)
319 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
320 #### INSTRUCTION THREE: call (three bytes)
321 # callq R64
322 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
323 #### INSTRUCTION THREE: jmp (three bytes)
324 # jmpq R64
325 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
326 @{
327 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
328 &instruction_begin, current_position,
329 data, valid_targets);
330 } |
331
332 # This block encodes call and jump "superinstruction" of the following form:
333 # 0: 4_ 83 e_ e0 and $~0x1f,E86
334 # 4: 4_ 03 _f add RBASE,R86
335 # 7: ff e_ jmpq *R86
336 #### INSTRUCTION ONE (four bytes)
337 # and $~0x1f, E86
338 ((REX_RX 0x83 b_11_100_xxx 0xe0
339 #### INSTRUCTION TWO (three bytes)
340 # add RBASE, R86 (0x03 opcode)
341 b_0100_10x1 0x03 b_11_xxx_111
342 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
343 # callq R86
344 ((REX_WRX? 0xff b_11_010_xxx) |
345 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
346 # jmpq R86
347 (REX_WRX? 0xff b_11_100_xxx))) |
348
349 # This block encodes call and jump "superinstruction" of the following form:
350 # 0: 4_ 83 e_ e0 and $~0x1f,E64
351 # 4: 4_ 03 _f add RBASE,R64
352 # 7: 4_ ff e_ jmpq *R64
353 #### INSTRUCTION ONE (four bytes)
354 # and $~0x1f, E64
355 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
356 #### INSTRUCTION TWO (three bytes)
357 # add RBASE, R64 (0x03 opcode)
358 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
359 #### INSTRUCTION THREE: call (three bytes)
360 # callq R64
361 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
362 #### INSTRUCTION THREE: jmp (three bytes)
363 # jmpq R64
364 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
365 @{
366 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
367 &instruction_begin, current_position,
368 data, valid_targets);
369 };
370
371 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand
372
373 # maskmovq %mmX,%mmY (EMMX or SSE)
374 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers;
375
376 # maskmovdqu %xmmX, %xmmY (SSE2)
377 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers;
378
379 # vmaskmovdqu %xmmX, %xmmY (AVX)
380 vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) |
381 (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers;
382
383 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu;
384
385 # Temporary fix: for string instructions combination of data16 and rep(ne)
386 # prefixes is disallowed to mimic old validator behavior.
387 # See http://code.google.com/p/nativeclient/issues/detail?id=1950
388
389 # data16rep = (data16 | rep data16 | data16 rep);
390 # data16condrep = (data16 | condrep data16 | data16 condrep);
391 data16rep = data16;
392 data16condrep = data16;
393
394 # String instructions which use only %ds:(%rsi)
395 string_instruction_rsi_no_rdi =
396 (rep? 0xac | # lods %ds:(%rsi),%al
397 data16rep 0xad | # lods %ds:(%rsi),%ax
398 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax
399
400 # String instructions which use only %ds:(%rdi)
401 string_instruction_rdi_no_rsi =
402 condrep? 0xae | # scas %es:(%rdi),%al
403 data16condrep 0xaf | # scas %es:(%rdi),%ax
404 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax
405
406 rep? 0xaa | # stos %al,%es:(%rdi)
407 data16rep 0xab | # stos %ax,%es:(%rdi)
408 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)
409
410 # String instructions which use both %ds:(%rsi) and %es:(%rdi)
411 string_instruction_rsi_rdi =
412 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi)
413 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi)
414 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi)
415
416 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi)
417 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi)
418 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi)
419
420 # "Superinstruction" which includes %rsi sandboxing.
421 #
422 # There are two variants which handle spurious REX prefixes.
423 #
424 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64
425 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
426 # be used to move from REG field to RM or in the other direction thus there
427 # are two encodings for the register-to-register move (and since REG and RM
428 # are identical here only opcode differs).
429 sandbox_instruction_rsi_no_rdi =
430 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
431 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
432 string_instruction_rsi_no_rdi
433 @{
434 ExpandSuperinstructionBySandboxingBytes(
435 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
436 } |
437
438 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
439 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
440 string_instruction_rsi_no_rdi
441 @{
442 ExpandSuperinstructionBySandboxingBytes(
443 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
444 };
445
446 # "Superinstruction" which includes %rdi sandboxing.
447 #
448 # There are two variants which handle spurious REX prefixes.
449 #
450 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64
451 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
452 # be used to move from REG field to RM or in the other direction thus there
453 # are two encodings for the register-to-register move (and since REG and RM
454 # are identical here only opcode differs).
455 sandbox_instruction_rdi_no_rsi =
456 (0x89 | 0x8b) 0xff # mov %edi,%edi
457 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
458 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
459 @{
460 ExpandSuperinstructionBySandboxingBytes(
461 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
462 } |
463
464 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
465 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
466 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
467 @{
468 ExpandSuperinstructionBySandboxingBytes(
469 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
470 };
471
472
473 # "Superinstruction" which includes both %rsi and %rdi sandboxing.
474 #
475 # There are four variants which handle spurious REX prefixes.
476 #
477 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both
478 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two
479 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
480 # from REG field to RM or in the other direction thus there are two encodings
481 # for the register-to-register move (and since REG and RM are identical here
482 # only opcode differs).
483 sandbox_instruction_rsi_rdi =
484 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
485 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
486 (0x89 | 0x8b) 0xff # mov %edi,%edi
487 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
488 string_instruction_rsi_rdi
489 @{
490 ExpandSuperinstructionBySandboxingBytes(
491 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
492 &instruction_begin, data, valid_targets);
493 } |
494
495 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi
496 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
497 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
498 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi
499
500 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
501 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
502 (0x89 | 0x8b) 0xff # mov %edi,%edi
503 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi
504 string_instruction_rsi_rdi
505 @{
506 ExpandSuperinstructionBySandboxingBytes(
507 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
508 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
509 &instruction_begin, data, valid_targets);
510 } |
511
512 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
513 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
514 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
515 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
516 string_instruction_rsi_rdi
517 @{
518 ExpandSuperinstructionBySandboxingBytes(
519 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
520 &instruction_begin, data, valid_targets);
521 };
522
523 # All the "special" instructions (== instructions which obey non-standard
524 # rules). Three groups:
525 # * %rbp/%rsp related instructions (these instructions are special because
526 # they must be in the range %r15...%r15+4294967295 except momentarily they
527 # can be in the range 0...4294967295)
528 # * string instructions (which can not use %r15 as base and thus need special
529 # handling both in compiler and validator)
530 # * naclcall/nacljmp (indirect jumps need special care)
531 special_instruction =
532 (rbp_modifications |
533 rsp_modifications |
534 rbp_sandboxing |
535 rsp_sandboxing |
536 sandbox_instruction_rsi_no_rdi |
537 sandbox_instruction_rdi_no_rsi |
538 sandbox_instruction_rsi_rdi |
539 naclcall_or_nacljmp)
540 # Mark the instruction as special - currently this information is used only
541 # in tests, but in the future we may use it for dynamic code modification
542 # support.
543 @{
544 instruction_info_collected |= SPECIAL_INSTRUCTION;
545 };
546
547 # Remove special instructions which are only allowed in special cases.
548 normal_instruction = one_instruction - special_instruction;
549
550 # Check if call is properly aligned.
551 #
552 # For direct calls we explicitly encode all variations. For indirect calls
553 # we accept all the special instructions which end with a register-addressed
554 # indirect call.
555 call_alignment =
556 ((normal_instruction &
557 # Direct call
558 ((data16 REX_RXB? 0xe8 rel16) |
559 (REX_WRXB? 0xe8 rel32) |
560 (data16 REXW_RXB 0xe8 rel32))) |
561 (special_instruction &
562 # Indirect call
563 (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
564 modrm_registers))))
565 # Call instruction must be aligned to the end of bundle. Previously this was
566 # a strict requirement, today it's just a warning to aid with debugging.
567 @{
568 if (((current_position - data) & kBundleMask) != kBundleMask)
569 instruction_info_collected |= BAD_CALL_ALIGNMENT;
570 };
571
572 # This action calls user's callback (if needed) and cleans up validator's
573 # internal state.
574 #
575 # We call the user callback if there are validation errors or if the
576 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used.
577 #
578 # After that we move instruction_begin and clean all the variables which
579 # are only used in the processing of a single instruction (prefixes, operand
580 # states and instruction_info_collected).
581 action end_of_instruction_cleanup {
582 /* Call user-supplied callback. */
583 instruction_end = current_position + 1;
584 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
585 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
586 result &= user_callback(
587 instruction_begin, instruction_end,
588 instruction_info_collected |
589 ((restricted_register << RESTRICTED_REGISTER_SHIFT) &
590 RESTRICTED_REGISTER_MASK), callback_data);
591 }
592
593 /* On successful match the instruction_begin must point to the next byte
594 * to be able to report the new offset as the start of instruction
595 * causing error. */
596 instruction_begin = instruction_end;
597
598 /* Mark start of the next instruction as a valid target for jump.
599 * Note: we mark start of the next instruction here, not start of the
600 * current one because memory access check should be able to clear this
601 * bit when restricted register is used. */
602 MarkValidJumpTarget(instruction_begin - data, valid_targets);
603
604 /* Clear variables. */
605 instruction_info_collected = 0;
606 SET_REX_PREFIX(FALSE);
607 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
608 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B);
609 SET_VEX_PREFIX3(0x00);
610 operand_states = 0;
611 base = 0;
612 index = 0;
613 }
614
615 # This action reports fatal error detected by DFA.
616 action report_fatal_error {
617 result &= user_callback(instruction_begin, current_position,
618 UNRECOGNIZED_INSTRUCTION, callback_data);
619 /*
620 * Process the next bundle: "continue" here is for the "for" cycle in
621 * the ValidateChunkAMD64 function.
622 *
623 * It does not affect the case which we really care about (when code
624 * is validatable), but makes it possible to detect more errors in one
625 * run in tools like ncval.
626 */
627 continue;
628 }
629
630 # This is the main ragel machine: it does 99% of validation work. There is only
631 # one thing to do with a bundle if this machine accepts the bundle:
632 # * check for the state of the restricted_register at the end of the bundle.
633 # It's an error if %rbp or %rsp is restricted at the end of the bundle.
634 # Additionally if all the bundles are fine you need to check that direct jumps
635 # are correct. This is done in the following way:
636 # * DFA fills two arrays: valid_targets and jump_dests.
637 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".
638 # All other checks are done here.
639
640 main := ((call_alignment | normal_instruction | special_instruction)
641 @end_of_instruction_cleanup)*
642 $!report_fatal_error;
643
644 }%%
645
646 %% write data;
647
/*
 * Sandboxing class of an operand.  The SET_OPERAND_FORMAT_* macros below store
 * one of these values in bits 5..6 of the operand's byte in operand_states.
 */
enum OperandKind {
  OPERAND_SANDBOX_IRRELEVANT = 0,
  /*
   * Currently we do not distinguish 8bit and 16bit modifications from
   * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator.
   *
   * 8bit operands must be distinguished from other types because the REX prefix
   * regulates the choice between %ah and %spl, as well as %ch and %bpl.
   */
  OPERAND_SANDBOX_8BIT,
  OPERAND_SANDBOX_RESTRICTED,
  OPERAND_SANDBOX_UNRESTRICTED
};

/*
 * Layout of operand_states: operand N occupies byte N (bits N*8 .. N*8+7).
 * Within that byte the operand name is OR-ed into the low bits and the
 * OperandKind is OR-ed in starting at bit 5.
 *
 * All macros operate on a variable called operand_states which must be in
 * scope at the point of use.  Every argument and every expansion is fully
 * parenthesized so that compound arguments and surrounding operators cannot
 * regroup the expression, and shifted values are converted to uint32_t first
 * so that a shift into the top byte never overflows a signed int.
 */

/* Records operand name S for operand number N. */
#define SET_OPERAND_NAME(N, S) \
  (operand_states |= ((uint32_t) (S) << ((N) << 3)))

/* Dispatches on the operand format token T (e.g. OPERAND_FORMAT_32_BIT). */
#define SET_OPERAND_FORMAT(N, T) SET_OPERAND_FORMAT_ ## T(N)

#define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(N) \
  (operand_states |= ((uint32_t) OPERAND_SANDBOX_8BIT << (5 + ((N) << 3))))
#define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(N) \
  (operand_states |= \
       ((uint32_t) OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))))
#define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(N) \
  (operand_states |= \
       ((uint32_t) OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3))))
#define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(N) \
  (operand_states |= \
       ((uint32_t) OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))))

/*
 * Non-zero when the whole byte of operand N equals name S combined with
 * kind T, i.e. exactly (S | (T << 5)).
 */
#define CHECK_OPERAND(N, S, T) \
  ((operand_states & ((uint32_t) 0xff << ((N) << 3))) == \
   ((uint32_t) ((S) | ((T) << 5)) << ((N) << 3)))
674
675 static INLINE void CheckAccess(ptrdiff_t instruction_begin,
676 enum OperandName base,
677 enum OperandName index,
678 uint8_t restricted_register,
679 bitmap_word *valid_targets,
680 uint32_t *instruction_info_collected) {
681 if ((base == REG_RIP) || (base == REG_R15) ||
682 (base == REG_RSP) || (base == REG_RBP)) {
683 if ((index == NO_REG) || (index == REG_RIZ))
684 { /* do nothing. */ }
685 else if (index == restricted_register)
686 BitmapClearBit(valid_targets, instruction_begin),
687 *instruction_info_collected |= RESTRICTED_REGISTER_USED;
688 else
689 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER;
690 } else {
691 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER;
692 }
693 }
694
695
696 static INLINE void Process0Operands(enum OperandName *restricted_register,
697 uint32_t *instruction_info_collected) {
698 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
699 * instruction, not with regular instruction. */
700 if (*restricted_register == REG_RSP) {
701 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
702 } else if (*restricted_register == REG_RBP) {
703 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
704 }
705 *restricted_register = NO_REG;
706 }
707
708 static INLINE void Process1Operand(enum OperandName *restricted_register,
709 uint32_t *instruction_info_collected,
710 uint8_t rex_prefix,
711 uint32_t operand_states) {
712 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
713 * instruction, not with regular instruction. */
714 if (*restricted_register == REG_RSP) {
715 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
716 } else if (*restricted_register == REG_RBP) {
717 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
718 }
719 *restricted_register = NO_REG;
720 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
721 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
722 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
723 *instruction_info_collected |= R15_MODIFIED;
724 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
725 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
726 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
727 *instruction_info_collected |= BPL_MODIFIED;
728 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
729 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
730 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
731 *instruction_info_collected |= SPL_MODIFIED;
732 }
733 }
734
735 static INLINE void Process1OperandZeroExtends(
736 enum OperandName *restricted_register,
737 uint32_t *instruction_info_collected,
738 uint8_t rex_prefix,
739 uint32_t operand_states) {
740 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
741 * instruction, not with regular instruction. */
742 if (*restricted_register == REG_RSP) {
743 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
744 } else if (*restricted_register == REG_RBP) {
745 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
746 }
747 *restricted_register = NO_REG;
748 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
749 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
750 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
751 *instruction_info_collected |= R15_MODIFIED;
752 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
753 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
754 *instruction_info_collected |= BPL_MODIFIED;
755 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
756 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
757 *instruction_info_collected |= SPL_MODIFIED;
758 /* Take 2 bits of operand type from operand_states as *restricted_register,
759 * make sure operand_states denotes a register (4th bit == 0). */
760 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
761 *restricted_register = operand_states & 0x0f;
762 }
763 }
764
765 static INLINE void Process2Operands(enum OperandName *restricted_register,
766 uint32_t *instruction_info_collected,
767 uint8_t rex_prefix,
768 uint32_t operand_states) {
769 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
770 * instruction, not with regular instruction. */
771 if (*restricted_register == REG_RSP) {
772 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
773 } else if (*restricted_register == REG_RBP) {
774 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
775 }
776 *restricted_register = NO_REG;
777 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
778 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
779 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
780 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
781 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
782 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
783 *instruction_info_collected |= R15_MODIFIED;
784 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
785 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
786 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
787 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
788 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
789 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
790 *instruction_info_collected |= BPL_MODIFIED;
791 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
792 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
793 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
794 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
795 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
796 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
797 *instruction_info_collected |= SPL_MODIFIED;
798 }
799 }
800
801 static INLINE void Process2OperandsZeroExtends(
802 enum OperandName *restricted_register,
803 uint32_t *instruction_info_collected,
804 uint8_t rex_prefix,
805 uint32_t operand_states) {
806 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
807 * instruction, not with regular instruction. */
808 if (*restricted_register == REG_RSP) {
809 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
810 } else if (*restricted_register == REG_RBP) {
811 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
812 }
813 *restricted_register = NO_REG;
814 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
815 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
816 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
817 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
818 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
819 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
820 *instruction_info_collected |= R15_MODIFIED;
821 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
822 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
823 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
824 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
825 *instruction_info_collected |= BPL_MODIFIED;
826 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
827 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
828 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
829 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
830 *instruction_info_collected |= SPL_MODIFIED;
831 /* Take 2 bits of operand type from operand_states as *restricted_register,
832 * make sure operand_states denotes a register (4th bit == 0). */
833 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
834 *restricted_register = operand_states & 0x0f;
835 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) {
836 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
837 } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) {
838 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
839 }
840 /* Take 2 bits of operand type from operand_states as *restricted_register,
841 * make sure operand_states denotes a register (12th bit == 0). */
842 } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) {
843 *restricted_register = (operand_states & 0x0f00) >> 8;
844 }
845 }
846
847 /*
848 * This function merges "dangerous" instruction with sandboxing instructions to
849 * get a "superinstruction" and unmarks in-between jump targets.
850 */
851 static INLINE void ExpandSuperinstructionBySandboxingBytes(
852 size_t sandbox_instructions_size,
853 const uint8_t **instruction_begin,
854 const uint8_t *data,
855 bitmap_word *valid_targets) {
856 *instruction_begin -= sandbox_instructions_size;
857 /*
858 * We need to unmark start of the "dangerous" instruction itself, too, but we
859 * don't need to mark the beginning of the whole "superinstruction" - that's
860 * why we move start by one byte and don't change the length.
861 */
862 UnmarkValidJumpTargets((*instruction_begin + 1 - data),
863 sandbox_instructions_size,
864 valid_targets);
865 }
866
867 /*
868 * Return TRUE if naclcall or nacljmp uses the same register in all three
869 * instructions.
870 *
871 * This version is for the case where "add %src_register, %dst_register" with
872 * dst in RM field and src in REG field of ModR/M byte is used.
873 *
874 * There are five possible forms:
875 *
876 * 0: 83 eX e0 and $~0x1f,E86
877 * 3: 4? 01 fX add RBASE,R86
878 * 6: ff eX jmpq *R86
879 * ^ ^
880 * instruction_begin current_position
881 *
882 * 0: 4? 83 eX e0 and $~0x1f,E86
883 * 4: 4? 01 fX add RBASE,R86
884 * 7: ff eX jmpq *R86
885 * ^ ^
886 * instruction_begin current_position
887 *
888 * 0: 83 eX e0 and $~0x1f,E86
889 * 3: 4? 01 fX add RBASE,R86
890 * 6: 4? ff eX jmpq *R86
891 * ^ ^
892 * instruction_begin current_position
893 *
894 * 0: 4? 83 eX e0 and $~0x1f,E86
895 * 4: 4? 01 fX add RBASE,R86
896 * 7: 4? ff eX jmpq *R86
897 * ^ ^
898 * instruction_begin current_position
899 *
900 * 0: 4? 83 eX e0 and $~0x1f,E64
901 * 4: 4? 01 fX add RBASE,R64
902 * 7: 4? ff eX jmpq *R64
903 * ^ ^
904 * instruction_begin current_position
905 *
906 * We don't care about "?" (they are checked by DFA).
907 */
908 static INLINE Bool VerifyNaclCallOrJmpAddToRM(const uint8_t *instruction_begin,
909 const uint8_t *current_position) {
910 return
911 RMFromModRM(instruction_begin[-5]) == RMFromModRM(instruction_begin[-1]) &&
912 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
913 }
914
915 /*
916 * Return TRUE if naclcall or nacljmp uses the same register in all three
917 * instructions.
918 *
919 * This version is for the case where "add %src_register, %dst_register" with
920 * dst in REG field and src in RM field of ModR/M byte is used.
921 *
922 * There are five possible forms:
923 *
924 * 0: 83 eX e0 and $~0x1f,E86
925 * 3: 4? 03 Xf add RBASE,R86
926 * 6: ff eX jmpq *R86
927 * ^ ^
928 * instruction_begin current_position
929 *
930 * 0: 4? 83 eX e0 and $~0x1f,E86
931 * 4: 4? 03 Xf add RBASE,R86
932 * 7: ff eX jmpq *R86
933 * ^ ^
934 * instruction_begin current_position
935 *
936 * 0: 83 eX e0 and $~0x1f,E86
937 * 3: 4? 03 Xf add RBASE,R86
938 * 6: 4? ff eX jmpq *R86
939 * ^ ^
940 * instruction_begin current_position
941 *
942 * 0: 4? 83 eX e0 and $~0x1f,E86
943 * 4: 4? 03 Xf add RBASE,R86
944 * 7: 4? ff eX jmpq *R86
945 * ^ ^
946 * instruction_begin current_position
947 *
948 * 0: 4? 83 eX e0 and $~0x1f,E64
949 * 4: 4? 03 Xf add RBASE,R64
950 * 7: 4? ff eX jmpq *R64
951 * ^ ^
952 * instruction_begin current_position
953 *
954 * We don't care about "?" (they are checked by DFA).
955 */
956 static INLINE Bool VerifyNaclCallOrJmpAddToReg(
957 const uint8_t *instruction_begin,
958 const uint8_t *current_position) {
959 return
960 RMFromModRM(instruction_begin[-5]) == RegFromModRM(instruction_begin[-1]) &&
961 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
962 }
963
964 /*
965 * This function checks that naclcall or nacljmp are correct (that is: three
966 * component instructions match) and if that is true then it merges call or jmp
967 * with a sandboxing to get a "superinstruction" and removes in-between jump
968 * targets. If it's not true then it triggers "unrecognized instruction" error
969 * condition.
970 *
971 * This version is for the case where "add with dst register in RM field"
972 * (opcode 0x01) and "add without REX prefix" is used.
973 *
974 * There are two possibile forms:
975 *
976 * 0: 83 eX e0 and $~0x1f,E86
977 * 3: 4? 01 fX add RBASE,R86
978 * 6: ff eX jmpq *R86
979 * ^ ^
980 * instruction_begin current_position
981 *
982 * 0: 83 eX e0 and $~0x1f,E86
983 * 3: 4? 01 fX add RBASE,R86
984 * 6: 4? ff eX jmpq *R86
985 * ^ ^
986 * instruction_begin current_position
987 */
988 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex(
989 uint32_t *instruction_info_collected,
990 const uint8_t **instruction_begin,
991 const uint8_t *current_position,
992 const uint8_t *data,
993 bitmap_word *valid_targets) {
994 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
995 ExpandSuperinstructionBySandboxingBytes(
996 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
997 else
998 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
999 }
1000
1001 /*
1002 * This function checks that naclcall or nacljmp are correct (that is: three
1003 * component instructions match) and if that is true then it merges call or jmp
1004 * with a sandboxing to get a "superinstruction" and removes in-between jump
1005 * targets. If it's not true then it triggers "unrecognized instruction" error
1006 * condition.
1007 *
1008 * This version is for the case where "add with dst register in REG field"
1009 * (opcode 0x03) and "add without REX prefix" is used.
1010 *
1011 * There are two possibile forms:
1012 *
1013 * 0: 83 eX e0 and $~0x1f,E86
1014 * 3: 4? 03 Xf add RBASE,R86
1015 * 6: ff eX jmpq *R86
1016 * ^ ^
1017 * instruction_begin current_position
1018 *
1019 * 0: 83 eX e0 and $~0x1f,E86
1020 * 3: 4? 03 Xf add RBASE,R86
1021 * 6: 4? ff eX jmpq *R86
1022 * ^ ^
1023 * instruction_begin current_position
1024 */
1025 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex(
1026 uint32_t *instruction_info_collected,
1027 const uint8_t **instruction_begin,
1028 const uint8_t *current_position,
1029 const uint8_t *data,
1030 bitmap_word *valid_targets) {
1031 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1032 ExpandSuperinstructionBySandboxingBytes(
1033 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1034 else
1035 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1036 }
1037
1038 /*
1039 * This function checks that naclcall or nacljmp are correct (that is: three
1040 * component instructions match) and if that is true then it merges call or jmp
1041 * with a sandboxing to get a "superinstruction" and removes in-between jump
1042 * targets. If it's not true then it triggers "unrecognized instruction" error
1043 * condition.
1044 *
1045 * This version is for the case where "add with dst register in RM field"
1046 * (opcode 0x01) and "add without REX prefix" is used.
1047 *
1048 * There are three possibile forms:
1049 *
1050 * 0: 4? 83 eX e0 and $~0x1f,E86
1051 * 4: 4? 01 fX add RBASE,R86
1052 * 7: ff eX jmpq *R86
1053 * ^ ^
1054 * instruction_begin current_position
1055 *
1056 * 0: 4? 83 eX e0 and $~0x1f,E86
1057 * 4: 4? 01 fX add RBASE,R86
1058 * 7: 4? ff eX jmpq *R86
1059 * ^ ^
1060 * instruction_begin current_position
1061 *
1062 * 0: 4? 83 eX e0 and $~0x1f,E64
1063 * 4: 4? 01 fX add RBASE,R64
1064 * 7: 4? ff eX jmpq *R64
1065 * ^ ^
1066 * instruction_begin current_position
1067 */
1068 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex(
1069 uint32_t *instruction_info_collected,
1070 const uint8_t **instruction_begin,
1071 const uint8_t *current_position,
1072 const uint8_t *data,
1073 bitmap_word *valid_targets) {
1074 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
1075 ExpandSuperinstructionBySandboxingBytes(
1076 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1077 else
1078 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1079 }
1080
1081 /*
1082 * This function checks that naclcall or nacljmp are correct (that is: three
1083 * component instructions match) and if that is true then it merges call or jmp
1084 * with a sandboxing to get a "superinstruction" and removes in-between jump
1085 * targets. If it's not true then it triggers "unrecognized instruction" error
1086 * condition.
1087 *
1088 * This version is for the case where "add with dst register in REG field"
1089 * (opcode 0x03) and "add without REX prefix" is used.
1090 *
1091 * There are three possibile forms:
1092 *
1093 * 0: 4? 83 eX e0 and $~0x1f,E86
1094 * 4: 4? 03 Xf add RBASE,R86
1095 * 7: ff eX jmpq *R86
1096 * ^ ^
1097 * instruction_begin current_position
1098 *
1099 * 0: 4? 83 eX e0 and $~0x1f,E86
1100 * 4: 4? 03 Xf add RBASE,R86
1101 * 7: 4? ff eX jmpq *R86
1102 * ^ ^
1103 * instruction_begin current_position
1104 *
1105 * 0: 4? 83 eX e0 and $~0x1f,E64
1106 * 4: 4? 03 Xf add RBASE,R64
1107 * 7: 4? ff eX jmpq *R64
1108 * ^ ^
1109 * instruction_begin current_position
1110 */
1111 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex(
1112 uint32_t *instruction_info_collected,
1113 const uint8_t **instruction_begin,
1114 const uint8_t *current_position,
1115 const uint8_t *data,
1116 bitmap_word *valid_targets) {
1117 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1118 ExpandSuperinstructionBySandboxingBytes(
1119 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1120 else
1121 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1122 }
1123
1124
/*
 * Entry point of the amd64 validator: checks `size` bytes of code at `data`.
 *
 *   data, size     - the chunk to validate; size must be a multiple of
 *                    kBundleSize (enforced with CHECK).
 *   options        - bit flags.  PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM makes the
 *                    whole chunk be processed in one pass instead of
 *                    bundle-by-bundle; the initial restricted register is also
 *                    extracted from it
 *                    (EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE).
 *   cpu_features   - not referenced by the C code visible in this function;
 *                    presumably consumed by the Ragel actions - TODO confirm.
 *   user_callback  - invoked here when a bundle ends with %rbp or %rsp still
 *                    restricted; its result is AND-ed into the overall result.
 *                    It is also handed to ProcessInvalidJumpTargets.
 *   callback_data  - opaque pointer passed through to user_callback.
 *
 * Returns the accumulated result: TRUE unless some check cleared it.  On
 * allocation failure returns FALSE with errno set to ENOMEM; when the result
 * is false errno is set to EINVAL.
 *
 * NOTE(review): the local variable names below are referenced by the Ragel
 * machine (see the "variable" statements at the top of the file) and
 * presumably by its actions, so they should not be renamed casually.
 */
1125 Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
1126 uint32_t options,
1127 const NaClCPUFeaturesX86 *cpu_features,
1128 ValidationCallbackFunc user_callback,
1129 void *callback_data) {
1130 bitmap_word valid_targets_small;
1131 bitmap_word jump_dests_small;
1132 bitmap_word *valid_targets;
1133 bitmap_word *jump_dests;
1134 const uint8_t *current_position;
1135 const uint8_t *end_of_bundle;
1136 int result = TRUE;
1137
1138 CHECK(sizeof valid_targets_small == sizeof jump_dests_small);
1139 CHECK(size % kBundleSize == 0);
1140
1141 /*
1142 * For very small sequences (one bundle) malloc is too expensive.
1143 *
1144 * Note1: we allocate one extra bit, because we set valid jump target bits
1145 * _after_ instructions, so there will be one at the end of the chunk.
1146 *
1147 * Note2: we don't ever mark first bit as a valid jump target but this is
1148 * not a problem because any aligned address is valid jump target.
1149 */
1150 if ((size + 1) <= (sizeof valid_targets_small * 8)) {
1151 valid_targets_small = 0;
1152 valid_targets = &valid_targets_small;
1153 jump_dests_small = 0;
1154 jump_dests = &jump_dests_small;
1155 } else {
1156 valid_targets = BitmapAllocate(size + 1);
1157 jump_dests = BitmapAllocate(size + 1);
/* Either allocation may have failed; release both (one may be NULL). */
1158 if (!valid_targets || !jump_dests) {
1159 free(jump_dests);
1160 free(valid_targets);
1161 errno = ENOMEM;
1162 return FALSE;
1163 }
1164 }
1165
1166 /*
1167 * This option is usually used in tests: we will process the whole chunk
1168 * in one pass. Usually each bundle is processed separately which means
1169 * instructions (and super-instructions) can not cross borders of the bundle.
1170 */
1171 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
1172 end_of_bundle = data + size;
1173 else
1174 end_of_bundle = data + kBundleSize;
1175
1176 /*
1177 * Main loop. Here we process the data array bundle-after-bundle.
1178 * Ragel-produced DFA does all the checks with one exception: direct jumps.
1179 * It collects the two arrays: valid_targets and jump_dests which are used
1180 * to test direct jumps later.
1181 */
1182 for (current_position = data;
1183 current_position < data + size;
1184 current_position = end_of_bundle,
1185 end_of_bundle = current_position + kBundleSize) {
1186 /* Start of the instruction being processed. */
1187 const uint8_t *instruction_begin = current_position;
1188 /* Only used locally in the end_of_instruction_cleanup action. */
1189 const uint8_t *instruction_end;
1190 int current_state;
1191 uint32_t instruction_info_collected = 0;
1192 /* Keeps one byte of information per operand in the current instruction:
1193 * 2 bits for register kinds,
1194 * 5 bits for register numbers (16 regs plus RIZ). */
1195 uint32_t operand_states = 0;
1196 enum OperandName base = NO_REG;
1197 enum OperandName index = NO_REG;
1198 enum OperandName restricted_register =
1199 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
1200 uint8_t rex_prefix = FALSE;
1201 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
1202 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
1203 uint8_t vex_prefix3 = 0x00;
1204
/* Ragel directives: replaced by the generated DFA initialization and
 * execution code when the .rl file is processed. */
1205 %% write init;
1206 %% write exec;
1207
1208 /*
1209 * Ragel DFA accepted the bundle, but we still need to make sure the last
1210 * instruction hasn't left %rbp or %rsp in restricted state.
1211 */
1212 if (restricted_register == REG_RBP)
1213 result &= user_callback(end_of_bundle, end_of_bundle,
1214 RESTRICTED_RBP_UNPROCESSED |
1215 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
1216 RESTRICTED_REGISTER_MASK), callback_data);
1217 else if (restricted_register == REG_RSP)
1218 result &= user_callback(end_of_bundle, end_of_bundle,
1219 RESTRICTED_RSP_UNPROCESSED |
1220 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
1221 RESTRICTED_REGISTER_MASK), callback_data);
1222 }
1223
1224 /*
1225 * Check the direct jumps. All the targets from jump_dests must be in
1226 * valid_targets.
1227 */
1228 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
1229 user_callback, callback_data);
1230
1231 /* We only use malloc for large code sequences */
1232 if (jump_dests != &jump_dests_small) free(jump_dests);
1233 if (valid_targets != &valid_targets_small) free(valid_targets);
1234 if (!result) errno = EINVAL;
1235 return result;
1236 }
OLDNEW
« no previous file with comments | « src/trusted/validator_ragel/unreviewed/validator_x86_32.rl ('k') | src/trusted/validator_ragel/validator.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698