Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/trusted/validator_ragel/unreviewed/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 /*
8 * This is the core of amd64-mode validator. Please note that this file
9 * combines ragel machine description and C language actions. Please read
10 * validator_internals.html first to understand how the whole thing is built:
11 * it explains how the byte sequences are constructed, what constructs like
12 * "@{}" or "REX_WRX?" mean, etc.
13 */
14
15 #include <assert.h>
16 #include <errno.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include "native_client/src/trusted/validator_ragel/bitmap.h"
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_interna l.h"
24
25 %%{
26 machine x86_64_validator;
27 alphtype unsigned char;
28 variable p current_position;
29 variable pe end_of_bundle;
30 variable eof end_of_bundle;
31 variable cs current_state;
32
33 include byte_machine "byte_machines.rl";
34
35 include prefixes_parsing_validator
36 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
37 include rex_actions
38 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
39 include rex_parsing
40 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
41 include vex_actions_amd64
42 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
43 include vex_parsing_amd64
44 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
45 include displacement_fields_actions
46 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
47 include displacement_fields_parsing
48 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
49 include modrm_actions_amd64
50 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
51 include modrm_parsing
52 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
53 include operand_actions_amd64
54 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
55 include immediate_fields_actions
56 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
57 include immediate_fields_parsing_amd64
58 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
59 include relative_fields_validator_actions
60 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
61 include relative_fields_parsing
62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
63 include cpuid_actions
64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl";
65
66 action check_access {
67 CheckAccess(instruction_begin - data, base, index, restricted_register,
68 valid_targets, &instruction_info_collected);
69 }
70
71 # Action which marks last byte as not immediate. Most 3DNow! instructions,
72 # some AVX and XOP instructions have this property. It's referenced by
73 # decode_x86_64 machine in [autogenerated] "validator_x86_64_instruction.rl"
74 # file.
75 action last_byte_is_not_immediate {
76 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
77 }
78
79 action modifiable_instruction {
80 instruction_info_collected |= MODIFIABLE_INSTRUCTION;
81 }
82
83 action process_0_operands {
84 Process0Operands(&restricted_register, &instruction_info_collected);
85 }
86 action process_1_operand {
87 Process1Operand(&restricted_register, &instruction_info_collected,
88 rex_prefix, operand_states);
89 }
90 action process_1_operand_zero_extends {
91 Process1OperandZeroExtends(&restricted_register,
92 &instruction_info_collected, rex_prefix,
93 operand_states);
94 }
95 action process_2_operands {
96 Process2Operands(&restricted_register, &instruction_info_collected,
97 rex_prefix, operand_states);
98 }
99 action process_2_operands_zero_extends {
100 Process2OperandsZeroExtends(&restricted_register,
101 &instruction_info_collected, rex_prefix,
102 operand_states);
103 }
104
105 include decode_x86_64 "validator_x86_64_instruction.rl";
106
107 # Special %rbp modifications - the ones which don't need a sandboxing.
108 #
109 # Note that there are two different opcodes for "mov": in x86-64 there are two
110 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
111 # from REG field to RM or in the other direction thus there are two encodings
112 # for the register-to-register move.
113 rbp_modifications =
114 (b_0100_10x0 0x89 0xe5 | # mov %rsp,%rbp
115 b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp
116 @process_0_operands;
117
118 # Special instructions used for %rbp sandboxing.
119 #
120 # This is the "second half" of the %rbp sandboxing. Any zero-extending
121 # instruction which stores the data in %ebp can be first half, but unlike
122 # the situation with other "normal" registers you can not just write to
123 # %ebp and continue: such activity MUST restore the status quo immediately
124 # via one of these instructions.
125 rbp_sandboxing =
126 (b_0100_11x0 0x01 0xfd | # add %r15,%rbp
127 b_0100_10x1 0x03 0xef | # add %r15,%rbp
128 # Note that unlike %rsp case, there is no 'lea (%rbp,%r15,1),%rbp'
129 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
130 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp
131 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
132 # Note: restricted_register keeps the restricted register as explained in
133 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
134 #
135 # "Normal" instructions can not be used in a place where %rbp is restricted.
136 # But since these instructions are "second half" of the %rbp sandboxing they
137 # can be used *only* when %rbp is restricted.
138 #
139 # That is (normal instruction):
140 # mov %eax,%ebp
141 # mov %esi,%edi <- Error: %ebp is restricted
142 # vs
143 # mov %esi,%edi
144 # add %r15,%rbp <- Error: %ebp is *not* restricted
145 # vs
146 # mov %eax,%ebp
147 # add %r15,%rbp <- Ok: %rbp is restricted as it should be
148 #
149 # Check this precondition and mark the beginning of the instruction as
150 # an invalid jump target.
151 @{ if (restricted_register == REG_RBP)
152 instruction_info_collected |= RESTRICTED_REGISTER_USED;
153 else
154 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
155 restricted_register = NO_REG;
156 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
157 };
158
159 # Special %rsp modifications - the ones which don't need a sandboxing.
160 #
161 # Note that there are two different opcodes for "mov": in x86-64 there are two
162 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
163 # from REG field to RM or in the other direction thus there are two encodings
164 # for the register-to-register move.
165 rsp_modifications =
166 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp
167 b_0100_10x0 0x8b 0xe5 | # mov %rbp,%rsp
168 # Superfluous bits are not supported:
169 # http://code.google.com/p/nativeclient/issues/detail?id=3012
170 b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp
171 @process_0_operands;
172
173 # Special instructions used for %rsp sandboxing.
174 #
175 # This is the "second half" of the %rsp sandboxing. Any zero-extending
176 # instruction which stores the data in %esp can be first half, but unlike
177 # the situation with other "normal" registers you can not just write to
178 # %esp and continue: such activity MUST restore the status quo immediately
179 # via one of these instructions.
180 rsp_sandboxing =
181 (b_0100_11x0 0x01 0xfc | # add %r15,%rsp
182 b_0100_10x1 0x03 0xe7 | # add %r15,%rsp
183 # OR can be used as well, see
184 # http://code.google.com/p/nativeclient/issues/detail?id=3070
185 b_0100_11x0 0x09 0xfc | # or %r15,%rsp
186 b_0100_10x1 0x0b 0xe7 | # or %r15,%rsp
187 0x4a 0x8d 0x24 0x3c | # lea (%rsp,%r15,1),%rsp
188 0x4a 0x8d 0x64 0x3c 0x00 | # lea 0x00(%rsp,%r15,1),%rsp
189 0x4a 0x8d 0xa4 0x3c 0x00 0x00 0x00 0x00) # lea 0x00000000(%rsp,%r15,1),%rsp
190 # Note: restricted_register keeps the restricted register as explained in
191 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
192 #
193 # "Normal" instructions can not be used in a place where %rsp is restricted.
194 # But since these instructions are "second half" of the %rsp sandboxing they
195 # can be used *only* when %rsp is restricted.
196 #
197 # That is (normal instruction):
198 # mov %eax,%esp
199 # mov %esi,%edi <- Error: %esp is restricted
200 # vs
201 # mov %esi,%edi
202 # add %r15,%rsp <- Error: %esp is *not* restricted
203 # vs
204 # mov %eax,%esp
205 # add %r15,%rsp <- Ok: %rsp is restricted as it should be
206 #
207 # Check this precondition and mark the beginning of the instruction as
208 # an invalid jump target.
209 @{ if (restricted_register == REG_RSP)
210 instruction_info_collected |= RESTRICTED_REGISTER_USED;
211 else
212 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
213 restricted_register = NO_REG;
214 UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
215 };
216
217 # naclcall or nacljmp. These are three-instruction indirect-jump sequences.
218 # and $~0x1f, %eXX
219 # add RBASE, %rXX
220 # jmpq *%rXX (or: callq *%rXX)
221 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
222 # just as part of the naclcall/nacljmp, but also as a standalone instruction).
223 #
224 # This means that when naclcall_or_nacljmp ragel machine will be combined with
225 # "normal_instruction*" regular action process_1_operand_zero_extends will be
226 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64
227 # instruction. This action will check if %rbp/%rsp is legally modified thus
228 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
229 #
230 # There are a number of variants present which differ by the REX prefix usage:
231 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq"
232 # or "callq" is the same register and it's much simpler to do if one single
233 # action handles only a fixed number of bytes.
234 #
235 # Additional complication arises because x86-64 contains two different "add"
236 # instructions: with "0x01" and "0x03" opcodes. They differ in the direction
237 # used: both can encode "add %src_register, %dst_register", but the first one
238 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
239 # byte for the dst while the last one uses field RM of the ModR/M byte for the
240 # src and field REG of the ModR/M byte for the dst. Both should be allowed.
241 #
242 # See AMD/Intel manual for clarification of the "add" instruction encoding.
243 #
244 # REGISTER USAGE ABBREVIATIONS:
245 # E86: legacy ia32 registers (all eight: %eax to %edi)
246 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
247 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
248 # R64: new amd64 registers (only seven: %r8 to %r14)
249 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)
250 naclcall_or_nacljmp =
251 # This block encodes call and jump "superinstruction" of the following form:
252 # 0: 83 e_ e0 and $~0x1f,E86
253 # 3: 4_ 01 f_ add RBASE,R86
254 # 6: ff e_ jmpq *R86
255 #### INSTRUCTION ONE (three bytes)
256 # and $~0x1f, E86
257 (0x83 b_11_100_xxx 0xe0
258 #### INSTRUCTION TWO (three bytes)
259 # add RBASE, R86 (0x01 opcode)
260 b_0100_11x0 0x01 b_11_111_xxx
261 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
262 # callq R86
263 ((REX_WRX? 0xff b_11_010_xxx) |
264 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
265 # jmpq R86
266 (REX_WRX? 0xff b_11_100_xxx)))
267 @{
268 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
269 &instruction_begin, current_position,
270 data, valid_targets);
271 } |
272
273 # This block encodes call and jump "superinstruction" of the following form:
274 # 0: 83 e_ e0 and $~0x1f,E86
275 # 3: 4_ 03 _f add RBASE,R86
276 # 6: ff e_ jmpq *R86
277 #### INSTRUCTION ONE (three bytes)
278 # and $~0x1f, E86
279 (0x83 b_11_100_xxx 0xe0
280 #### INSTRUCTION TWO (three bytes)
281 # add RBASE, R86 (0x03 opcode)
282 b_0100_10x1 0x03 b_11_xxx_111
283 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
284 # callq R86
285 ((REX_WRX? 0xff b_11_010_xxx) |
286 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
287 # jmpq R86
288 (REX_WRX? 0xff b_11_100_xxx)))
289 @{
290 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
291 &instruction_begin, current_position,
292 data, valid_targets);
293 } |
294
295 # This block encodes call and jump "superinstruction" of the following form:
296 # 0: 4_ 83 e_ e0 and $~0x1f,E86
297 # 4: 4_ 01 f_ add RBASE,R86
298 # 7: ff e_ jmpq *R86
299 #### INSTRUCTION ONE (four bytes)
300 # and $~0x1f, E86
301 ((REX_RX 0x83 b_11_100_xxx 0xe0
302 #### INSTRUCTION TWO (three bytes)
303 # add RBASE, R86 (0x01 opcode)
304 b_0100_11x0 0x01 b_11_111_xxx
305 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
306 # callq R86
307 ((REX_WRX? 0xff b_11_010_xxx) |
308 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
309 # jmpq R86
310 (REX_WRX? 0xff b_11_100_xxx))) |
311
312 # This block encodes call and jump "superinstruction" of the following form:
313 # 0: 4_ 83 e_ e0 and $~0x1f,E64
314 # 4: 4_ 01 f_ add RBASE,R64
315 # 7: 4_ ff e_ jmpq *R64
316 #### INSTRUCTION ONE (four bytes)
317 # and $~0x1f, E64
318 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
319 #### INSTRUCTION TWO (three bytes)
320 # add RBASE, R64 (0x01 opcode)
321 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
322 #### INSTRUCTION THREE: call (three bytes)
323 # callq R64
324 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
325 #### INSTRUCTION THREE: jmp (three bytes)
326 # jmpq R64
327 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
328 @{
329 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
330 &instruction_begin, current_position,
331 data, valid_targets);
332 } |
333
334 # This block encodes call and jump "superinstruction" of the following form:
335 # 0: 4_ 83 e_ e0 and $~0x1f,E86
336 # 4: 4_ 03 _f add RBASE,R86
337 # 7: ff e_ jmpq *R86
338 #### INSTRUCTION ONE (four bytes)
339 # and $~0x1f, E86
340 ((REX_RX 0x83 b_11_100_xxx 0xe0
341 #### INSTRUCTION TWO (three bytes)
342 # add RBASE, R86 (0x03 opcode)
343 b_0100_10x1 0x03 b_11_xxx_111
344 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
345 # callq R86
346 ((REX_WRX? 0xff b_11_010_xxx) |
347 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
348 # jmpq R86
349 (REX_WRX? 0xff b_11_100_xxx))) |
350
351 # This block encodes call and jump "superinstruction" of the following form:
352 # 0: 4_ 83 e_ e0 and $~0x1f,E64
353 # 4: 4_ 03 _f add RBASE,R64
354 # 7: 4_ ff e_ jmpq *R64
355 #### INSTRUCTION ONE (four bytes)
356 # and $~0x1f, E64
357 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
358 #### INSTRUCTION TWO (three bytes)
359 # add RBASE, R64 (0x03 opcode)
360 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
361 #### INSTRUCTION THREE: call (three bytes)
362 # callq R64
363 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
364 #### INSTRUCTION THREE: jmp (three bytes)
365 # jmpq R64
366 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
367 @{
368 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
369 &instruction_begin, current_position,
370 data, valid_targets);
371 };
372
373 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rdi) operand
374
375 # maskmovq %mmX,%mmY (EMMX or SSE)
376 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers;
377
378 # maskmovdqu %xmmX, %xmmY (SSE2)
379 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers;
380
381 # vmaskmovdqu %xmmX, %xmmY (AVX)
382 vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) |
383 (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers;
384
385 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu;
386
387 # Temporary fix: for string instructions combination of data16 and rep(ne)
388 # prefixes is disallowed to mimic old validator behavior.
389 # See http://code.google.com/p/nativeclient/issues/detail?id=1950
390
391 # data16rep = (data16 | rep data16 | data16 rep);
392 # data16condrep = (data16 | condrep data16 | data16 condrep);
393 data16rep = data16;
394 data16condrep = data16;
395
396 # String instructions which use only %ds:(%rsi)
397 string_instruction_rsi_no_rdi =
398 (rep? 0xac | # lods %ds:(%rsi),%al
399 data16rep 0xad | # lods %ds:(%rsi),%ax
400 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax
401
402 # String instructions which use only %ds:(%rdi)
403 string_instruction_rdi_no_rsi =
404 condrep? 0xae | # scas %es:(%rdi),%al
405 data16condrep 0xaf | # scas %es:(%rdi),%ax
406 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax
407
408 rep? 0xaa | # stos %al,%es:(%rdi)
409 data16rep 0xab | # stos %ax,%es:(%rdi)
410 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)
411
412 # String instructions which use both %ds:(%rsi) and %es:(%rdi)
413 string_instruction_rsi_rdi =
414 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi)
415 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi)
416 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi)
417
418 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi)
419 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi)
420 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi)
421
422 # "Superinstruction" which includes %rsi sandboxing.
423 #
424 # There are two variants which handle spurious REX prefixes.
425 #
426 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64
427 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
428 # be used to move from REG field to RM or in the other direction thus there
429 # are two encodings for the register-to-register move (and since REG and RM
430 # are identical here only opcode differs).
431 sandbox_instruction_rsi_no_rdi =
432 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
433 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
434 string_instruction_rsi_no_rdi
435 @{
436 ExpandSuperinstructionBySandboxingBytes(
437 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
438 } |
439
440 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
441 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
442 string_instruction_rsi_no_rdi
443 @{
444 ExpandSuperinstructionBySandboxingBytes(
445 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
446 };
447
448 # "Superinstruction" which includes %rdi sandboxing.
449 #
450 # There are two variants which handle spurious REX prefixes.
451 #
452 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64
453 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
454 # be used to move from REG field to RM or in the other direction thus there
455 # are two encodings for the register-to-register move (and since REG and RM
456 # are identical here only opcode differs).
457 sandbox_instruction_rdi_no_rsi =
458 (0x89 | 0x8b) 0xff # mov %edi,%edi
459 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
460 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
461 @{
462 ExpandSuperinstructionBySandboxingBytes(
463 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
464 } |
465
466 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
467 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
468 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
469 @{
470 ExpandSuperinstructionBySandboxingBytes(
471 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
472 };
473
474
475 # "Superinstruction" which includes both %rsi and %rdi sandboxing.
476 #
477 # There are four variants which handle spurious REX prefixes.
478 #
479 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both
480 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two
481 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
482 # from REG field to RM or in the other direction thus there are two encodings
483 # for the register-to-register move (and since REG and RM are identical here
484 # only opcode differs).
485 sandbox_instruction_rsi_rdi =
486 (0x89 | 0x8b) 0xf6 # mov %esi,%esi
487 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
488 (0x89 | 0x8b) 0xff # mov %edi,%edi
489 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
490 string_instruction_rsi_rdi
491 @{
492 ExpandSuperinstructionBySandboxingBytes(
493 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
494 &instruction_begin, data, valid_targets);
495 } |
496
497 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi
498 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
499 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
500 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi
501
502 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
503 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
504 (0x89 | 0x8b) 0xff # mov %edi,%edi
505 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi
506 string_instruction_rsi_rdi
507 @{
508 ExpandSuperinstructionBySandboxingBytes(
509 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
510 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
511 &instruction_begin, data, valid_targets);
512 } |
513
514 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
515 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
516 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
517 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
518 string_instruction_rsi_rdi
519 @{
520 ExpandSuperinstructionBySandboxingBytes(
521 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
522 &instruction_begin, data, valid_targets);
523 };
524
525 # All the "special" instructions (== instructions which obey non-standard
526 # rules). Three groups:
527 # * %rbp/%rsp related instructions (these instructions are special because
528 # they must be in the range %r15...%r15+4294967295 except momentarily they
529 # can be in the range 0...4294967295)
530 # * string instructions (which can not use %r15 as base and thus need special
531 # handling both in compiler and validator)
532 # * naclcall/nacljmp (indirect jumps need special care)
533 special_instruction =
534 (rbp_modifications |
535 rsp_modifications |
536 rbp_sandboxing |
537 rsp_sandboxing |
538 sandbox_instruction_rsi_no_rdi |
539 sandbox_instruction_rdi_no_rsi |
540 sandbox_instruction_rsi_rdi |
541 naclcall_or_nacljmp)
542 # Mark the instruction as special - currently this information is used only
543 # in tests, but in the future we may use it for dynamic code modification
544 # support.
545 @{
546 instruction_info_collected |= SPECIAL_INSTRUCTION;
547 };
548
549 # Remove special instructions which are only allowed in special cases.
550 normal_instruction = one_instruction - special_instruction;
551
552 # Check if call is properly aligned.
553 #
554 # For direct call we explicitly encode all variations. For indirect call
555 # we accept all the special instructions which end with a register-addressed
556 # indirect call.
557 call_alignment =
558 ((normal_instruction &
559 # Direct call
560 ((data16 REX_RXB? 0xe8 rel16) |
561 (REX_WRXB? 0xe8 rel32) |
562 (data16 REXW_RXB 0xe8 rel32))) |
563 (special_instruction &
564 # Indirect call
565 (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
566 modrm_registers))))
567 # Call instruction must be aligned to the end of bundle. Previously this was
568 # a strict requirement, today it's just a warning to aid with debugging.
569 @{
570 if (((current_position - data) & kBundleMask) != kBundleMask)
571 instruction_info_collected |= BAD_CALL_ALIGNMENT;
572 };
573
574 # This action calls user's callback (if needed) and cleans up validator's
575 # internal state.
576 #
577 # We call the user callback if there are validation errors or if the
578 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used.
579 #
580 # After that we move instruction_begin and clean all the variables which
581 # are only used in the processing of a single instruction (prefixes, operand
582 # states and instruction_info_collected).
583 action end_of_instruction_cleanup {
584 /* Call user-supplied callback. */
585 instruction_end = current_position + 1;
586 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
587 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
588 result &= user_callback(
589 instruction_begin, instruction_end,
590 instruction_info_collected |
591 ((restricted_register << RESTRICTED_REGISTER_SHIFT) &
592 RESTRICTED_REGISTER_MASK), callback_data);
593 }
594
595 /* On successful match the instruction_begin must point to the next byte
596 * to be able to report the new offset as the start of instruction
597 * causing error. */
598 instruction_begin = instruction_end;
599
600 /* Mark start of the next instruction as a valid target for jump.
601 * Note: we mark start of the next instruction here, not start of the
602 * current one because memory access check should be able to clear this
603 * bit when restricted register is used. */
604 MarkValidJumpTarget(instruction_begin - data, valid_targets);
605
606 /* Clear variables. */
607 instruction_info_collected = 0;
608 SET_REX_PREFIX(FALSE);
609 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
610 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B);
611 SET_VEX_PREFIX3(0x00);
612 operand_states = 0;
613 base = 0;
614 index = 0;
615 }
616
617 # This action reports fatal error detected by DFA.
618 action report_fatal_error {
619 result &= user_callback(instruction_begin, current_position,
620 UNRECOGNIZED_INSTRUCTION, callback_data);
621 /*
622 * Process the next bundle: "continue" here is for the "for" cycle in
623 * the ValidateChunkAMD64 function.
624 *
625 * It does not affect the case which we really care about (when code
626 * is validatable), but makes it possible to detect more errors in one
627 * run in tools like ncval.
628 */
629 continue;
630 }
631
632 # This is main ragel machine: it does 99% of validation work. There is only
633 # one thing to do with bundle if this machine accepts the bundle:
634 # * check for the state of the restricted_register at the end of the bundle.
635 # It's an error if %rbp or %rsp is restricted at the end of the bundle.
636 # Additionally if all the bundles are fine you need to check that direct jumps
637 # are correct. This is done in the following way:
638 # * DFA fills two arrays: valid_targets and jump_dests.
639 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".
640 # All other checks are done here.
641
642 main := ((call_alignment | normal_instruction | special_instruction)
643 @end_of_instruction_cleanup)*
644 $!report_fatal_error;
645
646 }%%
647
648 %% write data;
649
650 enum OperandKind {
651 OPERAND_SANDBOX_IRRELEVANT = 0,
652 /*
653 * Currently we do not distinguish 8bit and 16bit modifications from
654 * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator.
655 *
656 * 8bit operands must be distinguished from other types because the REX prefix
657 * regulates the choice between %ah and %spl, as well as %ch and %bpl.
658 */
659 OPERAND_SANDBOX_8BIT,
660 OPERAND_SANDBOX_RESTRICTED,
661 OPERAND_SANDBOX_UNRESTRICTED
662 };
663
664 #define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3))
665 #define SET_OPERAND_TYPE(N, T) SET_OPERAND_TYPE_ ## T(N)
666 #define SET_OPERAND_TYPE_OPERAND_TYPE_8_BIT(N) \
667 operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3))
668 #define SET_OPERAND_TYPE_OPERAND_TYPE_16_BIT(N) \
669 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))
670 #define SET_OPERAND_TYPE_OPERAND_TYPE_32_BIT(N) \
671 operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3))
672 #define SET_OPERAND_TYPE_OPERAND_TYPE_64_BIT(N) \
673 operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))
674 #define CHECK_OPERAND(N, S, T) \
675 ((operand_states & (0xff << ((N) << 3))) == ((S | (T << 5)) << ((N) << 3)))
676
677 static INLINE void CheckAccess(ptrdiff_t instruction_begin,
678 enum OperandName base,
679 enum OperandName index,
680 uint8_t restricted_register,
681 bitmap_word *valid_targets,
682 uint32_t *instruction_info_collected) {
683 if ((base == REG_RIP) || (base == REG_R15) ||
684 (base == REG_RSP) || (base == REG_RBP)) {
685 if ((index == NO_REG) || (index == REG_RIZ))
686 { /* do nothing. */ }
687 else if (index == restricted_register)
688 BitmapClearBit(valid_targets, instruction_begin),
689 *instruction_info_collected |= RESTRICTED_REGISTER_USED;
690 else
691 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER;
692 } else {
693 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER;
694 }
695 }
696
697
698 static INLINE void Process0Operands(enum OperandName *restricted_register,
699 uint32_t *instruction_info_collected) {
700 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
701 * instruction, not with regular instruction. */
702 if (*restricted_register == REG_RSP) {
703 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
704 } else if (*restricted_register == REG_RBP) {
705 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
706 }
707 *restricted_register = NO_REG;
708 }
709
710 static INLINE void Process1Operand(enum OperandName *restricted_register,
711 uint32_t *instruction_info_collected,
712 uint8_t rex_prefix,
713 uint32_t operand_states) {
714 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
715 * instruction, not with regular instruction. */
716 if (*restricted_register == REG_RSP) {
717 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
718 } else if (*restricted_register == REG_RBP) {
719 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
720 }
721 *restricted_register = NO_REG;
722 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
723 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
724 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
725 *instruction_info_collected |= R15_MODIFIED;
726 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
727 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
728 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
729 *instruction_info_collected |= BPL_MODIFIED;
730 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
731 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
732 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
733 *instruction_info_collected |= SPL_MODIFIED;
734 }
735 }
736
737 static INLINE void Process1OperandZeroExtends(
738 enum OperandName *restricted_register,
739 uint32_t *instruction_info_collected,
740 uint8_t rex_prefix,
741 uint32_t operand_states) {
742 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
743 * instruction, not with regular instruction. */
744 if (*restricted_register == REG_RSP) {
745 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
746 } else if (*restricted_register == REG_RBP) {
747 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
748 }
749 *restricted_register = NO_REG;
750 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
751 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
752 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
753 *instruction_info_collected |= R15_MODIFIED;
754 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
755 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
756 *instruction_info_collected |= BPL_MODIFIED;
757 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
758 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
759 *instruction_info_collected |= SPL_MODIFIED;
760 /* Take 2 bits of operand type from operand_states as *restricted_register,
761 * make sure operand_states denotes a register (4th bit == 0). */
762 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
763 *restricted_register = operand_states & 0x0f;
764 }
765 }
766
767 static INLINE void Process2Operands(enum OperandName *restricted_register,
768 uint32_t *instruction_info_collected,
769 uint8_t rex_prefix,
770 uint32_t operand_states) {
771 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
772 * instruction, not with regular instruction. */
773 if (*restricted_register == REG_RSP) {
774 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
775 } else if (*restricted_register == REG_RBP) {
776 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
777 }
778 *restricted_register = NO_REG;
779 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
780 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
781 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
782 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
783 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
784 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
785 *instruction_info_collected |= R15_MODIFIED;
786 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
787 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
788 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
789 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
790 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
791 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
792 *instruction_info_collected |= BPL_MODIFIED;
793 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
794 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
795 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
796 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
797 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
798 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
799 *instruction_info_collected |= SPL_MODIFIED;
800 }
801 }
802
803 static INLINE void Process2OperandsZeroExtends(
804 enum OperandName *restricted_register,
805 uint32_t *instruction_info_collected,
806 uint8_t rex_prefix,
807 uint32_t operand_states) {
808 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
809 * instruction, not with regular instruction. */
810 if (*restricted_register == REG_RSP) {
811 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
812 } else if (*restricted_register == REG_RBP) {
813 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
814 }
815 *restricted_register = NO_REG;
816 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
817 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
818 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
819 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
820 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
821 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
822 *instruction_info_collected |= R15_MODIFIED;
823 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
824 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
825 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
826 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
827 *instruction_info_collected |= BPL_MODIFIED;
828 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
829 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
830 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
831 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
832 *instruction_info_collected |= SPL_MODIFIED;
833 /* Take 2 bits of operand type from operand_states as *restricted_register,
834 * make sure operand_states denotes a register (4th bit == 0). */
835 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
836 *restricted_register = operand_states & 0x0f;
837 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) {
838 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
839 } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) {
840 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
841 }
842 /* Take 2 bits of operand type from operand_states as *restricted_register,
843 * make sure operand_states denotes a register (12th bit == 0). */
844 } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) {
845 *restricted_register = (operand_states & 0x0f00) >> 8;
846 }
847 }
848
849 /*
850 * This function merges "dangerous" instruction with sandboxing instructions to
851 * get a "superinstruction" and unmarks in-between jump targets.
852 */
853 static INLINE void ExpandSuperinstructionBySandboxingBytes(
854 size_t sandbox_instructions_size,
855 const uint8_t **instruction_begin,
856 const uint8_t *data,
857 bitmap_word *valid_targets) {
858 *instruction_begin -= sandbox_instructions_size;
859 /*
860 * We need to unmark start of the "dangerous" instruction itself, too, but we
861 * don't need to mark the beginning of the whole "superinstruction" - that's
862 * why we move start by one byte and don't change the length.
863 */
864 UnmarkValidJumpTargets((*instruction_begin + 1 - data),
865 sandbox_instructions_size,
866 valid_targets);
867 }
868
869 /*
870 * Return TRUE if naclcall or nacljmp uses the same register in all three
871 * instructions.
872 *
873 * This version is for the case where "add %src_register, %dst_register" with
874 * dst in RM field and src in REG field of ModR/M byte is used.
875 *
876 * There are five possible forms:
877 *
878 * 0: 83 eX e0 and $~0x1f,E86
879 * 3: 4? 01 fX add RBASE,R86
880 * 6: ff eX jmpq *R86
881 * ^ ^
882 * instruction_begin current_position
883 *
884 * 0: 4? 83 eX e0 and $~0x1f,E86
885 * 4: 4? 01 fX add RBASE,R86
886 * 7: ff eX jmpq *R86
887 * ^ ^
888 * instruction_begin current_position
889 *
890 * 0: 83 eX e0 and $~0x1f,E86
891 * 3: 4? 01 fX add RBASE,R86
892 * 6: 4? ff eX jmpq *R86
893 * ^ ^
894 * instruction_begin current_position
895 *
896 * 0: 4? 83 eX e0 and $~0x1f,E86
897 * 4: 4? 01 fX add RBASE,R86
898 * 7: 4? ff eX jmpq *R86
899 * ^ ^
900 * instruction_begin current_position
901 *
902 * 0: 4? 83 eX e0 and $~0x1f,E64
903 * 4: 4? 01 fX add RBASE,R64
904 * 7: 4? ff eX jmpq *R64
905 * ^ ^
906 * instruction_begin current_position
907 *
908 * We don't care about "?" (they are checked by DFA).
909 */
910 static INLINE Bool VerifyNaclCallOrJmpAddToRM(const uint8_t *instruction_begin,
911 const uint8_t *current_position) {
912 return
913 RMFromModRM(instruction_begin[-5]) == RMFromModRM(instruction_begin[-1]) &&
914 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
915 }
916
917 /*
918 * Return TRUE if naclcall or nacljmp uses the same register in all three
919 * instructions.
920 *
921 * This version is for the case where "add %src_register, %dst_register" with
922 * dst in REG field and src in RM field of ModR/M byte is used.
923 *
924 * There are five possible forms:
925 *
926 * 0: 83 eX e0 and $~0x1f,E86
927 * 3: 4? 03 Xf add RBASE,R86
928 * 6: ff eX jmpq *R86
929 * ^ ^
930 * instruction_begin current_position
931 *
932 * 0: 4? 83 eX e0 and $~0x1f,E86
933 * 4: 4? 03 Xf add RBASE,R86
934 * 7: ff eX jmpq *R86
935 * ^ ^
936 * instruction_begin current_position
937 *
938 * 0: 83 eX e0 and $~0x1f,E86
939 * 3: 4? 03 Xf add RBASE,R86
940 * 6: 4? ff eX jmpq *R86
941 * ^ ^
942 * instruction_begin current_position
943 *
944 * 0: 4? 83 eX e0 and $~0x1f,E86
945 * 4: 4? 03 Xf add RBASE,R86
946 * 7: 4? ff eX jmpq *R86
947 * ^ ^
948 * instruction_begin current_position
949 *
950 * 0: 4? 83 eX e0 and $~0x1f,E64
951 * 4: 4? 03 Xf add RBASE,R64
952 * 7: 4? ff eX jmpq *R64
953 * ^ ^
954 * instruction_begin current_position
955 *
956 * We don't care about "?" (they are checked by DFA).
957 */
958 static INLINE Bool VerifyNaclCallOrJmpAddToReg(
959 const uint8_t *instruction_begin,
960 const uint8_t *current_position) {
961 return
962 RMFromModRM(instruction_begin[-5]) == RegFromModRM(instruction_begin[-1]) &&
963 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
964 }
965
966 /*
967 * This function checks that naclcall or nacljmp are correct (that is: three
968 * component instructions match) and if that is true then it merges call or jmp
969 * with a sandboxing to get a "superinstruction" and removes in-between jump
970 * targets. If it's not true then it triggers "unrecognized instruction" error
971 * condition.
972 *
973 * This version is for the case where "add with dst register in RM field"
974 * (opcode 0x01) and "add without REX prefix" is used.
975 *
976 * There are two possibile forms:
977 *
978 * 0: 83 eX e0 and $~0x1f,E86
979 * 3: 4? 01 fX add RBASE,R86
980 * 6: ff eX jmpq *R86
981 * ^ ^
982 * instruction_begin current_position
983 *
984 * 0: 83 eX e0 and $~0x1f,E86
985 * 3: 4? 01 fX add RBASE,R86
986 * 6: 4? ff eX jmpq *R86
987 * ^ ^
988 * instruction_begin current_position
989 */
990 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex(
991 uint32_t *instruction_info_collected,
992 const uint8_t **instruction_begin,
993 const uint8_t *current_position,
994 const uint8_t *data,
995 bitmap_word *valid_targets) {
996 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
997 ExpandSuperinstructionBySandboxingBytes(
998 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
999 else
1000 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1001 }
1002
1003 /*
1004 * This function checks that naclcall or nacljmp are correct (that is: three
1005 * component instructions match) and if that is true then it merges call or jmp
1006 * with a sandboxing to get a "superinstruction" and removes in-between jump
1007 * targets. If it's not true then it triggers "unrecognized instruction" error
1008 * condition.
1009 *
1010 * This version is for the case where "add with dst register in REG field"
1011 * (opcode 0x03) and "add without REX prefix" is used.
1012 *
1013 * There are two possibile forms:
1014 *
1015 * 0: 83 eX e0 and $~0x1f,E86
1016 * 3: 4? 03 Xf add RBASE,R86
1017 * 6: ff eX jmpq *R86
1018 * ^ ^
1019 * instruction_begin current_position
1020 *
1021 * 0: 83 eX e0 and $~0x1f,E86
1022 * 3: 4? 03 Xf add RBASE,R86
1023 * 6: 4? ff eX jmpq *R86
1024 * ^ ^
1025 * instruction_begin current_position
1026 */
1027 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex(
1028 uint32_t *instruction_info_collected,
1029 const uint8_t **instruction_begin,
1030 const uint8_t *current_position,
1031 const uint8_t *data,
1032 bitmap_word *valid_targets) {
1033 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1034 ExpandSuperinstructionBySandboxingBytes(
1035 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1036 else
1037 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1038 }
1039
1040 /*
1041 * This function checks that naclcall or nacljmp are correct (that is: three
1042 * component instructions match) and if that is true then it merges call or jmp
1043 * with a sandboxing to get a "superinstruction" and removes in-between jump
1044 * targets. If it's not true then it triggers "unrecognized instruction" error
1045 * condition.
1046 *
1047 * This version is for the case where "add with dst register in RM field"
1048 * (opcode 0x01) and "add without REX prefix" is used.
1049 *
1050 * There are three possibile forms:
1051 *
1052 * 0: 4? 83 eX e0 and $~0x1f,E86
1053 * 4: 4? 01 fX add RBASE,R86
1054 * 7: ff eX jmpq *R86
1055 * ^ ^
1056 * instruction_begin current_position
1057 *
1058 * 0: 4? 83 eX e0 and $~0x1f,E86
1059 * 4: 4? 01 fX add RBASE,R86
1060 * 7: 4? ff eX jmpq *R86
1061 * ^ ^
1062 * instruction_begin current_position
1063 *
1064 * 0: 4? 83 eX e0 and $~0x1f,E64
1065 * 4: 4? 01 fX add RBASE,R64
1066 * 7: 4? ff eX jmpq *R64
1067 * ^ ^
1068 * instruction_begin current_position
1069 */
1070 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex(
1071 uint32_t *instruction_info_collected,
1072 const uint8_t **instruction_begin,
1073 const uint8_t *current_position,
1074 const uint8_t *data,
1075 bitmap_word *valid_targets) {
1076 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
1077 ExpandSuperinstructionBySandboxingBytes(
1078 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1079 else
1080 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1081 }
1082
1083 /*
1084 * This function checks that naclcall or nacljmp are correct (that is: three
1085 * component instructions match) and if that is true then it merges call or jmp
1086 * with a sandboxing to get a "superinstruction" and removes in-between jump
1087 * targets. If it's not true then it triggers "unrecognized instruction" error
1088 * condition.
1089 *
1090 * This version is for the case where "add with dst register in REG field"
1091 * (opcode 0x03) and "add without REX prefix" is used.
1092 *
1093 * There are three possibile forms:
1094 *
1095 * 0: 4? 83 eX e0 and $~0x1f,E86
1096 * 4: 4? 03 Xf add RBASE,R86
1097 * 7: ff eX jmpq *R86
1098 * ^ ^
1099 * instruction_begin current_position
1100 *
1101 * 0: 4? 83 eX e0 and $~0x1f,E86
1102 * 4: 4? 03 Xf add RBASE,R86
1103 * 7: 4? ff eX jmpq *R86
1104 * ^ ^
1105 * instruction_begin current_position
1106 *
1107 * 0: 4? 83 eX e0 and $~0x1f,E64
1108 * 4: 4? 03 Xf add RBASE,R64
1109 * 7: 4? ff eX jmpq *R64
1110 * ^ ^
1111 * instruction_begin current_position
1112 */
1113 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex(
1114 uint32_t *instruction_info_collected,
1115 const uint8_t **instruction_begin,
1116 const uint8_t *current_position,
1117 const uint8_t *data,
1118 bitmap_word *valid_targets) {
1119 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
1120 ExpandSuperinstructionBySandboxingBytes(
1121 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
1122 else
1123 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
1124 }
1125
1126
1127 Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
1128 uint32_t options,
1129 const NaClCPUFeaturesX86 *cpu_features,
1130 ValidationCallbackFunc user_callback,
1131 void *callback_data) {
1132 bitmap_word valid_targets_small;
1133 bitmap_word jump_dests_small;
1134 bitmap_word *valid_targets;
1135 bitmap_word *jump_dests;
1136 const uint8_t *current_position;
1137 const uint8_t *end_of_bundle;
1138 int result = TRUE;
1139
1140 CHECK(sizeof valid_targets_small == sizeof jump_dests_small);
1141 CHECK(size % kBundleSize == 0);
1142
1143 /*
1144 * For a very small sequences (one bundle) malloc is too expensive.
1145 *
1146 * Note1: we allocate one extra bit, because we set valid jump target bits
1147 * _after_ instructions, so there will be one at the end of the chunk.
1148 *
1149 * Note2: we don't ever mark first bit as a valid jump target but this is
1150 * not a problem because any aligned address is valid jump target.
1151 */
1152 if ((size + 1) <= (sizeof valid_targets_small * 8)) {
1153 valid_targets_small = 0;
1154 valid_targets = &valid_targets_small;
1155 jump_dests_small = 0;
1156 jump_dests = &jump_dests_small;
1157 } else {
1158 valid_targets = BitmapAllocate(size + 1);
1159 jump_dests = BitmapAllocate(size + 1);
1160 if (!valid_targets || !jump_dests) {
1161 free(jump_dests);
1162 free(valid_targets);
1163 errno = ENOMEM;
1164 return FALSE;
1165 }
1166 }
1167
1168 /*
1169 * This option is usually used in tests: we will process the whole chunk
1170 * in one pass. Usually each bundle is processed separately which means
1171 * instructions (and super-instructions) can not cross borders of the bundle.
1172 */
1173 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
1174 end_of_bundle = data + size;
1175 else
1176 end_of_bundle = data + kBundleSize;
1177
1178 /*
1179 * Main loop. Here we process the data array bundle-after-bundle.
1180 * Ragel-produced DFA does all the checks with one exception: direct jumps.
1181 * It collects the two arrays: valid_targets and jump_dests which are used
1182 * to test direct jumps later.
1183 */
1184 for (current_position = data;
1185 current_position < data + size;
1186 current_position = end_of_bundle,
1187 end_of_bundle = current_position + kBundleSize) {
1188 /* Start of the instruction being processed. */
1189 const uint8_t *instruction_begin = current_position;
1190 /* Only used locally in the end_of_instruction_cleanup action. */
1191 const uint8_t *instruction_end;
1192 int current_state;
1193 uint32_t instruction_info_collected = 0;
1194 /* Keeps one byte of information per operand in the current instruction:
1195 * 2 bits for register kinds,
1196 * 5 bits for register numbers (16 regs plus RIZ). */
1197 uint32_t operand_states = 0;
1198 enum OperandName base = NO_REG;
1199 enum OperandName index = NO_REG;
1200 enum OperandName restricted_register =
1201 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
1202 uint8_t rex_prefix = FALSE;
1203 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
1204 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
1205 uint8_t vex_prefix3 = 0x00;
1206
1207 %% write init;
1208 %% write exec;
1209
1210 /*
1211 * Ragel DFA accepted the bundle, but we still need to make sure the last
1212 * instruction haven't left %rbp or %rsp in restricted state.
1213 */
1214 if (restricted_register == REG_RBP)
1215 result &= user_callback(end_of_bundle, end_of_bundle,
1216 RESTRICTED_RBP_UNPROCESSED |
1217 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
1218 RESTRICTED_REGISTER_MASK), callback_data);
1219 else if (restricted_register == REG_RSP)
1220 result &= user_callback(end_of_bundle, end_of_bundle,
1221 RESTRICTED_RSP_UNPROCESSED |
1222 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
1223 RESTRICTED_REGISTER_MASK), callback_data);
1224 }
1225
1226 /*
1227 * Check the direct jumps. All the targets from jump_dests must be in
1228 * valid_targets.
1229 */
1230 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
1231 user_callback, callback_data);
1232
1233 /* We only use malloc for a large code sequences */
1234 if (jump_dests != &jump_dests_small) free(jump_dests);
1235 if (valid_targets != &valid_targets_small) free(valid_targets);
1236 if (!result) errno = EINVAL;
1237 return result;
1238 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698