OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
3 * Use of this source code is governed by a BSD-style license that can be | |
4 * found in the LICENSE file. | |
5 */ | |
6 | |
7 /* | |
8 * This is the core of amd64-mode validator. Please note that this file | |
9 * combines ragel machine description and C language actions. Please read | |
10 * validator_internals.html first to understand how the whole thing is built: | |
11 * it explains how the byte sequences are constructed, what constructs like | |
12 * "@{}" or "REX_WRX?" mean, etc. | |
13 */ | |
14 | |
15 #include <assert.h> | |
16 #include <errno.h> | |
17 #include <stddef.h> | |
18 #include <stdio.h> | |
19 #include <stdlib.h> | |
20 #include <string.h> | |
21 | |
22 #include "native_client/src/trusted/validator_ragel/bitmap.h" | |
23 #include "native_client/src/trusted/validator_ragel/unreviewed/validator_interna
l.h" | |
24 | |
25 %%{ | |
26 machine x86_64_validator; | |
27 alphtype unsigned char; | |
28 variable p current_position; | |
29 variable pe end_of_bundle; | |
30 variable eof end_of_bundle; | |
31 variable cs current_state; | |
32 | |
33 include byte_machine "byte_machines.rl"; | |
34 | |
35 include prefixes_parsing_validator | |
36 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
37 include rex_actions | |
38 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
39 include rex_parsing | |
40 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
41 include vex_actions_amd64 | |
42 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
43 include vex_parsing_amd64 | |
44 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
45 include displacement_fields_actions | |
46 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
47 include displacement_fields_parsing | |
48 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
49 include modrm_actions_amd64 | |
50 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
51 include modrm_parsing | |
52 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
53 include operand_actions_amd64 | |
54 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
55 include immediate_fields_actions | |
56 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
57 include immediate_fields_parsing_amd64 | |
58 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
59 include relative_fields_validator_actions | |
60 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
61 include relative_fields_parsing | |
62 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
63 include cpuid_actions | |
64 "native_client/src/trusted/validator_ragel/unreviewed/parse_instruction.rl"; | |
65 | |
66 action check_access { | |
67 CheckAccess(instruction_begin - data, base, index, restricted_register, | |
68 valid_targets, &instruction_info_collected); | |
69 } | |
70 | |
71 # Action which marks last byte as not immediate. Most 3DNow! instructions | |
72 # and some AVX and XOP instructions have this property. It's referenced by | |
73 # decode_x86_64 machine in [autogenerated] "validator_x86_64_instruction.rl" | |
74 # file. | |
75 action last_byte_is_not_immediate { | |
76 instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE; | |
77 } | |
78 | |
79 action modifiable_instruction { | |
80 instruction_info_collected |= MODIFIABLE_INSTRUCTION; | |
81 } | |
82 | |
83 action process_0_operands { | |
84 Process0Operands(&restricted_register, &instruction_info_collected); | |
85 } | |
86 action process_1_operand { | |
87 Process1Operand(&restricted_register, &instruction_info_collected, | |
88 rex_prefix, operand_states); | |
89 } | |
90 action process_1_operand_zero_extends { | |
91 Process1OperandZeroExtends(&restricted_register, | |
92 &instruction_info_collected, rex_prefix, | |
93 operand_states); | |
94 } | |
95 action process_2_operands { | |
96 Process2Operands(&restricted_register, &instruction_info_collected, | |
97 rex_prefix, operand_states); | |
98 } | |
99 action process_2_operands_zero_extends { | |
100 Process2OperandsZeroExtends(&restricted_register, | |
101 &instruction_info_collected, rex_prefix, | |
102 operand_states); | |
103 } | |
104 | |
105 include decode_x86_64 "validator_x86_64_instruction.rl"; | |
106 | |
107 # Special %rbp modifications - the ones which don't need a sandboxing. | |
108 # | |
109 # Note that there are two different opcodes for "mov": in x86-64 there are two | |
110 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | |
111 # from REG field to RM or in the other direction thus there are two encodings | |
112 # for the register-to-register move. | |
113 rbp_modifications = | |
114 (b_0100_10x0 0x89 0xe5 | # mov %rsp,%rbp | |
115 b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp | |
116 @process_0_operands; | |
117 | |
118 # Special instructions used for %rbp sandboxing. | |
119 # | |
120 # This is the "second half" of the %rbp sandboxing. Any zero-extending | |
121 # instruction which stores the data in %ebp can be first half, but unlike | |
122 # the situation with other "normal" registers you can not just write to | |
123 # %ebp and continue: such activity MUST restore the status quo immediately | |
124 # via one of these instructions. | |
125 rbp_sandboxing = | |
126 (b_0100_11x0 0x01 0xfd | # add %r15,%rbp | |
127 b_0100_10x1 0x03 0xef | # add %r15,%rbp | |
128 # Note that unlike %rsp case, there is no 'lea (%rbp,%r15,1),%rbp' | |
129 # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp'). | |
130 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp | |
131 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp | |
132 # Note: restricted_register keeps the restricted register as explained in | |
133 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x8
6-64-systems | |
134 # | |
135 # "Normal" instructions can not be used in a place where %rbp is restricted. | |
136 # But since these instructions are "second half" of the %rbp sandboxing they | |
137 # can be used *only* when %rbp is restricted. | |
138 # | |
139 # That is (normal instruction): | |
140 # mov %eax,%ebp | |
141 # mov %esi,%edi <- Error: %ebp is restricted | |
142 # vs | |
143 # mov %esi,%edi | |
144 # add %r15,%rbp <- Error: %ebp is *not* restricted | |
145 # vs | |
146 # mov %eax,%ebp | |
147 # add %r15,%rbp <- Ok: %rbp is restricted as it should be | |
148 # | |
149 # Check this precondition and mark the beginning of the instruction as | |
150 # an invalid jump target. | |
151 @{ if (restricted_register == REG_RBP) | |
152 instruction_info_collected |= RESTRICTED_REGISTER_USED; | |
153 else | |
154 instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED; | |
155 restricted_register = NO_REG; | |
156 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); | |
157 }; | |
158 | |
159 # Special %rsp modifications - the ones which don't need a sandboxing. | |
160 # | |
161 # Note that there are two different opcodes for "mov": in x86-64 there are two | |
162 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | |
163 # from REG field to RM or in the other direction thus there are two encodings | |
164 # for the register-to-register move. | |
165 rsp_modifications = | |
166 (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp | |
167 b_0100_10x0 0x8b 0xe5 | # mov %rbp,%rsp | |
168 # Superfluous bits are not supported: | |
169 # http://code.google.com/p/nativeclient/issues/detail?id=3012 | |
170 b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp | |
171 @process_0_operands; | |
172 | |
173 # Special instructions used for %rsp sandboxing. | |
174 # | |
175 # This is the "second half" of the %rsp sandboxing. Any zero-extending | |
176 # instruction which stores the data in %esp can be first half, but unlike | |
177 # the situation with other "normal" registers you can not just write to | |
178 # %esp and continue: such activity MUST restore the status quo immediately | |
179 # via one of these instructions. | |
180 rsp_sandboxing = | |
181 (b_0100_11x0 0x01 0xfc | # add %r15,%rsp | |
182 b_0100_10x1 0x03 0xe7 | # add %r15,%rsp | |
183 # OR can be used as well, see | |
184 # http://code.google.com/p/nativeclient/issues/detail?id=3070 | |
185 b_0100_11x0 0x09 0xfc | # or %r15,%rsp | |
186 b_0100_10x1 0x0b 0xe7 | # or %r15,%rsp | |
187 0x4a 0x8d 0x24 0x3c | # lea (%rsp,%r15,1),%rsp | |
188 0x4a 0x8d 0x64 0x3c 0x00 | # lea 0x00(%rsp,%r15,1),%rsp | |
189 0x4a 0x8d 0xa4 0x3c 0x00 0x00 0x00 0x00) # lea 0x00000000(%rsp,%r15,1),%rsp | |
190 # Note: restricted_register keeps the restricted register as explained in | |
191 # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x8
6-64-systems | |
192 # | |
193 # "Normal" instructions can not be used in a place where %rsp is restricted. | |
194 # But since these instructions are "second half" of the %rsp sandboxing they | |
195 # can be used *only* when %rsp is restricted. | |
196 # | |
197 # That is (normal instruction): | |
198 # mov %eax,%esp | |
199 # mov %esi,%edi <- Error: %esp is restricted | |
200 # vs | |
201 # mov %esi,%edi | |
202 # add %r15,%rsp <- Error: %esp is *not* restricted | |
203 # vs | |
204 # mov %eax,%esp | |
205 # add %r15,%rsp <- Ok: %rsp is restricted as it should be | |
206 # | |
207 # Check this precondition and mark the beginning of the instruction as | |
208 # an invalid jump target. | |
209 @{ if (restricted_register == REG_RSP) | |
210 instruction_info_collected |= RESTRICTED_REGISTER_USED; | |
211 else | |
212 instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED; | |
213 restricted_register = NO_REG; | |
214 UnmarkValidJumpTarget((instruction_begin - data), valid_targets); | |
215 }; | |
216 | |
217 # naclcall or nacljmp. These are three-instruction indirect-jump sequences. | |
218 # and $~0x1f, %eXX | |
219 # add RBASE, %rXX | |
220 # jmpq *%rXX (or: callq *%rXX) | |
221 # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not | |
222 # just as part of the naclcall/nacljmp, but also as a standalone instruction). | |
223 # | |
224 # This means that when naclcall_or_nacljmp ragel machine will be combined with | |
225 # "normal_instruction*" regular action process_1_operand_zero_extends will be | |
226 # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64 | |
227 # instruction. This action will check if %rbp/%rsp is legally modified thus | |
228 # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine. | |
229 # | |
230 # There are a number of variants present which differ by the REX prefix usage: | |
231 # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq" | |
232 # or "callq" is the same register and it's much simpler to do if one single | |
233 # action handles only fixed number of bytes. | |
234 # | |
235 # Additional complication arises because x86-64 contains two different "add" | |
236 # instructions: with "0x01" and "0x03" opcodes. They differ in the direction | |
237 # used: both can encode "add %src_register, %dst_register", but the first one | |
238 # uses field REG of the ModR/M byte for the src and field RM of the ModR/M | |
239 # byte for the dst while last one uses field RM of the ModR/M byte for the src | |
240 # and field REG of the ModR/M byte for dst. Both should be allowed. | |
241 # | |
242 # See AMD/Intel manual for clarification "add" instruction encoding. | |
243 # | |
244 # REGISTER USAGE ABBREVIATIONS: | |
245 # E86: legacy ia32 registers (all eight: %eax to %edi) | |
246 # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi) | |
247 # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d) | |
248 # R64: new amd64 registers (only seven: %r8 to %r14) | |
249 # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64) | |
250 naclcall_or_nacljmp = | |
251 # This block encodes call and jump "superinstruction" of the following form: | |
252 # 0: 83 e_ e0 and $~0x1f,E86 | |
253 # 3: 4_ 01 f_ add RBASE,R86 | |
254 # 6: ff e_ jmpq *R86 | |
255 #### INSTRUCTION ONE (three bytes) | |
256 # and $~0x1f, E86 | |
257 (0x83 b_11_100_xxx 0xe0 | |
258 #### INSTRUCTION TWO (three bytes) | |
259 # add RBASE, R86 (0x01 opcode) | |
260 b_0100_11x0 0x01 b_11_111_xxx | |
261 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | |
262 # callq R86 | |
263 ((REX_WRX? 0xff b_11_010_xxx) | | |
264 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | |
265 # jmpq R86 | |
266 (REX_WRX? 0xff b_11_100_xxx))) | |
267 @{ | |
268 ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected, | |
269 &instruction_begin, current_position, | |
270 data, valid_targets); | |
271 } | | |
272 | |
273 # This block encodes call and jump "superinstruction" of the following form: | |
274 # 0: 83 e_ e0 and $~0x1f,E86 | |
275 # 3: 4_ 03 _f add RBASE,R86 | |
276 # 6: ff e_ jmpq *R86 | |
277 #### INSTRUCTION ONE (three bytes) | |
278 # and $~0x1f, E86 | |
279 (0x83 b_11_100_xxx 0xe0 | |
280 #### INSTRUCTION TWO (three bytes) | |
281 # add RBASE, R86 (0x03 opcode) | |
282 b_0100_10x1 0x03 b_11_xxx_111 | |
283 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | |
284 # callq R86 | |
285 ((REX_WRX? 0xff b_11_010_xxx) | | |
286 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | |
287 # jmpq R86 | |
288 (REX_WRX? 0xff b_11_100_xxx))) | |
289 @{ | |
290 ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected, | |
291 &instruction_begin, current_position, | |
292 data, valid_targets); | |
293 } | | |
294 | |
295 # This block encodes call and jump "superinstruction" of the following form: | |
296 # 0: 4_ 83 e_ e0 and $~0x1f,E86 | |
297 # 4: 4_ 01 f_ add RBASE,R86 | |
298 # 7: ff e_ jmpq *R86 | |
299 #### INSTRUCTION ONE (four bytes) | |
300 # and $~0x1f, E86 | |
301 ((REX_RX 0x83 b_11_100_xxx 0xe0 | |
302 #### INSTRUCTION TWO (three bytes) | |
303 # add RBASE, R86 (0x01 opcode) | |
304 b_0100_11x0 0x01 b_11_111_xxx | |
305 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | |
306 # callq R86 | |
307 ((REX_WRX? 0xff b_11_010_xxx) | | |
308 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | |
309 # jmpq R86 | |
310 (REX_WRX? 0xff b_11_100_xxx))) | | |
311 | |
312 # This block encodes call and jump "superinstruction" of the following form: | |
313 # 0: 4_ 83 e_ e0 and $~0x1f,E64 | |
314 # 4: 4_ 01 f_ add RBASE,R64 | |
315 # 7: 4_ ff e_ jmpq *R64 | |
316 #### INSTRUCTION ONE (four bytes) | |
317 # and $~0x1f, E64 | |
318 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0 | |
319 #### INSTRUCTION TWO (three bytes) | |
320 # add RBASE, R64 (0x01 opcode) | |
321 b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111) | |
322 #### INSTRUCTION THREE: call (three bytes) | |
323 # callq R64 | |
324 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | | |
325 #### INSTRUCTION THREE: jmp (three bytes) | |
326 # jmpq R64 | |
327 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) | |
328 @{ | |
329 ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected, | |
330 &instruction_begin, current_position, | |
331 data, valid_targets); | |
332 } | | |
333 | |
334 # This block encodes call and jump "superinstruction" of the following form: | |
335 # 0: 4_ 83 e_ e0 and $~0x1f,E86 | |
336 # 4: 4_ 03 _f add RBASE,R86 | |
337 # 7: ff e_ jmpq *R86 | |
338 #### INSTRUCTION ONE (four bytes) | |
339 # and $~0x1f, E86 | |
340 ((REX_RX 0x83 b_11_100_xxx 0xe0 | |
341 #### INSTRUCTION TWO (three bytes) | |
342 # add RBASE, R86 (0x03 opcode) | |
343 b_0100_10x1 0x03 b_11_xxx_111 | |
344 #### INSTRUCTION THREE: call (two bytes plus optional REX prefix) | |
345 # callq R86 | |
346 ((REX_WRX? 0xff b_11_010_xxx) | | |
347 #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix) | |
348 # jmpq R86 | |
349 (REX_WRX? 0xff b_11_100_xxx))) | | |
350 | |
351 # This block encodes call and jump "superinstruction" of the following form: | |
352 # 0: 4_ 83 e_ e0 and $~0x1f,E64 | |
353 # 4: 4_ 03 _f add RBASE,R64 | |
354 # 7: 4_ ff e_ jmpq *R64 | |
355 #### INSTRUCTION ONE (four bytes) | |
356 # and $~0x1f, E64 | |
357 (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0 | |
358 #### INSTRUCTION TWO (three bytes) | |
359 # add RBASE, R64 (0x03 opcode) | |
360 b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111) | |
361 #### INSTRUCTION THREE: call (three bytes) | |
362 # callq R64 | |
363 ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) | | |
364 #### INSTRUCTION THREE: jmp (three bytes) | |
365 # jmpq R64 | |
366 (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111))))) | |
367 @{ | |
368 ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected, | |
369 &instruction_begin, current_position, | |
370 data, valid_targets); | |
371 }; | |
372 | |
373 # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rdi) operand | |
374 | |
375 # maskmovq %mmX,%mmY (EMMX or SSE) | |
376 maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers; | |
377 | |
378 # maskmovdqu %xmmX, %xmmY (SSE2) | |
379 maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers; | |
380 | |
381 # vmaskmovdqu %xmmX, %xmmY (AVX) | |
382 vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) | | |
383 (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers; | |
384 | |
385 mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu; | |
386 | |
387 # Temporary fix: for string instructions combination of data16 and rep(ne) | |
388 # prefixes is disallowed to mimic old validator behavior. | |
389 # See http://code.google.com/p/nativeclient/issues/detail?id=1950 | |
390 | |
391 # data16rep = (data16 | rep data16 | data16 rep); | |
392 # data16condrep = (data16 | condrep data16 | data16 condrep); | |
393 data16rep = data16; | |
394 data16condrep = data16; | |
395 | |
396 # String instructions which use only %ds:(%rsi) | |
397 string_instruction_rsi_no_rdi = | |
398 (rep? 0xac | # lods %ds:(%rsi),%al | |
399 data16rep 0xad | # lods %ds:(%rsi),%ax | |
400 rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax | |
401 | |
402 # String instructions which use only %ds:(%rdi) | |
403 string_instruction_rdi_no_rsi = | |
404 condrep? 0xae | # scas %es:(%rdi),%al | |
405 data16condrep 0xaf | # scas %es:(%rdi),%ax | |
406 condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax | |
407 | |
408 rep? 0xaa | # stos %al,%es:(%rdi) | |
409 data16rep 0xab | # stos %ax,%es:(%rdi) | |
410 rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi) | |
411 | |
412 # String instructions which use both %ds:(%rsi) and %es:(%rdi) | |
413 string_instruction_rsi_rdi = | |
414 condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi) | |
415 data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi) | |
416 condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi) | |
417 | |
418 rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi) | |
419 data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi) | |
420 rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi) | |
421 | |
422 # "Superinstruction" which includes %rsi sandboxing. | |
423 # | |
424 # There are two variants which handle spurious REX prefixes. | |
425 # | |
426 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64 | |
427 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may | |
428 # be used to move from REG field to RM or in the other direction thus there | |
429 # are two encodings for the register-to-register move (and since REG and RM | |
430 # are identical here only opcode differs). | |
431 sandbox_instruction_rsi_no_rdi = | |
432 (0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
433 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
434 string_instruction_rsi_no_rdi | |
435 @{ | |
436 ExpandSuperinstructionBySandboxingBytes( | |
437 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | |
438 } | | |
439 | |
440 REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
441 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
442 string_instruction_rsi_no_rdi | |
443 @{ | |
444 ExpandSuperinstructionBySandboxingBytes( | |
445 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | |
446 }; | |
447 | |
448 # "Superinstruction" which includes %rdi sandboxing. | |
449 # | |
450 # There are two variants which handle spurious REX prefixes. | |
451 # | |
452 # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 | |
453 # there are two fields in ModR/M byte (REG field and RM field) and "mov" may | |
454 # be used to move from REG field to RM or in the other direction thus there | |
455 # are two encodings for the register-to-register move (and since REG and RM | |
456 # are identical here only opcode differs). | |
457 sandbox_instruction_rdi_no_rsi = | |
458 (0x89 | 0x8b) 0xff # mov %edi,%edi | |
459 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi | |
460 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) | |
461 @{ | |
462 ExpandSuperinstructionBySandboxingBytes( | |
463 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | |
464 } | | |
465 | |
466 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi | |
467 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi | |
468 (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction) | |
469 @{ | |
470 ExpandSuperinstructionBySandboxingBytes( | |
471 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets); | |
472 }; | |
473 | |
474 | |
475 # "Superinstruction" which includes both %rsi and %rdi sandboxing. | |
476 # | |
477 # There are four variants which handle spurious REX prefixes. | |
478 # | |
479 # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both | |
480 # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two | |
481 # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move | |
482 # from REG field to RM or in the other direction thus there are two encodings | |
483 # for the register-to-register move (and since REG and RM are identical here | |
484 # only opcode differs). | |
485 sandbox_instruction_rsi_rdi = | |
486 (0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
487 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
488 (0x89 | 0x8b) 0xff # mov %edi,%edi | |
489 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi | |
490 string_instruction_rsi_rdi | |
491 @{ | |
492 ExpandSuperinstructionBySandboxingBytes( | |
493 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */, | |
494 &instruction_begin, data, valid_targets); | |
495 } | | |
496 | |
497 (((0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
498 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
499 REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi | |
500 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi | |
501 | |
502 (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi | |
503 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi | |
504 (0x89 | 0x8b) 0xff # mov %edi,%edi | |
505 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi | |
506 string_instruction_rsi_rdi | |
507 @{ | |
508 ExpandSuperinstructionBySandboxingBytes( | |
509 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */ | |
510 /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */, | |
511 &instruction_begin, data, valid_targets); | |
512 } | | |
513 | |
514 REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi | |
515 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi | |
516 REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi | |
517 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi | |
518 string_instruction_rsi_rdi | |
519 @{ | |
520 ExpandSuperinstructionBySandboxingBytes( | |
521 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */, | |
522 &instruction_begin, data, valid_targets); | |
523 }; | |
524 | |
525 # All the "special" instructions (== instructions which obey non-standard | |
526 # rules). Three groups: | |
527 # * %rbp/%rsp related instructions (these instructions are special because | |
528 # they must be in the range %r15...%r15+4294967295 except momentarily they | |
529 # can be in the range 0...4294967295) | |
530 # * string instructions (which can not use %r15 as base and thus need special | |
531 # handling both in compiler and validator) | |
532 # * naclcall/nacljmp (indirect jumps need special care) | |
533 special_instruction = | |
534 (rbp_modifications | | |
535 rsp_modifications | | |
536 rbp_sandboxing | | |
537 rsp_sandboxing | | |
538 sandbox_instruction_rsi_no_rdi | | |
539 sandbox_instruction_rdi_no_rsi | | |
540 sandbox_instruction_rsi_rdi | | |
541 naclcall_or_nacljmp) | |
542 # Mark the instruction as special - currently this information is used only | |
543 # in tests, but in the future we may use it for dynamic code modification | |
544 # support. | |
545 @{ | |
546 instruction_info_collected |= SPECIAL_INSTRUCTION; | |
547 }; | |
548 | |
549 # Remove special instructions which are only allowed in special cases. | |
550 normal_instruction = one_instruction - special_instruction; | |
551 | |
552 # Check if call is properly aligned. | |
553 # | |
554 # For direct calls we explicitly encode all variations. For indirect calls | |
555 # we accept all the special instructions which end with a register-addressed | |
556 # indirect call. | |
557 call_alignment = | |
558 ((normal_instruction & | |
559 # Direct call | |
560 ((data16 REX_RXB? 0xe8 rel16) | | |
561 (REX_WRXB? 0xe8 rel32) | | |
562 (data16 REXW_RXB 0xe8 rel32))) | | |
563 (special_instruction & | |
564 # Indirect call | |
565 (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* & | |
566 modrm_registers)))) | |
567 # Call instruction must be aligned to the end of bundle. Previously this was | |
568 # a strict requirement, today it's just a warning to aid with debugging. | |
569 @{ | |
570 if (((current_position - data) & kBundleMask) != kBundleMask) | |
571 instruction_info_collected |= BAD_CALL_ALIGNMENT; | |
572 }; | |
573 | |
574 # This action calls user's callback (if needed) and cleans up validator's | |
575 # internal state. | |
576 # | |
577 # We call the user callback if there are validation errors or if the | |
578 # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used. | |
579 # | |
580 # After that we move instruction_begin and clean all the variables which | |
581 # are only used in the processing of a single instruction (prefixes, operand | |
582 # states and instruction_info_collected). | |
583 action end_of_instruction_cleanup { | |
584 /* Call user-supplied callback. */ | |
585 instruction_end = current_position + 1; | |
586 if ((instruction_info_collected & VALIDATION_ERRORS_MASK) || | |
587 (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) { | |
588 result &= user_callback( | |
589 instruction_begin, instruction_end, | |
590 instruction_info_collected | | |
591 ((restricted_register << RESTRICTED_REGISTER_SHIFT) & | |
592 RESTRICTED_REGISTER_MASK), callback_data); | |
593 } | |
594 | |
595 /* On successful match the instruction_begin must point to the next byte | |
596 * to be able to report the new offset as the start of instruction | |
597 * causing error. */ | |
598 instruction_begin = instruction_end; | |
599 | |
600 /* Mark start of the next instruction as a valid target for jump. | |
601 * Note: we mark start of the next instruction here, not start of the | |
602 * current one because memory access check should be able to clear this | |
603 * bit when restricted register is used. */ | |
604 MarkValidJumpTarget(instruction_begin - data, valid_targets); | |
605 | |
606 /* Clear variables. */ | |
607 instruction_info_collected = 0; | |
608 SET_REX_PREFIX(FALSE); | |
609 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ | |
610 SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B); | |
611 SET_VEX_PREFIX3(0x00); | |
612 operand_states = 0; | |
613 base = 0; | |
614 index = 0; | |
615 } | |
616 | |
617 # This action reports fatal error detected by DFA. | |
618 action report_fatal_error { | |
619 result &= user_callback(instruction_begin, current_position, | |
620 UNRECOGNIZED_INSTRUCTION, callback_data); | |
621 /* | |
622 * Process the next bundle: "continue" here is for the "for" cycle in | |
623 * the ValidateChunkAMD64 function. | |
624 * | |
625 * It does not affect the case which we really care about (when code | |
626 * is validatable), but makes it possible to detect more errors in one | |
627 * run in tools like ncval. | |
628 */ | |
629 continue; | |
630 } | |
631 | |
632 # This is main ragel machine: it does 99% of validation work. There is only | |
633 # one thing to do with bundle if this machine accepts the bundle: | |
634 # * check for the state of the restricted_register at the end of the bundle. | |
635 # It's an error if %rbp or %rsp is restricted at the end of the bundle. | |
636 # Additionally if all the bundles are fine you need to check that direct jumps | |
637 # are correct. This is done in the following way: | |
638 # * DFA fills two arrays: valid_targets and jump_dests. | |
639 # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0". | |
640 # All other checks are done here. | |
641 | |
642 main := ((call_alignment | normal_instruction | special_instruction) | |
643 @end_of_instruction_cleanup)* | |
644 $!report_fatal_error; | |
645 | |
646 }%% | |
647 | |
648 %% write data; | |
649 | |
/*
 * Sandboxing classification of a single operand.
 *
 * operand_states packs one byte per operand: operand N occupies bits
 * [8*N, 8*N+7].  Within that byte the low five bits hold the operand name
 * (see SET_OPERAND_NAME) and the bits starting at bit 5 hold one of the
 * kinds below (see SET_OPERAND_TYPE_*).
 */
enum OperandKind {
  OPERAND_SANDBOX_IRRELEVANT = 0,
  /*
   * 16bit and 64bit operands are both recorded as
   * OPERAND_SANDBOX_UNRESTRICTED (see the SET_OPERAND_TYPE_* macros below),
   * which matches the behavior of the old validator.
   *
   * 8bit operands must be distinguished from other types because the REX
   * prefix regulates the choice between %ah and %spl, as well as %ch and %bpl.
   */
  OPERAND_SANDBOX_8BIT,
  /* Recorded for 32bit operands. */
  OPERAND_SANDBOX_RESTRICTED,
  /* Recorded for 16bit and 64bit operands. */
  OPERAND_SANDBOX_UNRESTRICTED
};

/*
 * All of the macros below read or update a variable called operand_states
 * which must be in scope at the point of use.
 *
 * The shifts are performed on unsigned values: the byte of operand 3 reaches
 * bit 31, and "0xff << 24" on a plain int is undefined behavior in C.
 */

/* Records register name S in the byte of operand N. */
#define SET_OPERAND_NAME(N, S) \
  operand_states |= ((uint32_t) (S) << ((N) << 3))
/* Dispatches on the operand-size token T (e.g. OPERAND_TYPE_32_BIT). */
#define SET_OPERAND_TYPE(N, T) SET_OPERAND_TYPE_ ## T(N)
#define SET_OPERAND_TYPE_OPERAND_TYPE_8_BIT(N) \
  operand_states |= (uint32_t) OPERAND_SANDBOX_8BIT << (5 + ((N) << 3))
#define SET_OPERAND_TYPE_OPERAND_TYPE_16_BIT(N) \
  operand_states |= (uint32_t) OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))
#define SET_OPERAND_TYPE_OPERAND_TYPE_32_BIT(N) \
  operand_states |= (uint32_t) OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3))
#define SET_OPERAND_TYPE_OPERAND_TYPE_64_BIT(N) \
  operand_states |= (uint32_t) OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))
/*
 * True when the whole byte of operand N equals "name S with kind T".
 * S and T are parenthesized so that arbitrary expressions may be passed.
 */
#define CHECK_OPERAND(N, S, T) \
  ((operand_states & (0xffu << ((N) << 3))) == \
   ((uint32_t) ((S) | ((T) << 5)) << ((N) << 3)))
676 | |
677 static INLINE void CheckAccess(ptrdiff_t instruction_begin, | |
678 enum OperandName base, | |
679 enum OperandName index, | |
680 uint8_t restricted_register, | |
681 bitmap_word *valid_targets, | |
682 uint32_t *instruction_info_collected) { | |
683 if ((base == REG_RIP) || (base == REG_R15) || | |
684 (base == REG_RSP) || (base == REG_RBP)) { | |
685 if ((index == NO_REG) || (index == REG_RIZ)) | |
686 { /* do nothing. */ } | |
687 else if (index == restricted_register) | |
688 BitmapClearBit(valid_targets, instruction_begin), | |
689 *instruction_info_collected |= RESTRICTED_REGISTER_USED; | |
690 else | |
691 *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER; | |
692 } else { | |
693 *instruction_info_collected |= FORBIDDEN_BASE_REGISTER; | |
694 } | |
695 } | |
696 | |
697 | |
698 static INLINE void Process0Operands(enum OperandName *restricted_register, | |
699 uint32_t *instruction_info_collected) { | |
700 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
701 * instruction, not with regular instruction. */ | |
702 if (*restricted_register == REG_RSP) { | |
703 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
704 } else if (*restricted_register == REG_RBP) { | |
705 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
706 } | |
707 *restricted_register = NO_REG; | |
708 } | |
709 | |
710 static INLINE void Process1Operand(enum OperandName *restricted_register, | |
711 uint32_t *instruction_info_collected, | |
712 uint8_t rex_prefix, | |
713 uint32_t operand_states) { | |
714 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
715 * instruction, not with regular instruction. */ | |
716 if (*restricted_register == REG_RSP) { | |
717 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
718 } else if (*restricted_register == REG_RBP) { | |
719 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
720 } | |
721 *restricted_register = NO_REG; | |
722 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | |
723 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
724 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | |
725 *instruction_info_collected |= R15_MODIFIED; | |
726 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
727 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || | |
728 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
729 *instruction_info_collected |= BPL_MODIFIED; | |
730 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
731 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || | |
732 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
733 *instruction_info_collected |= SPL_MODIFIED; | |
734 } | |
735 } | |
736 | |
737 static INLINE void Process1OperandZeroExtends( | |
738 enum OperandName *restricted_register, | |
739 uint32_t *instruction_info_collected, | |
740 uint8_t rex_prefix, | |
741 uint32_t operand_states) { | |
742 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
743 * instruction, not with regular instruction. */ | |
744 if (*restricted_register == REG_RSP) { | |
745 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
746 } else if (*restricted_register == REG_RBP) { | |
747 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
748 } | |
749 *restricted_register = NO_REG; | |
750 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | |
751 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
752 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | |
753 *instruction_info_collected |= R15_MODIFIED; | |
754 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
755 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
756 *instruction_info_collected |= BPL_MODIFIED; | |
757 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
758 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
759 *instruction_info_collected |= SPL_MODIFIED; | |
760 /* Take 2 bits of operand type from operand_states as *restricted_register, | |
761 * make sure operand_states denotes a register (4th bit == 0). */ | |
762 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { | |
763 *restricted_register = operand_states & 0x0f; | |
764 } | |
765 } | |
766 | |
767 static INLINE void Process2Operands(enum OperandName *restricted_register, | |
768 uint32_t *instruction_info_collected, | |
769 uint8_t rex_prefix, | |
770 uint32_t operand_states) { | |
771 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
772 * instruction, not with regular instruction. */ | |
773 if (*restricted_register == REG_RSP) { | |
774 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
775 } else if (*restricted_register == REG_RBP) { | |
776 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
777 } | |
778 *restricted_register = NO_REG; | |
779 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | |
780 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
781 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) || | |
782 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) || | |
783 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
784 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | |
785 *instruction_info_collected |= R15_MODIFIED; | |
786 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
787 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || | |
788 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || | |
789 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
790 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) || | |
791 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
792 *instruction_info_collected |= BPL_MODIFIED; | |
793 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
794 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || | |
795 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || | |
796 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
797 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) || | |
798 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
799 *instruction_info_collected |= SPL_MODIFIED; | |
800 } | |
801 } | |
802 | |
803 static INLINE void Process2OperandsZeroExtends( | |
804 enum OperandName *restricted_register, | |
805 uint32_t *instruction_info_collected, | |
806 uint8_t rex_prefix, | |
807 uint32_t operand_states) { | |
808 /* Restricted %rsp or %rbp must be processed by appropriate nacl-special | |
809 * instruction, not with regular instruction. */ | |
810 if (*restricted_register == REG_RSP) { | |
811 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
812 } else if (*restricted_register == REG_RBP) { | |
813 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
814 } | |
815 *restricted_register = NO_REG; | |
816 if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) || | |
817 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
818 CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) || | |
819 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) || | |
820 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) || | |
821 CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) { | |
822 *instruction_info_collected |= R15_MODIFIED; | |
823 } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
824 CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) || | |
825 (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
826 CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
827 *instruction_info_collected |= BPL_MODIFIED; | |
828 } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
829 CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) || | |
830 (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) || | |
831 CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) { | |
832 *instruction_info_collected |= SPL_MODIFIED; | |
833 /* Take 2 bits of operand type from operand_states as *restricted_register, | |
834 * make sure operand_states denotes a register (4th bit == 0). */ | |
835 } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) { | |
836 *restricted_register = operand_states & 0x0f; | |
837 if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) { | |
838 *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED; | |
839 } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) { | |
840 *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED; | |
841 } | |
842 /* Take 2 bits of operand type from operand_states as *restricted_register, | |
843 * make sure operand_states denotes a register (12th bit == 0). */ | |
844 } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) { | |
845 *restricted_register = (operand_states & 0x0f00) >> 8; | |
846 } | |
847 } | |
848 | |
849 /* | |
850 * This function merges "dangerous" instruction with sandboxing instructions to | |
851 * get a "superinstruction" and unmarks in-between jump targets. | |
852 */ | |
853 static INLINE void ExpandSuperinstructionBySandboxingBytes( | |
854 size_t sandbox_instructions_size, | |
855 const uint8_t **instruction_begin, | |
856 const uint8_t *data, | |
857 bitmap_word *valid_targets) { | |
858 *instruction_begin -= sandbox_instructions_size; | |
859 /* | |
860 * We need to unmark start of the "dangerous" instruction itself, too, but we | |
861 * don't need to mark the beginning of the whole "superinstruction" - that's | |
862 * why we move start by one byte and don't change the length. | |
863 */ | |
864 UnmarkValidJumpTargets((*instruction_begin + 1 - data), | |
865 sandbox_instructions_size, | |
866 valid_targets); | |
867 } | |
868 | |
869 /* | |
870 * Return TRUE if naclcall or nacljmp uses the same register in all three | |
871 * instructions. | |
872 * | |
873 * This version is for the case where "add %src_register, %dst_register" with | |
874 * dst in RM field and src in REG field of ModR/M byte is used. | |
875 * | |
876 * There are five possible forms: | |
877 * | |
878 * 0: 83 eX e0 and $~0x1f,E86 | |
879 * 3: 4? 01 fX add RBASE,R86 | |
880 * 6: ff eX jmpq *R86 | |
881 * ^ ^ | |
882 * instruction_begin current_position | |
883 * | |
884 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
885 * 4: 4? 01 fX add RBASE,R86 | |
886 * 7: ff eX jmpq *R86 | |
887 * ^ ^ | |
888 * instruction_begin current_position | |
889 * | |
890 * 0: 83 eX e0 and $~0x1f,E86 | |
891 * 3: 4? 01 fX add RBASE,R86 | |
892 * 6: 4? ff eX jmpq *R86 | |
893 * ^ ^ | |
894 * instruction_begin current_position | |
895 * | |
896 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
897 * 4: 4? 01 fX add RBASE,R86 | |
898 * 7: 4? ff eX jmpq *R86 | |
899 * ^ ^ | |
900 * instruction_begin current_position | |
901 * | |
902 * 0: 4? 83 eX e0 and $~0x1f,E64 | |
903 * 4: 4? 01 fX add RBASE,R64 | |
904 * 7: 4? ff eX jmpq *R64 | |
905 * ^ ^ | |
906 * instruction_begin current_position | |
907 * | |
908 * We don't care about "?" (they are checked by DFA). | |
909 */ | |
910 static INLINE Bool VerifyNaclCallOrJmpAddToRM(const uint8_t *instruction_begin, | |
911 const uint8_t *current_position) { | |
912 return | |
913 RMFromModRM(instruction_begin[-5]) == RMFromModRM(instruction_begin[-1]) && | |
914 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]); | |
915 } | |
916 | |
917 /* | |
918 * Return TRUE if naclcall or nacljmp uses the same register in all three | |
919 * instructions. | |
920 * | |
921 * This version is for the case where "add %src_register, %dst_register" with | |
922 * dst in REG field and src in RM field of ModR/M byte is used. | |
923 * | |
924 * There are five possible forms: | |
925 * | |
926 * 0: 83 eX e0 and $~0x1f,E86 | |
927 * 3: 4? 03 Xf add RBASE,R86 | |
928 * 6: ff eX jmpq *R86 | |
929 * ^ ^ | |
930 * instruction_begin current_position | |
931 * | |
932 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
933 * 4: 4? 03 Xf add RBASE,R86 | |
934 * 7: ff eX jmpq *R86 | |
935 * ^ ^ | |
936 * instruction_begin current_position | |
937 * | |
938 * 0: 83 eX e0 and $~0x1f,E86 | |
939 * 3: 4? 03 Xf add RBASE,R86 | |
940 * 6: 4? ff eX jmpq *R86 | |
941 * ^ ^ | |
942 * instruction_begin current_position | |
943 * | |
944 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
945 * 4: 4? 03 Xf add RBASE,R86 | |
946 * 7: 4? ff eX jmpq *R86 | |
947 * ^ ^ | |
948 * instruction_begin current_position | |
949 * | |
950 * 0: 4? 83 eX e0 and $~0x1f,E64 | |
951 * 4: 4? 03 Xf add RBASE,R64 | |
952 * 7: 4? ff eX jmpq *R64 | |
953 * ^ ^ | |
954 * instruction_begin current_position | |
955 * | |
956 * We don't care about "?" (they are checked by DFA). | |
957 */ | |
958 static INLINE Bool VerifyNaclCallOrJmpAddToReg( | |
959 const uint8_t *instruction_begin, | |
960 const uint8_t *current_position) { | |
961 return | |
962 RMFromModRM(instruction_begin[-5]) == RegFromModRM(instruction_begin[-1]) && | |
963 RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]); | |
964 } | |
965 | |
966 /* | |
967 * This function checks that naclcall or nacljmp are correct (that is: three | |
968 * component instructions match) and if that is true then it merges call or jmp | |
969 * with a sandboxing to get a "superinstruction" and removes in-between jump | |
970 * targets. If it's not true then it triggers "unrecognized instruction" error | |
971 * condition. | |
972 * | |
973 * This version is for the case where "add with dst register in RM field" | |
974 * (opcode 0x01) and "and without REX prefix" is used. | |
975 * | |
976 * There are two possible forms: | |
977 * | |
978 * 0: 83 eX e0 and $~0x1f,E86 | |
979 * 3: 4? 01 fX add RBASE,R86 | |
980 * 6: ff eX jmpq *R86 | |
981 * ^ ^ | |
982 * instruction_begin current_position | |
983 * | |
984 * 0: 83 eX e0 and $~0x1f,E86 | |
985 * 3: 4? 01 fX add RBASE,R86 | |
986 * 6: 4? ff eX jmpq *R86 | |
987 * ^ ^ | |
988 * instruction_begin current_position | |
989 */ | |
990 static INLINE void ProcessNaclCallOrJmpAddToRMNoRex( | |
991 uint32_t *instruction_info_collected, | |
992 const uint8_t **instruction_begin, | |
993 const uint8_t *current_position, | |
994 const uint8_t *data, | |
995 bitmap_word *valid_targets) { | |
996 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) | |
997 ExpandSuperinstructionBySandboxingBytes( | |
998 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | |
999 else | |
1000 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | |
1001 } | |
1002 | |
1003 /* | |
1004 * This function checks that naclcall or nacljmp are correct (that is: three | |
1005 * component instructions match) and if that is true then it merges call or jmp | |
1006 * with a sandboxing to get a "superinstruction" and removes in-between jump | |
1007 * targets. If it's not true then it triggers "unrecognized instruction" error | |
1008 * condition. | |
1009 * | |
1010 * This version is for the case where "add with dst register in REG field" | |
1011 * (opcode 0x03) and "and without REX prefix" is used. | |
1012 * | |
1013 * There are two possible forms: | |
1014 * | |
1015 * 0: 83 eX e0 and $~0x1f,E86 | |
1016 * 3: 4? 03 Xf add RBASE,R86 | |
1017 * 6: ff eX jmpq *R86 | |
1018 * ^ ^ | |
1019 * instruction_begin current_position | |
1020 * | |
1021 * 0: 83 eX e0 and $~0x1f,E86 | |
1022 * 3: 4? 03 Xf add RBASE,R86 | |
1023 * 6: 4? ff eX jmpq *R86 | |
1024 * ^ ^ | |
1025 * instruction_begin current_position | |
1026 */ | |
1027 static INLINE void ProcessNaclCallOrJmpAddToRegNoRex( | |
1028 uint32_t *instruction_info_collected, | |
1029 const uint8_t **instruction_begin, | |
1030 const uint8_t *current_position, | |
1031 const uint8_t *data, | |
1032 bitmap_word *valid_targets) { | |
1033 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) | |
1034 ExpandSuperinstructionBySandboxingBytes( | |
1035 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | |
1036 else | |
1037 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | |
1038 } | |
1039 | |
1040 /* | |
1041 * This function checks that naclcall or nacljmp are correct (that is: three | |
1042 * component instructions match) and if that is true then it merges call or jmp | |
1043 * with a sandboxing to get a "superinstruction" and removes in-between jump | |
1044 * targets. If it's not true then it triggers "unrecognized instruction" error | |
1045 * condition. | |
1046 * | |
1047 * This version is for the case where "add with dst register in RM field" | |
1048 * (opcode 0x01) and "and with REX prefix" is used. | |
1049 * | |
1050 * There are three possible forms: | |
1051 * | |
1052 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
1053 * 4: 4? 01 fX add RBASE,R86 | |
1054 * 7: ff eX jmpq *R86 | |
1055 * ^ ^ | |
1056 * instruction_begin current_position | |
1057 * | |
1058 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
1059 * 4: 4? 01 fX add RBASE,R86 | |
1060 * 7: 4? ff eX jmpq *R86 | |
1061 * ^ ^ | |
1062 * instruction_begin current_position | |
1063 * | |
1064 * 0: 4? 83 eX e0 and $~0x1f,E64 | |
1065 * 4: 4? 01 fX add RBASE,R64 | |
1066 * 7: 4? ff eX jmpq *R64 | |
1067 * ^ ^ | |
1068 * instruction_begin current_position | |
1069 */ | |
1070 static INLINE void ProcessNaclCallOrJmpAddToRMWithRex( | |
1071 uint32_t *instruction_info_collected, | |
1072 const uint8_t **instruction_begin, | |
1073 const uint8_t *current_position, | |
1074 const uint8_t *data, | |
1075 bitmap_word *valid_targets) { | |
1076 if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position)) | |
1077 ExpandSuperinstructionBySandboxingBytes( | |
1078 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | |
1079 else | |
1080 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | |
1081 } | |
1082 | |
1083 /* | |
1084 * This function checks that naclcall or nacljmp are correct (that is: three | |
1085 * component instructions match) and if that is true then it merges call or jmp | |
1086 * with a sandboxing to get a "superinstruction" and removes in-between jump | |
1087 * targets. If it's not true then it triggers "unrecognized instruction" error | |
1088 * condition. | |
1089 * | |
1090 * This version is for the case where "add with dst register in REG field" | |
1091 * (opcode 0x03) and "and with REX prefix" is used. | |
1092 * | |
1093 * There are three possible forms: | |
1094 * | |
1095 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
1096 * 4: 4? 03 Xf add RBASE,R86 | |
1097 * 7: ff eX jmpq *R86 | |
1098 * ^ ^ | |
1099 * instruction_begin current_position | |
1100 * | |
1101 * 0: 4? 83 eX e0 and $~0x1f,E86 | |
1102 * 4: 4? 03 Xf add RBASE,R86 | |
1103 * 7: 4? ff eX jmpq *R86 | |
1104 * ^ ^ | |
1105 * instruction_begin current_position | |
1106 * | |
1107 * 0: 4? 83 eX e0 and $~0x1f,E64 | |
1108 * 4: 4? 03 Xf add RBASE,R64 | |
1109 * 7: 4? ff eX jmpq *R64 | |
1110 * ^ ^ | |
1111 * instruction_begin current_position | |
1112 */ | |
1113 static INLINE void ProcessNaclCallOrJmpAddToRegWithRex( | |
1114 uint32_t *instruction_info_collected, | |
1115 const uint8_t **instruction_begin, | |
1116 const uint8_t *current_position, | |
1117 const uint8_t *data, | |
1118 bitmap_word *valid_targets) { | |
1119 if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position)) | |
1120 ExpandSuperinstructionBySandboxingBytes( | |
1121 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets); | |
1122 else | |
1123 *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION; | |
1124 } | |
1125 | |
1126 | |
/*
 * Validates `size` bytes of code starting at `data`.
 *
 * size must be a multiple of kBundleSize (enforced with CHECK).  The chunk is
 * run through the Ragel-generated DFA one bundle at a time, or in a single
 * pass when PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM is set in options.  Two
 * bitmaps of size + 1 bits are filled along the way and handed to
 * ProcessInvalidJumpTargets afterwards to check direct jumps.
 *
 * The return value is the AND of everything user_callback and
 * ProcessInvalidJumpTargets returned.  errno is set to EINVAL when that
 * result is zero, and to ENOMEM (returning FALSE) when a bitmap cannot be
 * allocated.
 *
 * NOTE(review): cpu_features is not referenced by the C code visible here;
 * presumably the Ragel actions expanded by "write exec" use it -- confirm.
 * NOTE(review): those actions presumably also refer to the locals declared in
 * the loop body by name and may rely on the loop being a `for` statement --
 * verify before renaming or restructuring anything below.
 */
1127 Bool ValidateChunkAMD64(const uint8_t *data, size_t size, | |
1128 uint32_t options, | |
1129 const NaClCPUFeaturesX86 *cpu_features, | |
1130 ValidationCallbackFunc user_callback, | |
1131 void *callback_data) { | |
1132 bitmap_word valid_targets_small; | |
1133 bitmap_word jump_dests_small; | |
1134 bitmap_word *valid_targets; | |
1135 bitmap_word *jump_dests; | |
1136 const uint8_t *current_position; | |
1137 const uint8_t *end_of_bundle; | |
1138 int result = TRUE; | |
1139 | |
1140 CHECK(sizeof valid_targets_small == sizeof jump_dests_small); | |
1141 CHECK(size % kBundleSize == 0); | |
1142 | |
1143 /* | |
1144 * For very small sequences (one bundle) malloc is too expensive. | |
1145 * | |
1146 * Note1: we allocate one extra bit, because we set valid jump target bits | |
1147 * _after_ instructions, so there will be one at the end of the chunk. | |
1148 * | |
1149 * Note2: we don't ever mark first bit as a valid jump target but this is | |
1150 * not a problem because any aligned address is valid jump target. | |
1151 */ | |
1152 if ((size + 1) <= (sizeof valid_targets_small * 8)) { | |
1153 valid_targets_small = 0; | |
1154 valid_targets = &valid_targets_small; | |
1155 jump_dests_small = 0; | |
1156 jump_dests = &jump_dests_small; | |
1157 } else { | |
1158 valid_targets = BitmapAllocate(size + 1); | |
1159 jump_dests = BitmapAllocate(size + 1); | |
1160 if (!valid_targets || !jump_dests) { | |
1161 free(jump_dests); | |
1162 free(valid_targets); | |
1163 errno = ENOMEM; | |
1164 return FALSE; | |
1165 } | |
1166 } | |
1167 | |
1168 /* | |
1169 * This option is usually used in tests: we will process the whole chunk | |
1170 * in one pass. Usually each bundle is processed separately which means | |
1171 * instructions (and super-instructions) can not cross borders of the bundle. | |
1172 */ | |
1173 if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM) | |
1174 end_of_bundle = data + size; | |
1175 else | |
1176 end_of_bundle = data + kBundleSize; | |
1177 | |
1178 /* | |
1179 * Main loop. Here we process the data array bundle-after-bundle. | |
1180 * Ragel-produced DFA does all the checks with one exception: direct jumps. | |
1181 * It collects the two arrays: valid_targets and jump_dests which are used | |
1182 * to test direct jumps later. | |
1183 */ | |
1184 for (current_position = data; | |
1185 current_position < data + size; | |
1186 current_position = end_of_bundle, | |
1187 end_of_bundle = current_position + kBundleSize) { | |
1188 /* Start of the instruction being processed. */ | |
1189 const uint8_t *instruction_begin = current_position; | |
1190 /* Only used locally in the end_of_instruction_cleanup action. */ | |
1191 const uint8_t *instruction_end; | |
1192 int current_state; | |
1193 uint32_t instruction_info_collected = 0; | |
1194 /* Keeps one byte of information per operand in the current instruction: | |
1195 * 2 bits for register kinds, | |
1196 * 5 bits for register numbers (16 regs plus RIZ). */ | |
1197 uint32_t operand_states = 0; | |
1198 enum OperandName base = NO_REG; | |
1199 enum OperandName index = NO_REG; | |
1200 enum OperandName restricted_register = | |
1201 EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options); | |
1202 uint8_t rex_prefix = FALSE; | |
1203 /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */ | |
1204 uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B; | |
1205 uint8_t vex_prefix3 = 0x00; | |
1206 | |
/* Ragel expands the two directives below into the DFA setup and its
 * execution loop over [current_position, end_of_bundle). */
1207 %% write init; | |
1208 %% write exec; | |
1209 | |
1210 /* | |
1211 * Ragel DFA accepted the bundle, but we still need to make sure the last | |
1212 * instruction hasn't left %rbp or %rsp in restricted state. | |
1213 */ | |
1214 if (restricted_register == REG_RBP) | |
1215 result &= user_callback(end_of_bundle, end_of_bundle, | |
1216 RESTRICTED_RBP_UNPROCESSED | | |
1217 ((REG_RBP << RESTRICTED_REGISTER_SHIFT) & | |
1218 RESTRICTED_REGISTER_MASK), callback_data); | |
1219 else if (restricted_register == REG_RSP) | |
1220 result &= user_callback(end_of_bundle, end_of_bundle, | |
1221 RESTRICTED_RSP_UNPROCESSED | | |
1222 ((REG_RSP << RESTRICTED_REGISTER_SHIFT) & | |
1223 RESTRICTED_REGISTER_MASK), callback_data); | |
1224 } | |
1225 | |
1226 /* | |
1227 * Check the direct jumps. All the targets from jump_dests must be in | |
1228 * valid_targets. | |
1229 */ | |
1230 result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests, | |
1231 user_callback, callback_data); | |
1232 | |
1233 /* We only use malloc for large code sequences. */ | |
1234 if (jump_dests != &jump_dests_small) free(jump_dests); | |
1235 if (valid_targets != &valid_targets_small) free(valid_targets); | |
1236 if (!result) errno = EINVAL; | |
1237 return result; | |
1238 } | |
OLD | NEW |