Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1007)

Unified Diff: src/trusted/validator_ragel/unreviewed/validator_x86_64.rl

Issue 11000033: Move validator_x86_XX.rl out of unreviewed. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/trusted/validator_ragel/unreviewed/validator_x86_32.rl ('k') | src/trusted/validator_ragel/validator.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/trusted/validator_ragel/unreviewed/validator_x86_64.rl
===================================================================
--- src/trusted/validator_ragel/unreviewed/validator_x86_64.rl (revision 11037)
+++ src/trusted/validator_ragel/unreviewed/validator_x86_64.rl (working copy)
@@ -1,1236 +0,0 @@
-/*
- * Copyright (c) 2012 The Native Client Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-/*
- * This is the core of amd64-mode validator. Please note that this file
- * combines ragel machine description and C language actions. Please read
- * validator_internals.html first to understand how the whole thing is built:
- * it explains how the byte sequences are constructed, what constructs like
- * "@{}" or "REX_WRX?" mean, etc.
- */
-
-#include <assert.h>
-#include <errno.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "native_client/src/trusted/validator_ragel/bitmap.h"
-#include "native_client/src/trusted/validator_ragel/unreviewed/validator_internal.h"
-
-%%{
- machine x86_64_validator;
- alphtype unsigned char;
- variable p current_position;
- variable pe end_of_bundle;
- variable eof end_of_bundle;
- variable cs current_state;
-
- include byte_machine "byte_machines.rl";
-
- include prefixes_parsing_validator
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include rex_actions
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include rex_parsing
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include vex_actions_amd64
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include vex_parsing_amd64
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include displacement_fields_parsing
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include modrm_actions_amd64
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include modrm_parsing
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include operand_format_actions
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include operand_source_actions_amd64
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include immediate_fields_parsing
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include relative_fields_validator_actions
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include relative_fields_parsing
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
- include cpuid_actions
- "native_client/src/trusted/validator_ragel/parse_instruction.rl";
-
- action check_access {
- CheckAccess(instruction_begin - data, base, index, restricted_register,
- valid_targets, &instruction_info_collected);
- }
-
- # Action which marks last byte as not immediate. Most 3DNow! instructions,
- # some AVX and XOP instructions have this property. It's referenced by
- # decode_x86_64 machine in [autogenerated] "validator_x86_64_instruction.rl"
- # file.
- action last_byte_is_not_immediate {
- instruction_info_collected |= LAST_BYTE_IS_NOT_IMMEDIATE;
- }
-
- action modifiable_instruction {
- instruction_info_collected |= MODIFIABLE_INSTRUCTION;
- }
-
- action process_0_operands {
- Process0Operands(&restricted_register, &instruction_info_collected);
- }
- action process_1_operand {
- Process1Operand(&restricted_register, &instruction_info_collected,
- rex_prefix, operand_states);
- }
- action process_1_operand_zero_extends {
- Process1OperandZeroExtends(&restricted_register,
- &instruction_info_collected, rex_prefix,
- operand_states);
- }
- action process_2_operands {
- Process2Operands(&restricted_register, &instruction_info_collected,
- rex_prefix, operand_states);
- }
- action process_2_operands_zero_extends {
- Process2OperandsZeroExtends(&restricted_register,
- &instruction_info_collected, rex_prefix,
- operand_states);
- }
-
- include decode_x86_64 "validator_x86_64_instruction.rl";
-
- # Special %rbp modifications - the ones which don't need sandboxing.
- #
- # Note that there are two different opcodes for "mov": in x86-64 there are two
- # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
- # from REG field to RM or in the other direction thus there are two encodings
- # for the register-to-register move.
- rbp_modifications =
- (b_0100_10x0 0x89 0xe5 | # mov %rsp,%rbp
- b_0100_10x0 0x8b 0xec) # mov %rsp,%rbp
- @process_0_operands;
-
- # Special instructions used for %rbp sandboxing.
- #
- # This is the "second half" of the %rbp sandboxing. Any zero-extending
- # instruction which stores the data in %ebp can be first half, but unlike
- # the situation with other "normal" registers you can not just write to
- # %ebp and continue: such activity MUST restore the status quo immediately
- # via one of these instructions.
- rbp_sandboxing =
- (b_0100_11x0 0x01 0xfd | # add %r15,%rbp
- b_0100_10x1 0x03 0xef | # add %r15,%rbp
- # Note that unlike %rsp case, there is no 'lea (%rbp,%r15,1),%rbp'
- # instruction (it gets assembled as 'lea 0x00(%rbp,%r15,1),%rbp').
- 0x4a 0x8d 0x6c 0x3d 0x00 | # lea 0x00(%rbp,%r15,1),%rbp
- 0x4a 0x8d 0xac 0x3d 0x00 0x00 0x00 0x00) # lea 0x00000000(%rbp,%r15,1),%rbp
- # Note: restricted_register keeps the restricted register as explained in
- # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
- #
- # "Normal" instructions can not be used in a place where %rbp is restricted.
- # But since these instructions are "second half" of the %rbp sandboxing they
- # can be used *only* when %rbp is restricted.
- #
- # That is (normal instruction):
- # mov %eax,%ebp
- # mov %esi,%edi <- Error: %ebp is restricted
- # vs
- # mov %esi,%edi
- # add %r15,%rbp <- Error: %ebp is *not* restricted
- # vs
- # mov %eax,%ebp
- # add %r15,%rbp <- Ok: %rbp is restricted as it should be
- #
- # Check this precondition and mark the beginning of the instruction as
- # an invalid jump target.
- @{ if (restricted_register == REG_RBP)
- instruction_info_collected |= RESTRICTED_REGISTER_USED;
- else
- instruction_info_collected |= UNRESTRICTED_RBP_PROCESSED;
- restricted_register = NO_REG;
- UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
- };
-
- # Special %rsp modifications - the ones which don't need sandboxing.
- #
- # Note that there are two different opcodes for "mov": in x86-64 there are two
- # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
- # from REG field to RM or in the other direction thus there are two encodings
- # for the register-to-register move.
- rsp_modifications =
- (b_0100_10x0 0x89 0xec | # mov %rbp,%rsp
- b_0100_10x0 0x8b 0xe5 | # mov %rbp,%rsp
- # Superfluous bits are not supported:
- # http://code.google.com/p/nativeclient/issues/detail?id=3012
- b_0100_1000 0x83 0xe4 (0x80 .. 0xff)) # and $XXX,%rsp
- @process_0_operands;
-
- # Special instructions used for %rsp sandboxing.
- #
- # This is the "second half" of the %rsp sandboxing. Any zero-extending
- # instruction which stores the data in %esp can be first half, but unlike
- # the situation with other "normal" registers you can not just write to
- # %esp and continue: such activity MUST restore the status quo immediately
- # via one of these instructions.
- rsp_sandboxing =
- (b_0100_11x0 0x01 0xfc | # add %r15,%rsp
- b_0100_10x1 0x03 0xe7 | # add %r15,%rsp
- # OR can be used as well, see
- # http://code.google.com/p/nativeclient/issues/detail?id=3070
- b_0100_11x0 0x09 0xfc | # or %r15,%rsp
- b_0100_10x1 0x0b 0xe7 | # or %r15,%rsp
- 0x4a 0x8d 0x24 0x3c | # lea (%rsp,%r15,1),%rsp
- 0x4a 0x8d 0x64 0x3c 0x00 | # lea 0x00(%rsp,%r15,1),%rsp
- 0x4a 0x8d 0xa4 0x3c 0x00 0x00 0x00 0x00) # lea 0x00000000(%rsp,%r15,1),%rsp
- # Note: restricted_register keeps the restricted register as explained in
- # http://www.chromium.org/nativeclient/design-documents/nacl-sfi-model-on-x86-64-systems
- #
- # "Normal" instructions can not be used in a place where %rsp is restricted.
- # But since these instructions are "second half" of the %rsp sandboxing they
- # can be used *only* when %rsp is restricted.
- #
- # That is (normal instruction):
- # mov %eax,%esp
- # mov %esi,%edi <- Error: %esp is restricted
- # vs
- # mov %esi,%edi
- # add %r15,%rsp <- Error: %esp is *not* restricted
- # vs
- # mov %eax,%esp
- # add %r15,%rsp <- Ok: %rsp is restricted as it should be
- #
- # Check this precondition and mark the beginning of the instruction as
- # an invalid jump target.
- @{ if (restricted_register == REG_RSP)
- instruction_info_collected |= RESTRICTED_REGISTER_USED;
- else
- instruction_info_collected |= UNRESTRICTED_RSP_PROCESSED;
- restricted_register = NO_REG;
- UnmarkValidJumpTarget((instruction_begin - data), valid_targets);
- };
-
- # naclcall or nacljmp. These are three-instruction indirection-jump sequences.
- # and $~0x1f, %eXX
- # and RBASE, %rXX
- # jmpq *%rXX (or: callq *%rXX)
- # Note: first "and $~0x1f, %eXX" is a normal instruction (it can occur not
- # just as part of the naclcall/nacljmp, but also as a standalone instruction).
- #
- # This means that when naclcall_or_nacljmp ragel machine will be combined with
- # "normal_instruction*" regular action process_1_operand_zero_extends will be
- # triggered when main ragel machine will accept "and $~0x1f, %eXX" x86-64
- # instruction. This action will check if %rbp/%rsp is legally modified thus
- # we don't need to duplicate this logic in naclcall_or_nacljmp ragel machine.
- #
- # There are a number of variants present which differ by the REX prefix usage:
- # we need to make sure "%eXX" in "and", "%rXX" in "add", and "%rXX" in "jmpq"
- # or "callq" are the same register and it's much simpler to do if one single
- # action handles only a fixed number of bytes.
- #
- # Additional complication arises because x86-64 contains two different "add"
- # instructions: with "0x01" and "0x03" opcode. They differ in the direction
- # used: both can encode "add %src_register, %dst_register", but the first one
- # uses field REG of the ModR/M byte for the src and field RM of the ModR/M
- # byte for the dst while last one uses field RM of the ModR/M byte for the src
- # and field REG of the ModR/M byte for dst. Both should be allowed.
- #
- # See AMD/Intel manual for clarification "add" instruction encoding.
- #
- # REGISTER USAGE ABBREVIATIONS:
- # E86: legacy ia32 registers (all eight: %eax to %edi)
- # R86: 64-bit counterparts for legacy 386 registers (%rax to %rdi)
- # E64: 32-bit counterparts for new amd64 registers (%r8d to %r14d)
- # R64: new amd64 registers (only seven: %r8 to %r14)
- # RBASE: %r15 (used as "base of untrusted world" in NaCl for amd64)
- naclcall_or_nacljmp =
- # This block encodes call and jump "superinstruction" of the following form:
- # 0: 83 e_ e0 and $~0x1f,E86
- # 3: 4_ 01 f_ add RBASE,R86
- # 6: ff e_ jmpq *R86
- #### INSTRUCTION ONE (three bytes)
- # and $~0x1f, E86
- (0x83 b_11_100_xxx 0xe0
- #### INSTRUCTION TWO (three bytes)
- # add RBASE, R86 (0x01 opcode)
- b_0100_11x0 0x01 b_11_111_xxx
- #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
- # callq R86
- ((REX_WRX? 0xff b_11_010_xxx) |
- #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
- # jmpq R86
- (REX_WRX? 0xff b_11_100_xxx)))
- @{
- ProcessNaclCallOrJmpAddToRMNoRex(&instruction_info_collected,
- &instruction_begin, current_position,
- data, valid_targets);
- } |
-
- # This block encodes call and jump "superinstruction" of the following form:
- # 0: 83 e_ e0 and $~0x1f,E86
- # 3: 4_ 03 _f add RBASE,R86
- # 6: ff e_ jmpq *R86
- #### INSTRUCTION ONE (three bytes)
- # and $~0x1f, E86
- (0x83 b_11_100_xxx 0xe0
- #### INSTRUCTION TWO (three bytes)
- # add RBASE, R86 (0x03 opcode)
- b_0100_10x1 0x03 b_11_xxx_111
- #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
- # callq R86
- ((REX_WRX? 0xff b_11_010_xxx) |
- #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
- # jmpq R86
- (REX_WRX? 0xff b_11_100_xxx)))
- @{
- ProcessNaclCallOrJmpAddToRegNoRex(&instruction_info_collected,
- &instruction_begin, current_position,
- data, valid_targets);
- } |
-
- # This block encodes call and jump "superinstruction" of the following form:
- # 0: 4_ 83 e_ e0 and $~0x1f,E86
- # 4: 4_ 01 f_ add RBASE,R86
- # 7: ff e_ jmpq *R86
- #### INSTRUCTION ONE (four bytes)
- # and $~0x1f, E86
- ((REX_RX 0x83 b_11_100_xxx 0xe0
- #### INSTRUCTION TWO (three bytes)
- # add RBASE, R86 (0x01 opcode)
- b_0100_11x0 0x01 b_11_111_xxx
- #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
- # callq R86
- ((REX_WRX? 0xff b_11_010_xxx) |
- #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
- # jmpq R86
- (REX_WRX? 0xff b_11_100_xxx))) |
-
- # This block encodes call and jump "superinstruction" of the following form:
- # 0: 4_ 83 e_ e0 and $~0x1f,E64
- # 4: 4_ 01 f_ add RBASE,R64
- # 7: 4_ ff e_ jmpq *R64
- #### INSTRUCTION ONE (four bytes)
- # and $~0x1f, E64
- (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
- #### INSTRUCTION TWO (three bytes)
- # add RBASE, R64 (0x01 opcode)
- b_0100_11x1 0x01 (b_11_111_xxx - b_11_111_111)
- #### INSTRUCTION THREE: call (three bytes)
- # callq R64
- ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
- #### INSTRUCTION THREE: jmp (three bytes)
- # jmpq R64
- (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
- @{
- ProcessNaclCallOrJmpAddToRMWithRex(&instruction_info_collected,
- &instruction_begin, current_position,
- data, valid_targets);
- } |
-
- # This block encodes call and jump "superinstruction" of the following form:
- # 0: 4_ 83 e_ e0 and $~0x1f,E86
- # 4: 4_ 03 _f add RBASE,R86
- # 7: ff e_ jmpq *R86
- #### INSTRUCTION ONE (four bytes)
- # and $~0x1f, E86
- ((REX_RX 0x83 b_11_100_xxx 0xe0
- #### INSTRUCTION TWO (three bytes)
- # add RBASE, R86 (0x03 opcode)
- b_0100_10x1 0x03 b_11_xxx_111
- #### INSTRUCTION THREE: call (two bytes plus optional REX prefix)
- # callq R86
- ((REX_WRX? 0xff b_11_010_xxx) |
- #### INSTRUCTION THREE: jmp (two bytes plus optional REX prefix)
- # jmpq R86
- (REX_WRX? 0xff b_11_100_xxx))) |
-
- # This block encodes call and jump "superinstruction" of the following form:
- # 0: 4_ 83 e_ e0 and $~0x1f,E64
- # 4: 4_ 03 _f add RBASE,R64
- # 7: 4_ ff e_ jmpq *R64
- #### INSTRUCTION ONE (four bytes)
- # and $~0x1f, E64
- (b_0100_0xx1 0x83 (b_11_100_xxx - b_11_100_111) 0xe0
- #### INSTRUCTION TWO (three bytes)
- # add RBASE, R64 (0x03 opcode)
- b_0100_11x1 0x03 (b_11_xxx_111 - b_11_111_111)
- #### INSTRUCTION THREE: call (three bytes)
- # callq R64
- ((b_0100_xxx1 0xff (b_11_010_xxx - b_11_010_111)) |
- #### INSTRUCTION THREE: jmp (three bytes)
- # jmpq R64
- (b_0100_xxx1 0xff (b_11_100_xxx - b_11_100_111)))))
- @{
- ProcessNaclCallOrJmpAddToRegWithRex(&instruction_info_collected,
- &instruction_begin, current_position,
- data, valid_targets);
- };
-
- # EMMX/SSE/SSE2/AVX instructions which have implicit %ds:(%rsi) operand
-
- # maskmovq %mmX,%mmY (EMMX or SSE)
- maskmovq = REX_WRXB? 0x0f 0xf7 @CPUFeature_EMMXSSE modrm_registers;
-
- # maskmovdqu %xmmX, %xmmY (SSE2)
- maskmovdqu = 0x66 REX_WRXB? 0x0f 0xf7 @CPUFeature_SSE2 modrm_registers;
-
- # vmaskmovdqu %xmmX, %xmmY (AVX)
- vmaskmovdqu = ((0xc4 (VEX_RB & VEX_map00001) b_0_1111_0_01) |
- (0xc5 b_X_1111_0_01)) 0xf7 @CPUFeature_AVX modrm_registers;
-
- mmx_sse_rdi_instruction = maskmovq | maskmovdqu | vmaskmovdqu;
-
- # Temporary fix: for string instructions combination of data16 and rep(ne)
- # prefixes is disallowed to mimic old validator behavior.
- # See http://code.google.com/p/nativeclient/issues/detail?id=1950
-
- # data16rep = (data16 | rep data16 | data16 rep);
- # data16condrep = (data16 | condrep data16 | data16 condrep);
- data16rep = data16;
- data16condrep = data16;
-
- # String instructions which use only %ds:(%rsi)
- string_instruction_rsi_no_rdi =
- (rep? 0xac | # lods %ds:(%rsi),%al
- data16rep 0xad | # lods %ds:(%rsi),%ax
- rep? REXW_NONE? 0xad); # lods %ds:(%rsi),%eax/%rax
-
- # String instructions which use only %ds:(%rdi)
- string_instruction_rdi_no_rsi =
- condrep? 0xae | # scas %es:(%rdi),%al
- data16condrep 0xaf | # scas %es:(%rdi),%ax
- condrep? REXW_NONE? 0xaf | # scas %es:(%rdi),%eax/%rax
-
- rep? 0xaa | # stos %al,%es:(%rdi)
- data16rep 0xab | # stos %ax,%es:(%rdi)
- rep? REXW_NONE? 0xab; # stos %eax/%rax,%es:(%rdi)
-
- # String instructions which use both %ds:(%rsi) and %es:(%rdi)
- string_instruction_rsi_rdi =
- condrep? 0xa6 | # cmpsb %es:(%rdi),%ds:(%rsi)
- data16condrep 0xa7 | # cmpsw %es:(%rdi),%ds:(%rsi)
- condrep? REXW_NONE? 0xa7 | # cmps[lq] %es:(%rdi),%ds:(%rsi)
-
- rep? 0xa4 | # movsb %ds:(%rsi),%es:(%rdi)
- data16rep 0xa5 | # movsw %ds:(%rsi),%es:(%rdi)
- rep? REXW_NONE? 0xa5; # movs[lq] %ds:(%rsi),%es:(%rdi)
-
- # "Superinstruction" which includes %rsi sandboxing.
- #
- # There are two variants which handle spurious REX prefixes.
- #
- # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi": in x86-64
- # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
- # be used to move from REG field to RM or in the other direction thus there
- # are two encodings for the register-to-register move (and since REG and RM
- # are identical here only opcode differs).
- sandbox_instruction_rsi_no_rdi =
- (0x89 | 0x8b) 0xf6 # mov %esi,%esi
- 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
- string_instruction_rsi_no_rdi
- @{
- ExpandSuperinstructionBySandboxingBytes(
- 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
- } |
-
- REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
- 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
- string_instruction_rsi_no_rdi
- @{
- ExpandSuperinstructionBySandboxingBytes(
- 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
- };
-
- # "Superinstruction" which includes %rdi sandboxing.
- #
- # There are two variants which handle spurious REX prefixes.
- #
- # Note that both "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64
- # there are two fields in ModR/M byte (REG field and RM field) and "mov" may
- # be used to move from REG field to RM or in the other direction thus there
- # are two encodings for the register-to-register move (and since REG and RM
- # are identical here only opcode differs).
- sandbox_instruction_rdi_no_rsi =
- (0x89 | 0x8b) 0xff # mov %edi,%edi
- 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
- (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
- @{
- ExpandSuperinstructionBySandboxingBytes(
- 2 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
- } |
-
- REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
- (string_instruction_rdi_no_rsi | mmx_sse_rdi_instruction)
- @{
- ExpandSuperinstructionBySandboxingBytes(
- 3 /* mov */ + 4 /* lea */, &instruction_begin, data, valid_targets);
- };
-
-
- # "Superinstruction" which includes both %rsi and %rdi sandboxing.
- #
- # There are four variants which handle spurious REX prefixes.
- #
- # Note that both "0x89 0xf6" and "0x8b 0xf6" encode "mov %esi,%esi" while both
- # "0x89 0xff" and "0x8b 0xff" encode "mov %edi,%edi": in x86-64 there are two
- # fields in ModR/M byte (REG field and RM field) and "mov" may be used to move
- # from REG field to RM or in the other direction thus there are two encodings
- # for the register-to-register move (and since REG and RM are identical here
- # only opcode differs).
- sandbox_instruction_rsi_rdi =
- (0x89 | 0x8b) 0xf6 # mov %esi,%esi
- 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
- (0x89 | 0x8b) 0xff # mov %edi,%edi
- 0x49 0x8d 0x3c 0x3f # lea (%r15,%rdi,1),%rdi
- string_instruction_rsi_rdi
- @{
- ExpandSuperinstructionBySandboxingBytes(
- 2 /* mov */ + 4 /* lea */ + 2 /* mov */ + 4 /* lea */,
- &instruction_begin, data, valid_targets);
- } |
-
- (((0x89 | 0x8b) 0xf6 # mov %esi,%esi
- 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
- REX_X (0x89 | 0x8b) 0xff # mov %edi,%edi
- 0x49 0x8d 0x3c 0x3f) | # lea (%r15,%rdi,1),%rdi
-
- (REX_X (0x89 | 0x8b) 0xf6 # mov %esi,%esi
- 0x49 0x8d 0x34 0x37 # lea (%r15,%rsi,1),%rsi
- (0x89 | 0x8b) 0xff # mov %edi,%edi
- 0x49 0x8d 0x3c 0x3f)) # lea (%r15,%rdi,1),%rdi
- string_instruction_rsi_rdi
- @{
- ExpandSuperinstructionBySandboxingBytes(
- 2 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */
- /* == 3 (* mov *) + 4 (* lea *) + 2 (* mov *) + 4 (* lea *) */,
- &instruction_begin, data, valid_targets);
- } |
-
- REX_X (0x89 | 0x8b) 0xf6 . # mov %esi,%esi
- 0x49 0x8d 0x34 0x37 . # lea (%r15,%rsi,1),%rsi
- REX_X (0x89 | 0x8b) 0xff . # mov %edi,%edi
- 0x49 0x8d 0x3c 0x3f . # lea (%r15,%rdi,1),%rdi
- string_instruction_rsi_rdi
- @{
- ExpandSuperinstructionBySandboxingBytes(
- 3 /* mov */ + 4 /* lea */ + 3 /* mov */ + 4 /* lea */,
- &instruction_begin, data, valid_targets);
- };
-
- # All the "special" instructions (== instructions which obey non-standard
- # rules). Three groups:
- # * %rbp/%rsp related instructions (these instructions are special because
- # these registers must be in the range %r15...%r15+4294967295 except
- # momentarily they can be in the range 0...4294967295)
- # * string instructions (which can not use %r15 as base and thus need special
- # handling both in compiler and validator)
- # * naclcall/nacljmp (indirect jumps need special care)
- special_instruction =
- (rbp_modifications |
- rsp_modifications |
- rbp_sandboxing |
- rsp_sandboxing |
- sandbox_instruction_rsi_no_rdi |
- sandbox_instruction_rdi_no_rsi |
- sandbox_instruction_rsi_rdi |
- naclcall_or_nacljmp)
- # Mark the instruction as special - currently this information is used only
- # in tests, but in the future we may use it for dynamic code modification
- # support.
- @{
- instruction_info_collected |= SPECIAL_INSTRUCTION;
- };
-
- # Remove special instructions which are only allowed in special cases.
- normal_instruction = one_instruction - special_instruction;
-
- # Check if call is properly aligned.
- #
- # For direct call we explicitly encode all variations. For indirect call
- # we accept all the special instructions which end with register-addressed
- # indirect call.
- call_alignment =
- ((normal_instruction &
- # Direct call
- ((data16 REX_RXB? 0xe8 rel16) |
- (REX_WRXB? 0xe8 rel32) |
- (data16 REXW_RXB 0xe8 rel32))) |
- (special_instruction &
- # Indirect call
- (any* data16? REX_WRXB? 0xff ((opcode_2 | opcode_3) any* &
- modrm_registers))))
- # Call instruction must be aligned to the end of the bundle. Previously this
- # was a strict requirement; today it's just a warning to aid with debugging.
- @{
- if (((current_position - data) & kBundleMask) != kBundleMask)
- instruction_info_collected |= BAD_CALL_ALIGNMENT;
- };
-
- # This action calls user's callback (if needed) and cleans up validator's
- # internal state.
- #
- # We call the user callback if there are validation errors or if the
- # CALL_USER_CALLBACK_ON_EACH_INSTRUCTION option is used.
- #
- # After that we move instruction_begin and clean all the variables which
- # are only used in the processing of a single instruction (prefixes, operand
- # states and instruction_info_collected).
- action end_of_instruction_cleanup {
- /* Call user-supplied callback. */
- instruction_end = current_position + 1;
- if ((instruction_info_collected & VALIDATION_ERRORS_MASK) ||
- (options & CALL_USER_CALLBACK_ON_EACH_INSTRUCTION)) {
- result &= user_callback(
- instruction_begin, instruction_end,
- instruction_info_collected |
- ((restricted_register << RESTRICTED_REGISTER_SHIFT) &
- RESTRICTED_REGISTER_MASK), callback_data);
- }
-
- /* On successful match the instruction_begin must point to the next byte
- * to be able to report the new offset as the start of instruction
- * causing error. */
- instruction_begin = instruction_end;
-
- /* Mark start of the next instruction as a valid target for jump.
- * Note: we mark start of the next instruction here, not start of the
- * current one because memory access check should be able to clear this
- * bit when restricted register is used. */
- MarkValidJumpTarget(instruction_begin - data, valid_targets);
-
- /* Clear variables. */
- instruction_info_collected = 0;
- SET_REX_PREFIX(FALSE);
- /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
- SET_VEX_PREFIX2(VEX_R | VEX_X | VEX_B);
- SET_VEX_PREFIX3(0x00);
- operand_states = 0;
- base = 0;
- index = 0;
- }
-
- # This action reports fatal error detected by DFA.
- action report_fatal_error {
- result &= user_callback(instruction_begin, current_position,
- UNRECOGNIZED_INSTRUCTION, callback_data);
- /*
- * Process the next bundle: "continue" here is for the "for" cycle in
- * the ValidateChunkAMD64 function.
- *
- * It does not affect the case which we really care about (when code
- * is validatable), but makes it possible to detect more errors in one
- * run in tools like ncval.
- */
- continue;
- }
-
- # This is the main ragel machine: it does 99% of validation work. There is only
- # one thing to do with a bundle if this machine accepts the bundle:
- # * check for the state of the restricted_register at the end of the bundle.
- # It's an error if %rbp or %rsp is restricted at the end of the bundle.
- # Additionally if all the bundles are fine you need to check that direct jumps
- # are correct. This is done in the following way:
- # * DFA fills two arrays: valid_targets and jump_dests.
- # * ProcessInvalidJumpTargets checks that "jump_dests & !valid_targets == 0".
- # All other checks are done here.
-
- main := ((call_alignment | normal_instruction | special_instruction)
- @end_of_instruction_cleanup)*
- $!report_fatal_error;
-
-}%%
-
-%% write data;
-
-enum OperandKind {
- OPERAND_SANDBOX_IRRELEVANT = 0,
- /*
- * Currently we do not distinguish 8bit and 16bit modifications from
- * OPERAND_SANDBOX_UNRESTRICTED to match the behavior of the old validator.
- *
- * 8bit operands must be distinguished from other types because the REX prefix
- * regulates the choice between %ah and %spl, as well as %ch and %bpl.
- */
- OPERAND_SANDBOX_8BIT,
- OPERAND_SANDBOX_RESTRICTED,
- OPERAND_SANDBOX_UNRESTRICTED
-};
-
-#define SET_OPERAND_NAME(N, S) operand_states |= ((S) << ((N) << 3))
-#define SET_OPERAND_FORMAT(N, T) SET_OPERAND_FORMAT_ ## T(N)
-#define SET_OPERAND_FORMAT_OPERAND_FORMAT_8_BIT(N) \
- operand_states |= OPERAND_SANDBOX_8BIT << (5 + ((N) << 3))
-#define SET_OPERAND_FORMAT_OPERAND_FORMAT_16_BIT(N) \
- operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))
-#define SET_OPERAND_FORMAT_OPERAND_FORMAT_32_BIT(N) \
- operand_states |= OPERAND_SANDBOX_RESTRICTED << (5 + ((N) << 3))
-#define SET_OPERAND_FORMAT_OPERAND_FORMAT_64_BIT(N) \
- operand_states |= OPERAND_SANDBOX_UNRESTRICTED << (5 + ((N) << 3))
-#define CHECK_OPERAND(N, S, T) \
- ((operand_states & (0xff << ((N) << 3))) == ((S | (T << 5)) << ((N) << 3)))
-
-static INLINE void CheckAccess(ptrdiff_t instruction_begin,
- enum OperandName base,
- enum OperandName index,
- uint8_t restricted_register,
- bitmap_word *valid_targets,
- uint32_t *instruction_info_collected) {
- if ((base == REG_RIP) || (base == REG_R15) ||
- (base == REG_RSP) || (base == REG_RBP)) {
- if ((index == NO_REG) || (index == REG_RIZ))
- { /* do nothing. */ }
- else if (index == restricted_register)
- BitmapClearBit(valid_targets, instruction_begin),
- *instruction_info_collected |= RESTRICTED_REGISTER_USED;
- else
- *instruction_info_collected |= UNRESTRICTED_INDEX_REGISTER;
- } else {
- *instruction_info_collected |= FORBIDDEN_BASE_REGISTER;
- }
-}
-
-
-static INLINE void Process0Operands(enum OperandName *restricted_register,
- uint32_t *instruction_info_collected) {
- /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
- * instruction, not with regular instruction. */
- if (*restricted_register == REG_RSP) {
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
- } else if (*restricted_register == REG_RBP) {
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
- }
- *restricted_register = NO_REG;
-}
-
-static INLINE void Process1Operand(enum OperandName *restricted_register,
- uint32_t *instruction_info_collected,
- uint8_t rex_prefix,
- uint32_t operand_states) {
- /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
- * instruction, not with regular instruction. */
- if (*restricted_register == REG_RSP) {
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
- } else if (*restricted_register == REG_RBP) {
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
- }
- *restricted_register = NO_REG;
- if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= R15_MODIFIED;
- } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= BPL_MODIFIED;
- } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= SPL_MODIFIED;
- }
-}
-
-static INLINE void Process1OperandZeroExtends(
- enum OperandName *restricted_register,
- uint32_t *instruction_info_collected,
- uint8_t rex_prefix,
- uint32_t operand_states) {
- /* Restricted %rsp or %rbp must be processed by appropriate nacl-special
- * instruction, not with regular instruction. */
- if (*restricted_register == REG_RSP) {
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
- } else if (*restricted_register == REG_RBP) {
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
- }
- *restricted_register = NO_REG;
- if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= R15_MODIFIED;
- } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= BPL_MODIFIED;
- } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= SPL_MODIFIED;
- /* If operand 0 has type OPERAND_SANDBOX_RESTRICTED and denotes a register
- * (4th bit == 0), take its low four bits as *restricted_register. */
- } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
- *restricted_register = operand_states & 0x0f;
- }
-}
-
- static INLINE void Process2Operands(enum OperandName *restricted_register,
- uint32_t *instruction_info_collected,
- uint8_t rex_prefix,
- uint32_t operand_states) {
- /* Updates the restricted-register state and the per-instruction flags after
- * an instruction whose operands 0 and 1 are examined below. This variant
- * never produces a new restricted register: *restricted_register is always
- * NO_REG on return.
- *
- * restricted_register: in/out; compared against REG_RSP/REG_RBP on entry.
- * instruction_info_collected: in/out flag word; bits are only ever set here.
- * rex_prefix: tested for truthiness in the 8-bit %rbp/%rsp checks.
- * operand_states: not referenced by name in this body; presumably read by
- * CHECK_OPERAND (the macro is defined outside this excerpt).
- *
- * Restricted %rsp or %rbp must be processed by the appropriate nacl-special
- * instruction, not by a regular instruction. */
- if (*restricted_register == REG_RSP) {
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
- } else if (*restricted_register == REG_RBP) {
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
- }
- *restricted_register = NO_REG;
- if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= R15_MODIFIED;
- } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
- (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= BPL_MODIFIED;
- } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
- (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= SPL_MODIFIED;
- }
- }
-
- static INLINE void Process2OperandsZeroExtends(
- enum OperandName *restricted_register,
- uint32_t *instruction_info_collected,
- uint8_t rex_prefix,
- uint32_t operand_states) {
- /* Updates the restricted-register state and the per-instruction flags after
- * an instruction whose operands 0 and 1 are examined below. In this
- * "ZeroExtends" variant either operand may become the new restricted
- * register; operand 0 is considered first.
- *
- * restricted_register: in/out; compared against REG_RSP/REG_RBP on entry,
- * then reset to NO_REG and possibly set again by the last two branches.
- * instruction_info_collected: in/out flag word; bits are only ever set here.
- * rex_prefix: tested for truthiness in the 8-bit %rbp/%rsp checks.
- * operand_states: packed operand descriptors, operand 0 in bits 0-7 and
- * operand 1 in bits 8-15 as far as the masks below show; presumably also
- * what CHECK_OPERAND reads (the macro is defined outside this excerpt).
- *
- * Restricted %rsp or %rbp must be processed by the appropriate nacl-special
- * instruction, not by a regular instruction. */
- if (*restricted_register == REG_RSP) {
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
- } else if (*restricted_register == REG_RBP) {
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
- }
- *restricted_register = NO_REG; /* Stays NO_REG unless a branch below sets it. */
- if (CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_8BIT) ||
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(0, REG_R15, OPERAND_SANDBOX_UNRESTRICTED) ||
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_8BIT) ||
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_RESTRICTED) ||
- CHECK_OPERAND(1, REG_R15, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= R15_MODIFIED;
- } else if ((CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(0, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED) ||
- (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= BPL_MODIFIED;
- } else if ((CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(0, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED) ||
- (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_8BIT) && rex_prefix) ||
- CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_UNRESTRICTED)) {
- *instruction_info_collected |= SPL_MODIFIED;
- /* Operand 0: the mask 0x70 selects bit 4 together with the two operand-type
- * bits (5-6). Bit 4 must be clear for the descriptor to denote a register;
- * when the type equals OPERAND_SANDBOX_RESTRICTED the low four bits become
- * *restricted_register. */
- } else if ((operand_states & 0x70) == (OPERAND_SANDBOX_RESTRICTED << 5)) {
- *restricted_register = operand_states & 0x0f; /* Operand 0 wins; operand 1
- * is still checked below for a restricted %rsp/%rbp, which is then flagged
- * as unprocessed. */
- if (CHECK_OPERAND(1, REG_RSP, OPERAND_SANDBOX_RESTRICTED)) {
- *instruction_info_collected |= RESTRICTED_RSP_UNPROCESSED;
- } else if (CHECK_OPERAND(1, REG_RBP, OPERAND_SANDBOX_RESTRICTED)) {
- *instruction_info_collected |= RESTRICTED_RBP_UNPROCESSED;
- }
- /* Operand 1: same test one byte higher. The mask 0x7000 selects bit 12
- * together with the two operand-type bits (13-14); bit 12 must be clear for
- * the descriptor to denote a register, and bits 8-11 then become
- * *restricted_register. */
- } else if ((operand_states & 0x7000) == (OPERAND_SANDBOX_RESTRICTED << 13)) {
- *restricted_register = (operand_states & 0x0f00) >> 8;
- }
- }
-
-/*
- * This function merges "dangerous" instruction with sandboxing instructions to
- * get a "superinstruction" and unmarks in-between jump targets.
- */
-static INLINE void ExpandSuperinstructionBySandboxingBytes(
- size_t sandbox_instructions_size,
- const uint8_t **instruction_begin,
- const uint8_t *data,
- bitmap_word *valid_targets) {
- *instruction_begin -= sandbox_instructions_size;
- /*
- * We need to unmark start of the "dangerous" instruction itself, too, but we
- * don't need to mark the beginning of the whole "superinstruction" - that's
- * why we move start by one byte and don't change the length.
- */
- UnmarkValidJumpTargets((*instruction_begin + 1 - data),
- sandbox_instructions_size,
- valid_targets);
-}
-
-/*
- * Return TRUE if naclcall or nacljmp uses the same register in all three
- * instructions.
- *
- * This version is for the case where "add %src_register, %dst_register" with
- * dst in RM field and src in REG field of ModR/M byte is used.
- *
- * There are five possible forms:
- *
- * 0: 83 eX e0 and $~0x1f,E86
- * 3: 4? 01 fX add RBASE,R86
- * 6: ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E86
- * 4: 4? 01 fX add RBASE,R86
- * 7: ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 83 eX e0 and $~0x1f,E86
- * 3: 4? 01 fX add RBASE,R86
- * 6: 4? ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E86
- * 4: 4? 01 fX add RBASE,R86
- * 7: 4? ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E64
- * 4: 4? 01 fX add RBASE,R64
- * 7: 4? ff eX jmpq *R64
- * ^ ^
- * instruction_begin current_position
- *
- * We don't care about "?" (they are checked by DFA).
- */
-static INLINE Bool VerifyNaclCallOrJmpAddToRM(const uint8_t *instruction_begin,
- const uint8_t *current_position) {
- return
- RMFromModRM(instruction_begin[-5]) == RMFromModRM(instruction_begin[-1]) &&
- RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
-}
-
-/*
- * Return TRUE if naclcall or nacljmp uses the same register in all three
- * instructions.
- *
- * This version is for the case where "add %src_register, %dst_register" with
- * dst in REG field and src in RM field of ModR/M byte is used.
- *
- * There are five possible forms:
- *
- * 0: 83 eX e0 and $~0x1f,E86
- * 3: 4? 03 Xf add RBASE,R86
- * 6: ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E86
- * 4: 4? 03 Xf add RBASE,R86
- * 7: ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 83 eX e0 and $~0x1f,E86
- * 3: 4? 03 Xf add RBASE,R86
- * 6: 4? ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E86
- * 4: 4? 03 Xf add RBASE,R86
- * 7: 4? ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E64
- * 4: 4? 03 Xf add RBASE,R64
- * 7: 4? ff eX jmpq *R64
- * ^ ^
- * instruction_begin current_position
- *
- * We don't care about "?" (they are checked by DFA).
- */
-static INLINE Bool VerifyNaclCallOrJmpAddToReg(
- const uint8_t *instruction_begin,
- const uint8_t *current_position) {
- return
- RMFromModRM(instruction_begin[-5]) == RegFromModRM(instruction_begin[-1]) &&
- RMFromModRM(instruction_begin[-5]) == RMFromModRM(current_position[0]);
-}
-
-/*
- * This function checks that naclcall or nacljmp are correct (that is: three
- * component instructions match) and if that is true then it merges call or jmp
- * with a sandboxing to get a "superinstruction" and removes in-between jump
- * targets. If it's not true then it triggers "unrecognized instruction" error
- * condition.
- *
- * This version is for the case where "add with dst register in RM field"
- * (opcode 0x01) and "add without REX prefix" is used.
- *
- * There are two possibile forms:
- *
- * 0: 83 eX e0 and $~0x1f,E86
- * 3: 4? 01 fX add RBASE,R86
- * 6: ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 83 eX e0 and $~0x1f,E86
- * 3: 4? 01 fX add RBASE,R86
- * 6: 4? ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- */
-static INLINE void ProcessNaclCallOrJmpAddToRMNoRex(
- uint32_t *instruction_info_collected,
- const uint8_t **instruction_begin,
- const uint8_t *current_position,
- const uint8_t *data,
- bitmap_word *valid_targets) {
- if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
- ExpandSuperinstructionBySandboxingBytes(
- 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
- else
- *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
-}
-
-/*
- * This function checks that naclcall or nacljmp are correct (that is: three
- * component instructions match) and if that is true then it merges call or jmp
- * with a sandboxing to get a "superinstruction" and removes in-between jump
- * targets. If it's not true then it triggers "unrecognized instruction" error
- * condition.
- *
- * This version is for the case where "add with dst register in REG field"
- * (opcode 0x03) and "add without REX prefix" is used.
- *
- * There are two possibile forms:
- *
- * 0: 83 eX e0 and $~0x1f,E86
- * 3: 4? 03 Xf add RBASE,R86
- * 6: ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 83 eX e0 and $~0x1f,E86
- * 3: 4? 03 Xf add RBASE,R86
- * 6: 4? ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- */
-static INLINE void ProcessNaclCallOrJmpAddToRegNoRex(
- uint32_t *instruction_info_collected,
- const uint8_t **instruction_begin,
- const uint8_t *current_position,
- const uint8_t *data,
- bitmap_word *valid_targets) {
- if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
- ExpandSuperinstructionBySandboxingBytes(
- 3 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
- else
- *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
-}
-
-/*
- * This function checks that naclcall or nacljmp are correct (that is: three
- * component instructions match) and if that is true then it merges call or jmp
- * with a sandboxing to get a "superinstruction" and removes in-between jump
- * targets. If it's not true then it triggers "unrecognized instruction" error
- * condition.
- *
- * This version is for the case where "add with dst register in RM field"
- * (opcode 0x01) and "add without REX prefix" is used.
- *
- * There are three possibile forms:
- *
- * 0: 4? 83 eX e0 and $~0x1f,E86
- * 4: 4? 01 fX add RBASE,R86
- * 7: ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E86
- * 4: 4? 01 fX add RBASE,R86
- * 7: 4? ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E64
- * 4: 4? 01 fX add RBASE,R64
- * 7: 4? ff eX jmpq *R64
- * ^ ^
- * instruction_begin current_position
- */
-static INLINE void ProcessNaclCallOrJmpAddToRMWithRex(
- uint32_t *instruction_info_collected,
- const uint8_t **instruction_begin,
- const uint8_t *current_position,
- const uint8_t *data,
- bitmap_word *valid_targets) {
- if (VerifyNaclCallOrJmpAddToRM(*instruction_begin, current_position))
- ExpandSuperinstructionBySandboxingBytes(
- 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
- else
- *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
-}
-
-/*
- * This function checks that naclcall or nacljmp are correct (that is: three
- * component instructions match) and if that is true then it merges call or jmp
- * with a sandboxing to get a "superinstruction" and removes in-between jump
- * targets. If it's not true then it triggers "unrecognized instruction" error
- * condition.
- *
- * This version is for the case where "add with dst register in REG field"
- * (opcode 0x03) and "add without REX prefix" is used.
- *
- * There are three possibile forms:
- *
- * 0: 4? 83 eX e0 and $~0x1f,E86
- * 4: 4? 03 Xf add RBASE,R86
- * 7: ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E86
- * 4: 4? 03 Xf add RBASE,R86
- * 7: 4? ff eX jmpq *R86
- * ^ ^
- * instruction_begin current_position
- *
- * 0: 4? 83 eX e0 and $~0x1f,E64
- * 4: 4? 03 Xf add RBASE,R64
- * 7: 4? ff eX jmpq *R64
- * ^ ^
- * instruction_begin current_position
- */
-static INLINE void ProcessNaclCallOrJmpAddToRegWithRex(
- uint32_t *instruction_info_collected,
- const uint8_t **instruction_begin,
- const uint8_t *current_position,
- const uint8_t *data,
- bitmap_word *valid_targets) {
- if (VerifyNaclCallOrJmpAddToReg(*instruction_begin, current_position))
- ExpandSuperinstructionBySandboxingBytes(
- 4 /* and */ + 3 /* add */, instruction_begin, data, valid_targets);
- else
- *instruction_info_collected |= UNRECOGNIZED_INSTRUCTION;
-}
-
-
- Bool ValidateChunkAMD64(const uint8_t *data, size_t size,
- uint32_t options,
- const NaClCPUFeaturesX86 *cpu_features,
- ValidationCallbackFunc user_callback,
- void *callback_data) { /*
- * Validates the `size` bytes at `data`. The Ragel-generated DFA is run over
- * the chunk one bundle at a time (or over the whole chunk at once when
- * PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM is set in `options`), and afterwards
- * the collected direct-jump destinations are checked against the collected
- * valid jump targets.
- *
- * `size` must be a multiple of kBundleSize (enforced with CHECK).
- * `cpu_features` is not referenced in the lines visible here; presumably it is
- * used by the DFA actions expanded at "write exec".
- * `user_callback` is called with `callback_data` for problems found here; its
- * return values are AND-ed into the result.
- *
- * Returns the accumulated result; errno is set to EINVAL when it is zero.
- * Returns FALSE with errno set to ENOMEM if a bitmap cannot be allocated.
- */
- bitmap_word valid_targets_small; /* In-place storage for tiny chunks. */
- bitmap_word jump_dests_small; /* In-place storage for tiny chunks. */
- bitmap_word *valid_targets;
- bitmap_word *jump_dests;
- const uint8_t *current_position;
- const uint8_t *end_of_bundle;
- int result = TRUE;
-
- CHECK(sizeof valid_targets_small == sizeof jump_dests_small);
- CHECK(size % kBundleSize == 0);
-
- /*
- * For very small sequences (one bundle) malloc is too expensive, so a single
- * bitmap_word on the stack is used instead.
- *
- * Note1: we allocate one extra bit, because we set valid jump target bits
- * _after_ instructions, so there will be one at the end of the chunk.
- *
- * Note2: we don't ever mark the first bit as a valid jump target but this is
- * not a problem because any aligned address is a valid jump target.
- */
- if ((size + 1) <= (sizeof valid_targets_small * 8)) {
- valid_targets_small = 0;
- valid_targets = &valid_targets_small;
- jump_dests_small = 0;
- jump_dests = &jump_dests_small;
- } else {
- valid_targets = BitmapAllocate(size + 1);
- jump_dests = BitmapAllocate(size + 1);
- if (!valid_targets || !jump_dests) {
- free(jump_dests); /* free(NULL) is a no-op, so both calls are safe. */
- free(valid_targets);
- errno = ENOMEM;
- return FALSE;
- }
- }
-
- /*
- * This option is usually used in tests: we will process the whole chunk
- * in one pass. Usually each bundle is processed separately which means
- * instructions (and super-instructions) can not cross borders of the bundle.
- */
- if (options & PROCESS_CHUNK_AS_A_CONTIGUOUS_STREAM)
- end_of_bundle = data + size;
- else
- end_of_bundle = data + kBundleSize;
-
- /*
- * Main loop. Here we process the data array bundle-after-bundle.
- * Ragel-produced DFA does all the checks with one exception: direct jumps.
- * It collects the two arrays: valid_targets and jump_dests which are used
- * to test direct jumps later.
- *
- * In contiguous-stream mode end_of_bundle starts at data + size, so the loop
- * body runs once and the loop condition then fails.
- */
- for (current_position = data;
- current_position < data + size;
- current_position = end_of_bundle,
- end_of_bundle = current_position + kBundleSize) {
- /* Start of the instruction being processed. */
- const uint8_t *instruction_begin = current_position;
- /* Only used locally in the end_of_instruction_cleanup action. */
- const uint8_t *instruction_end;
- int current_state;
- uint32_t instruction_info_collected = 0;
- /* Keeps one byte of information per operand in the current instruction:
- * 2 bits for register kinds,
- * 5 bits for register numbers (16 regs plus RIZ). */
- uint32_t operand_states = 0;
- enum OperandName base = NO_REG;
- enum OperandName index = NO_REG;
- enum OperandName restricted_register =
- EXTRACT_RESTRICTED_REGISTER_INITIAL_VALUE(options);
- uint8_t rex_prefix = FALSE;
- /* Top three bits of VEX2 are inverted: see AMD/Intel manual. */
- uint8_t vex_prefix2 = VEX_R | VEX_X | VEX_B;
- uint8_t vex_prefix3 = 0x00;
-
- %% write init;
- %% write exec;
-
- /*
- * Ragel DFA accepted the bundle, but we still need to make sure the last
- * instruction hasn't left %rbp or %rsp in restricted state. The callback
- * receives end_of_bundle as both positions plus the matching *_UNPROCESSED
- * flag combined with the register number in the restricted-register field.
- */
- if (restricted_register == REG_RBP)
- result &= user_callback(end_of_bundle, end_of_bundle,
- RESTRICTED_RBP_UNPROCESSED |
- ((REG_RBP << RESTRICTED_REGISTER_SHIFT) &
- RESTRICTED_REGISTER_MASK), callback_data);
- else if (restricted_register == REG_RSP)
- result &= user_callback(end_of_bundle, end_of_bundle,
- RESTRICTED_RSP_UNPROCESSED |
- ((REG_RSP << RESTRICTED_REGISTER_SHIFT) &
- RESTRICTED_REGISTER_MASK), callback_data);
- }
-
- /*
- * Check the direct jumps. All the targets from jump_dests must be in
- * valid_targets.
- */
- result &= ProcessInvalidJumpTargets(data, size, valid_targets, jump_dests,
- user_callback, callback_data);
-
- /* We only use malloc for large code sequences, so only those are freed. */
- if (jump_dests != &jump_dests_small) free(jump_dests);
- if (valid_targets != &valid_targets_small) free(valid_targets);
- if (!result) errno = EINVAL;
- return result;
- }
« no previous file with comments | « src/trusted/validator_ragel/unreviewed/validator_x86_32.rl ('k') | src/trusted/validator_ragel/validator.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698