// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <asm/unistd.h>
#include <bits/wordsize.h>
#include <errno.h>
#include <stdarg.h>

#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
#include "sandbox/linux/seccomp-bpf/syscall.h"
| 13 |
namespace playground2 {

// File-scope assembly trampoline "SyscallAsm". It receives the system call
// number in the architecture's syscall-number register and a pointer to an
// array of six argument words in a fixed scratch register (%edi / %r12 / r6),
// then traps into the kernel. A negative syscall number instead returns the
// address immediately after the trap instruction, which BPF filters use as a
// marker for syscalls made through this function.
asm(      // We need to be able to tell the kernel exactly where we made a
          // system call. The C++ compiler likes to sometimes clone or
          // inline code, which would inadvertently end up duplicating
          // the entry point.
          // "gcc" can suppress code duplication with suitable function
          // attributes, but "clang" doesn't have this ability.
          // The "clang" developer mailing list suggested that the correct
          // and portable solution is a file-scope assembly block.
          // N.B. We do mark our code as a proper function so that backtraces
          // work correctly. But we make absolutely no attempt to use the
          // ABI's calling conventions for passing arguments. We will only
          // ever be called from assembly code and thus can pick more
          // suitable calling conventions.
#if defined(__i386__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%eax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "int $0x80". This address can be
    // used as a marker that BPF code inspects.
    "test %eax, %eax\n"
    "jge  1f\n"
    // Always, make sure that our code is position-independent, or
    // address space randomization might not work on i386. This means,
    // we can't use "lea", but instead have to rely on "call/pop".
    "call 0f;   .cfi_adjust_cfa_offset  4\n"
    "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
    "addl $2f-0b, %eax\n"
    "ret\n"
    // Save register that we don't want to clobber. On i386, we need to
    // save relatively aggressively, as there are a couple or registers
    // that are used internally (e.g. %ebx for position-independent
    // code, and %ebp for the frame pointer), and as we need to keep at
    // least a few registers available for the register allocator.
    "1:push %esi; .cfi_adjust_cfa_offset 4\n"
    "push %edi; .cfi_adjust_cfa_offset 4\n"
    "push %ebx; .cfi_adjust_cfa_offset 4\n"
    "push %ebp; .cfi_adjust_cfa_offset 4\n"
    // Copy entries from the array holding the arguments into the
    // correct CPU registers.
    // %edi holds the pointer to the argument array itself, so it must
    // be overwritten last (after args[4] has been read through it).
    "movl  0(%edi), %ebx\n"
    "movl  4(%edi), %ecx\n"
    "movl  8(%edi), %edx\n"
    "movl 12(%edi), %esi\n"
    "movl 20(%edi), %ebp\n"
    "movl 16(%edi), %edi\n"
    // Enter the kernel.
    "int  $0x80\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:"
    // Restore any clobbered registers that we didn't declare to the
    // compiler.
    "pop  %ebp; .cfi_adjust_cfa_offset -4\n"
    "pop  %ebx; .cfi_adjust_cfa_offset -4\n"
    "pop  %edi; .cfi_adjust_cfa_offset -4\n"
    "pop  %esi; .cfi_adjust_cfa_offset -4\n"
    "ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__x86_64__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%rax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "test %rax, %rax\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or the
    // linker will throw a hissy fit on x86-64.
    "call 0f;   .cfi_adjust_cfa_offset  8\n"
    "0:pop  %rax; .cfi_adjust_cfa_offset -8\n"
    "addq $2f-0b, %rax\n"
    "ret\n"
    // We declared all clobbered registers to the compiler. On x86-64,
    // there really isn't much of a problem with register pressure. So,
    // we can go ahead and directly copy the entries from the arguments
    // array into the appropriate CPU registers.
    "1:movq  0(%r12), %rdi\n"
    "movq  8(%r12), %rsi\n"
    "movq 16(%r12), %rdx\n"
    "movq 24(%r12), %r10\n"
    "movq 32(%r12), %r8\n"
    "movq 40(%r12), %r9\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__arm__)
    // Throughout this file, we use the same mode (ARM vs. thumb)
    // that the C++ compiler uses. This means, when transfering control
    // from C++ to assembly code, we do not need to switch modes (e.g.
    // by using the "bx" instruction). It also means that our assembly
    // code should not be invoked directly from code that lives in
    // other compilation units, as we don't bother implementing thumb
    // interworking. That's OK, as we don't make any of the assembly
    // symbols public. They are all local to this file.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
#if defined(__thumb__)
    ".thumb_func\n"
#else
    ".arm\n"
#endif
    "SyscallAsm:.fnstart\n"
    "@ args = 0, pretend = 0, frame = 8\n"
    "@ frame_needed = 1, uses_anonymous_args = 0\n"
#if defined(__thumb__)
    // In thumb mode, r7 serves as the frame pointer, so it is saved and
    // restored here rather than declared as clobbered by the caller.
    ".cfi_startproc\n"
    "push {r7, lr}\n"
    ".cfi_offset 14, -4\n"
    ".cfi_offset  7, -8\n"
    "mov r7, sp\n"
    ".cfi_def_cfa_register 7\n"
    ".cfi_def_cfa_offset 8\n"
#else
    "stmfd sp!, {fp, lr}\n"
    "add fp, sp, #4\n"
#endif
    // Check if "r0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "swi 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp r0, #0\n"
    "bge 1f\n"
    "ldr r0, =2f\n"
    "b   2f\n"
    // We declared (almost) all clobbered registers to the compiler. On
    // ARM there is no particular register pressure. So, we can go
    // ahead and directly copy the entries from the arguments array
    // into the appropriate CPU registers.
    // The syscall number moves from r0 into r7 (the EABI syscall-number
    // register), and r0 is then reloaded with the first argument. r6
    // holds the pointer to the argument array.
    "1:ldr r5, [r6, #20]\n"
    "ldr r4, [r6, #16]\n"
    "ldr r3, [r6, #12]\n"
    "ldr r2, [r6, #8]\n"
    "ldr r1, [r6, #4]\n"
    "mov r7, r0\n"
    "ldr r0, [r6, #0]\n"
    // Enter the kernel
    "swi 0\n"
    // Restore the frame pointer. Also restore the program counter from
    // the link register; this makes us return to the caller.
#if defined(__thumb__)
    "2:pop {r7, pc}\n"
    ".cfi_endproc\n"
#else
    "2:ldmfd sp!, {fp, pc}\n"
#endif
    ".fnend\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#endif
    );  // asm
| 175 |
| 176 intptr_t SandboxSyscall(int nr, ...) { |
| 177 // It is most convenient for the caller to pass a variadic list of arguments. |
| 178 // But this is difficult to handle in assembly code without making |
| 179 // assumptions about internal implementation details of "va_list". So, we |
| 180 // first use C code to copy all the arguments into an array, where they are |
| 181 // easily accessible to asm(). |
| 182 // This is preferable over copying them into individual variables, which |
| 183 // can result in too much register pressure. |
| 184 if (sizeof(void *)*8 != __WORDSIZE) { |
| 185 SANDBOX_DIE("This can't happen! " |
| 186 "__WORDSIZE doesn't agree with actual size"); |
| 187 } |
| 188 void *args[6]; |
| 189 va_list ap; |
| 190 |
| 191 // System calls take a system call number (typically passed in %eax or |
| 192 // %rax) and up to six arguments (passed in general-purpose CPU registers). |
| 193 // |
| 194 // On 32bit systems, all variadic arguments are passed on the stack as 32bit |
| 195 // quantities. We can use an arbitrary 32bit type to retrieve them with |
| 196 // va_arg() and then forward them to the kernel in the appropriate CPU |
| 197 // register. We do not need to know whether this is an integer or a pointer |
| 198 // value. |
| 199 // |
| 200 // On 64bit systems, variadic arguments can be either 32bit or 64bit wide, |
| 201 // which would seem to make it more important that we pass the correct type |
| 202 // to va_arg(). And we really can't know what this type is unless we have a |
| 203 // table with function signatures for all system calls. |
| 204 // |
| 205 // Fortunately, on x86-64 this is less critical. The first six function |
| 206 // arguments will be passed in CPU registers, no matter whether they were |
| 207 // named or variadic. This only leaves us with a single argument (if present) |
| 208 // that could be passed on the stack. And since x86-64 is little endian, |
| 209 // it will have the correct value both for 32bit and 64bit quantities. |
| 210 // |
| 211 // N.B. Because of how the x86-64 ABI works, it is possible that 32bit |
| 212 // quantities will have undefined garbage bits in the upper 32 bits of a |
| 213 // 64bit register. This is relatively unlikely for the first five system |
| 214 // call arguments, as the processor does automatic sign extensions and zero |
| 215 // filling so frequently, there rarely is garbage in CPU registers. But it |
| 216 // is quite likely for the last argument, which is passed on the stack. |
| 217 // That's generally OK, because the kernel has the correct function |
| 218 // signatures and knows to only inspect the LSB of a 32bit value. |
| 219 // But callers must be careful in cases, where the compiler cannot tell |
| 220 // the difference (e.g. when passing NULL to any system call, it must |
| 221 // always be cast to a pointer type). |
| 222 // The glibc implementation of syscall() has the exact same issues. |
| 223 // In the unlikely event that this ever becomes a problem, we could add |
| 224 // code that handles six-argument system calls specially. The number of |
| 225 // system calls that take six arguments and expect a 32bit value in the |
| 226 // sixth argument is very limited. |
| 227 va_start(ap, nr); |
| 228 args[0] = va_arg(ap, void *); |
| 229 args[1] = va_arg(ap, void *); |
| 230 args[2] = va_arg(ap, void *); |
| 231 args[3] = va_arg(ap, void *); |
| 232 args[4] = va_arg(ap, void *); |
| 233 args[5] = va_arg(ap, void *); |
| 234 va_end(ap); |
| 235 |
| 236 // Invoke our file-scope assembly code. The constraints have been picked |
| 237 // carefully to match what the rest of the assembly code expects in input, |
| 238 // output, and clobbered registers. |
| 239 #if defined(__i386__) |
| 240 intptr_t ret = nr; |
| 241 asm volatile( |
| 242 "call SyscallAsm\n" |
| 243 // N.B. These are not the calling conventions normally used by the ABI. |
| 244 : "=a"(ret) |
| 245 : "0"(ret), "D"(args) |
| 246 : "esp", "memory", "ecx", "edx"); |
| 247 #elif defined(__x86_64__) |
| 248 intptr_t ret = nr; |
| 249 { |
| 250 register void **data __asm__("r12") = args; |
| 251 asm volatile( |
| 252 "call SyscallAsm\n" |
| 253 // N.B. These are not the calling conventions normally used by the ABI. |
| 254 : "=a"(ret) |
| 255 : "0"(ret), "r"(data) |
| 256 : "rsp", "memory", |
| 257 "rcx", "rdi", "rsi", "rdx", "r8", "r9", "r10", "r11"); |
| 258 } |
| 259 #elif defined(__arm__) |
| 260 intptr_t ret; |
| 261 { |
| 262 register intptr_t inout __asm__("r0") = nr; |
| 263 register void **data __asm__("r6") = args; |
| 264 asm volatile( |
| 265 "bl SyscallAsm\n" |
| 266 // N.B. These are not the calling conventions normally used by the ABI. |
| 267 : "=r"(inout) |
| 268 : "0"(inout), "r"(data) |
| 269 : "lr", "memory", "r1", "r2", "r3", "r4", "r5" |
| 270 #if !defined(__arm__) |
| 271 // In thumb mode, we cannot use "r7" as a general purpose register, as |
| 272 // it is our frame pointer. We have to manually manage and preserve it. |
| 273 // In ARM mode, we have a dedicated frame pointer register and "r7" is |
| 274 // thus available as a general purpose register. We don't preserve it, |
| 275 // but instead mark it as clobbered. |
| 276 , "r7" |
| 277 #endif |
| 278 ); |
| 279 ret = inout; |
| 280 } |
| 281 #else |
| 282 errno = ENOSYS; |
| 283 intptr_t ret = -1; |
| 284 #endif |
| 285 return ret; |
| 286 } |

}  // namespace playground2