Chromium Code Reviews| Index: sandbox/linux/seccomp-bpf/sandbox_bpf.cc |
| diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc |
| index eb03995904f362cf589289bc7139577a472958e0..86ff16c6f1d9c80aa99d1c6057d49dff4a6c88aa 100644 |
| --- a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc |
| +++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc |
| @@ -4,6 +4,7 @@ |
| #include "sandbox/linux/seccomp-bpf/codegen.h" |
| #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
| +#include "sandbox/linux/seccomp-bpf/syscall.h" |
| #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" |
| #include "sandbox/linux/seccomp-bpf/verifier.h" |
| @@ -319,6 +320,48 @@ void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, |
| return; |
| } |
| +void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { |
| + if (BPF_CLASS(insn->code) == BPF_RET && |
| + insn->k > SECCOMP_RET_TRAP && |
| + insn->k <= SECCOMP_RET_TRAP+trapArraySize_) { |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
Better to do insn->k - SECCOMP_RET_TRAP <= trapArr
|
| + const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1]; |
| + if (!err.safe_) { |
| + bool *is_unsafe = static_cast<bool *>(aux); |
| + *is_unsafe = true; |
| + } |
| + } |
| +} |
| + |
| +void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) { |
| + // When inside an UnsafeTrap() callback, we want to allow all system calls. |
| + // This means, we must conditionally disable the sandbox -- and that's not |
| + // something that kernel-side BPF filters can do, as they cannot inspect |
| + // any state other than the syscall arguments. |
| + // But if we redirect all error handlers to user-space, then we can easily |
| + // make this decision. |
| + // The performance penalty for this extra round-trip to user-space is not |
| + // actually that bad, as we only ever pay it for denied system calls; and a |
| + // typical program has very few of these. |
| + if (BPF_CLASS(insn->code) == BPF_RET && |
| + (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { |
| + insn->k = Trap(ReturnErrno, |
| + reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); |
| + } |
| +} |
| + |
| +ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { |
| + // We need to replicate the behavior of RedirectToUserspace(), so that our |
| + // Verifier can still work correctly. |
| + Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); |
| + const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); |
| + ErrorCode err = evaluator.first(sysnum, evaluator.second); |
| + if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { |
| + return Trap(ReturnErrno, |
| + reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); |
| + } |
| + return err; |
| +} |
| + |
| void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { |
| if (status_ == STATUS_ENABLED) { |
| SANDBOX_DIE("Cannot change policy after sandbox has started"); |
| @@ -337,8 +380,8 @@ void Sandbox::installFilter(bool quiet) { |
| // Set new SIGSYS handler |
| struct sigaction sa; |
| memset(&sa, 0, sizeof(sa)); |
| - sa.sa_sigaction = &sigSys; |
| - sa.sa_flags = SA_SIGINFO; |
| + sa.sa_sigaction = sigSys; |
| + sa.sa_flags = SA_SIGINFO | SA_NODEFER; |
| if (sigaction(SIGSYS, &sa, NULL) < 0) { |
| goto filter_failed; |
| } |
| @@ -369,33 +412,13 @@ void Sandbox::installFilter(bool quiet) { |
| Instruction *head = |
| gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| offsetof(struct arch_seccomp_data, arch), |
| - gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, |
| tail = |
| - // Grab the system call number, so that we can implement jump tables. |
| - gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| - offsetof(struct arch_seccomp_data, nr)), |
| + gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, |
| + NULL, |
| gen->MakeInstruction(BPF_RET+BPF_K, |
| Kill( |
| "Invalid audit architecture in BPF filter").err_))); |
| - // On Intel architectures, verify that system call numbers are in the |
| - // expected number range. The older i386 and x86-64 APIs clear bit 30 |
| - // on all system calls. The newer x32 API always sets bit 30. |
| -#if defined(__i386__) || defined(__x86_64__) |
| - Instruction *invalidX32 = |
| - gen->MakeInstruction(BPF_RET+BPF_K, |
| - Kill("Illegal mixing of system call ABIs").err_); |
| - Instruction *checkX32 = |
| -#if defined(__x86_64__) && defined(__ILP32__) |
| - gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32); |
| -#else |
| - gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0); |
| -#endif |
| - gen->JoinInstructions(tail, checkX32); |
| - tail = checkX32; |
| -#endif |
| - |
| - |
| { |
| // Evaluate all possible system calls and group their ErrorCodes into |
| // ranges of identical codes. |
| @@ -406,6 +429,108 @@ void Sandbox::installFilter(bool quiet) { |
| Instruction *jumptable = |
| assembleJumpTable(gen, ranges.begin(), ranges.end()); |
| + // If there is at least one UnsafeTrap() in our program, the entire sandbox |
| + // is unsafe. We need to modify the program so that all non- |
| + // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then |
| + // allow us to temporarily disable sandboxing rules inside of callbacks to |
| + // UnsafeTrap(). |
| + has_unsafe_traps_ = false; |
| + gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); |
| + |
| + // Grab the system call number, so that we can implement jump tables. |
| + Instruction *load_nr = |
| + gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| + offsetof(struct arch_seccomp_data, nr)); |
| + |
| + // If our BPF program has unsafe jumps, enable support for them. This |
| + // test happens very early in the BPF filter program. Even before we |
| + // consider looking at system call numbers. |
| + // As support for unsafe jumps essentially defeats all the security |
| + // measures that the sandbox provides, we print a big warning message -- |
| + // and of course, we make sure to only ever enable this feature if it |
| + // is actually requested by the sandbox policy. |
| + if (has_unsafe_traps_) { |
| + if (Syscall(-1) == -1 && errno == ENOSYS) { |
| + SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this " |
| + "architecture"); |
| + } |
| + |
| + EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; |
| + void *aux = evaluators_.begin()->second; |
| + if (!evaluateSyscall(__NR_rt_sigprocmask, aux). |
| + Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) || |
| + !evaluateSyscall(__NR_rt_sigreturn, aux). |
| + Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) |
| +#if defined(__NR_sigprocmask) |
| + || !evaluateSyscall(__NR_sigprocmask, aux). |
| + Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) |
| +#endif |
| +#if defined(__NR_sigreturn) |
| + || !evaluateSyscall(__NR_sigreturn, aux). |
| + Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) |
| +#endif |
| + ) { |
| + SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must " |
| + "unconditionally allow sigreturn() and sigprocmask()"); |
| + } |
| + |
| + SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes"); |
| + gen->Traverse(jumptable, RedirectToUserspace, NULL); |
| + |
| + // Allow system calls, if they originate from our magic return address |
| + // (which we can query by calling Syscall(-1)). |
| + uintptr_t syscall_entry_point = static_cast<uintptr_t>(Syscall(-1)); |
| + uint32_t low = static_cast<uint32_t>(syscall_entry_point); |
| +#if __SIZEOF_POINTER__ > 4 |
| + uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); |
| +#endif |
| + |
| + // BPF cannot do native 64bit comparisons. On 64bit architectures, we |
| + // have to compare both 32bit halves of the instruction pointer. If they |
| + // match what we expect, we return ERR_ALLOWED. If either or both don't |
| + // match, we continue evaluating the rest of the sandbox policy. |
| + Instruction *escape_hatch = |
| + gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| + offsetof(struct arch_seccomp_data, |
| + instruction_pointer) + |
| + (__SIZEOF_POINTER__ > 4 && |
| + __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0), |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
As agreed offline, please add an #error on __BIG_E
|
| + gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low, |
| +#if __SIZEOF_POINTER__ > 4 |
| + gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| + offsetof(struct arch_seccomp_data, |
| + instruction_pointer) + |
| + (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4), |
| + gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi, |
| +#endif |
| + gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)), |
| +#if __SIZEOF_POINTER__ > 4 |
| + load_nr)), |
| +#endif |
| + load_nr)); |
| + gen->JoinInstructions(tail, escape_hatch); |
| + } else { |
| + gen->JoinInstructions(tail, load_nr); |
| + } |
| + tail = load_nr; |
| + |
| + // On Intel architectures, verify that system call numbers are in the |
| + // expected number range. The older i386 and x86-64 APIs clear bit 30 |
| + // on all system calls. The newer x32 API always sets bit 30. |
| +#if defined(__i386__) || defined(__x86_64__) |
| + Instruction *invalidX32 = |
| + gen->MakeInstruction(BPF_RET+BPF_K, |
| + Kill("Illegal mixing of system call ABIs").err_); |
| + Instruction *checkX32 = |
| +#if defined(__x86_64__) && defined(__ILP32__) |
| + gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32); |
| +#else |
| + gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0); |
| +#endif |
| + gen->JoinInstructions(tail, checkX32); |
| + tail = checkX32; |
| +#endif |
| + |
| // Append jump table to our pre-amble |
| gen->JoinInstructions(tail, jumptable); |
| } |
| @@ -419,9 +544,20 @@ void Sandbox::installFilter(bool quiet) { |
| // correctly. Otherwise, there is an internal error in our BPF compiler. |
| // There is really nothing the caller can do until the bug is fixed. |
| #ifndef NDEBUG |
| - const char *err = NULL; |
| - if (!Verifier::VerifyBPF(*program, evaluators_, &err)) { |
| - SANDBOX_DIE(err); |
| + { |
| + // If we previously rewrote the BPF program so that it calls user-space |
| + // whenever we return an "errno" value from the filter, then we have to |
| + // wrap our system call evaluator to perform the same operation. Otherwise, |
| + // the verifier would also report a mismatch in return codes. |
| + Evaluators redirected_evaluators; |
| + redirected_evaluators.push_back( |
| + std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_)); |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
nit: two more spaces for indent.
|
| + |
| + const char *err = NULL; |
| + if (!Verifier::VerifyBPF(*program, |
| + has_unsafe_traps_ ? redirected_evaluators : evaluators_, &err)) { |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
Nit: indent arguments together.
|
| + SANDBOX_DIE(err); |
| + } |
| } |
| #endif |
| @@ -444,7 +580,6 @@ void Sandbox::installFilter(bool quiet) { |
| // Release memory that is no longer needed |
| evaluators_.clear(); |
| - errMap_.clear(); |
| #if defined(SECCOMP_BPF_VALGRIND_HACKS) |
| // Valgrind is really not happy about our sandbox. Disable it when running |
| @@ -561,27 +696,56 @@ void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { |
| goto sigsys_err; |
| } |
| - // Copy the seccomp-specific data into a arch_seccomp_data structure. This |
| - // is what we are showing to TrapFnc callbacks that the system call evaluator |
| - // registered with the sandbox. |
| - struct arch_seccomp_data data = { |
| - sigsys.nr, |
| - SECCOMP_ARCH, |
| - reinterpret_cast<uint64_t>(sigsys.ip), |
| - { |
| - static_cast<uint64_t>(SECCOMP_PARM1(ctx)), |
| - static_cast<uint64_t>(SECCOMP_PARM2(ctx)), |
| - static_cast<uint64_t>(SECCOMP_PARM3(ctx)), |
| - static_cast<uint64_t>(SECCOMP_PARM4(ctx)), |
| - static_cast<uint64_t>(SECCOMP_PARM5(ctx)), |
| - static_cast<uint64_t>(SECCOMP_PARM6(ctx)) |
| + // We need to tell whether we are performing a "normal" callback, or |
| + // whether we were called recursively from within a UnsafeTrap() callback. |
| + // This is a little tricky to do, because we need to somehow get access to |
| + // per-thread data from within a signal context. Normal TLS storage is not |
| + // safely accessible at this time. We could roll our own, but that involves |
| + // a lot of complexity. Instead, we co-opt one bit in the signal mask. |
| + // If SIGBUS is blocked, we assume that we have been called recursively. |
| + // There is a possibility for collision with other code that needs to do |
| + // this, but in practice the risks are low. |
| + intptr_t rc; |
| + if (has_unsafe_traps_ && |
| + sigismember(&ctx->uc_sigmask, SIGBUS)) { |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
This is pretty hack-ish. It's ok-ish since it's a
|
| + errno = old_errno; |
| + if (sigsys.nr == __NR_clone) { |
| + SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); |
| + } |
| + rc = Syscall(sigsys.nr, |
| + SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), |
| + SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), |
| + SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); |
| + } else { |
| + const ErrorCode& err = trapArray_[info->si_errno - 1]; |
| + if (!err.safe_) { |
| + sigset_t mask; |
| + sigemptyset(&mask); |
| + sigaddset(&mask, SIGBUS); |
| + sigprocmask(SIG_BLOCK, &mask, NULL); |
| } |
| - }; |
| - // Now call the TrapFnc callback associated with this particular instance |
| - // of SECCOMP_RET_TRAP. |
| - const ErrorCode& err = trapArray_[info->si_errno - 1]; |
| - intptr_t rc = err.fnc_(data, err.aux_); |
| + // Copy the seccomp-specific data into an arch_seccomp_data structure. This |
| + // is what we are showing to TrapFnc callbacks that the system call |
| + // evaluator registered with the sandbox. |
| + struct arch_seccomp_data data = { |
| + sigsys.nr, |
| + SECCOMP_ARCH, |
| + reinterpret_cast<uint64_t>(sigsys.ip), |
| + { |
| + static_cast<uint64_t>(SECCOMP_PARM1(ctx)), |
| + static_cast<uint64_t>(SECCOMP_PARM2(ctx)), |
| + static_cast<uint64_t>(SECCOMP_PARM3(ctx)), |
| + static_cast<uint64_t>(SECCOMP_PARM4(ctx)), |
| + static_cast<uint64_t>(SECCOMP_PARM5(ctx)), |
| + static_cast<uint64_t>(SECCOMP_PARM6(ctx)) |
| + } |
| + }; |
| + |
| + // Now call the TrapFnc callback associated with this particular instance |
| + // of SECCOMP_RET_TRAP. |
| + rc = err.fnc_(data, err.aux_); |
| + } |
| // Update the CPU register that stores the return code of the system call |
| // that we just handled, and restore "errno" to the value that it had |
| @@ -592,10 +756,21 @@ void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { |
| return; |
| } |
| -ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { |
| +bool Sandbox::TrapKey::operator<(const Sandbox::TrapKey& o) const { |
| + if (fnc != o.fnc) { |
| + return fnc < o.fnc; |
| + } else if (aux != o.aux) { |
| + return aux < o.aux; |
| + } else { |
| + return safe < o.safe; |
| + } |
| +} |
| + |
| +ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, |
| + bool safe) { |
| // Each unique pair of TrapFnc and auxiliary data make up a distinct instance |
| // of a SECCOMP_RET_TRAP. |
| - std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux); |
| + TrapKey key(fnc, aux, safe); |
| TrapIds::const_iterator iter = trapIds_.find(key); |
| uint16_t id; |
| if (iter != trapIds_.end()) { |
| @@ -618,7 +793,7 @@ ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { |
| } |
| id = traps_->size() + 1; |
| - traps_->push_back(ErrorCode(fnc, aux, id)); |
| + traps_->push_back(ErrorCode(fnc, aux, safe, id)); |
| trapIds_[key] = id; |
| // We want to access the traps_ vector from our signal handler. But |
| @@ -629,10 +804,23 @@ ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { |
| // signal handler, where we can safely do so. |
| trapArray_ = &(*traps_)[0]; |
| trapArraySize_ = id; |
| + return traps_->back(); |
| } |
| - ErrorCode err = ErrorCode(fnc, aux, id); |
| - return errMap_[err.err()] = err; |
| + return ErrorCode(fnc, aux, safe, id); |
| +} |
| + |
| +ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { |
| + return MakeTrap(fnc, aux, true); |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
Add a comment near true "/* Safe trap */"
|
| +} |
| + |
| +ErrorCode Sandbox::UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux) { |
| + return MakeTrap(fnc, aux, false); |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
Same here.
|
| +} |
| + |
| +intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { |
| + int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; |
| + return -err; |
| } |
| intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { |
| @@ -646,10 +834,10 @@ ErrorCode Sandbox::Kill(const char *msg) { |
| Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
| int Sandbox::proc_fd_ = -1; |
| Sandbox::Evaluators Sandbox::evaluators_; |
| -Sandbox::ErrMap Sandbox::errMap_; |
| Sandbox::Traps *Sandbox::traps_ = NULL; |
| Sandbox::TrapIds Sandbox::trapIds_; |
| ErrorCode *Sandbox::trapArray_ = NULL; |
| size_t Sandbox::trapArraySize_ = 0; |
| + bool Sandbox::has_unsafe_traps_ = false; |
| } // namespace |