Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1342)

Unified Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 11363212: Added support for greylisting of system calls. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Addressed reviewer's comments Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: sandbox/linux/seccomp-bpf/sandbox_bpf.cc
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
index eb03995904f362cf589289bc7139577a472958e0..86ff16c6f1d9c80aa99d1c6057d49dff4a6c88aa 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
@@ -4,6 +4,7 @@
#include "sandbox/linux/seccomp-bpf/codegen.h"
#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/syscall.h"
#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
#include "sandbox/linux/seccomp-bpf/verifier.h"
@@ -319,6 +320,48 @@ void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,
return;
}
+void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
+ if (BPF_CLASS(insn->code) == BPF_RET &&
+ insn->k > SECCOMP_RET_TRAP &&
+ insn->k <= SECCOMP_RET_TRAP+trapArraySize_) {
jln (very slow on Chromium) 2012/11/20 01:08:31 Better to do insn->k - SECCOMP_RET_TRAP <= trapArr
+ const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1];
+ if (!err.safe_) {
+ bool *is_unsafe = static_cast<bool *>(aux);
+ *is_unsafe = true;
+ }
+ }
+}
+
+void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) {
+ // When inside an UnsafeTrap() callback, we want to allow all system calls.
+ // This means, we must conditionally disable the sandbox -- and that's not
+ // something that kernel-side BPF filters can do, as they cannot inspect
+ // any state other than the syscall arguments.
+ // But if we redirect all error handlers to user-space, then we can easily
+ // make this decision.
+ // The performance penalty for this extra round-trip to user-space is not
+ // actually that bad, as we only ever pay it for denied system calls; and a
+ // typical program has very few of these.
+ if (BPF_CLASS(insn->code) == BPF_RET &&
+ (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
+ insn->k = Trap(ReturnErrno,
+ reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
+ }
+}
+
+ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {
+ // We need to replicate the behavior of RedirectToUserspace(), so that our
+ // Verifier can still work correctly.
+ Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);
+ const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();
+ ErrorCode err = evaluator.first(sysnum, evaluator.second);
+ if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
+ return Trap(ReturnErrno,
+ reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
+ }
+ return err;
+}
+
void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) {
if (status_ == STATUS_ENABLED) {
SANDBOX_DIE("Cannot change policy after sandbox has started");
@@ -337,8 +380,8 @@ void Sandbox::installFilter(bool quiet) {
// Set new SIGSYS handler
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = &sigSys;
- sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = sigSys;
+ sa.sa_flags = SA_SIGINFO | SA_NODEFER;
if (sigaction(SIGSYS, &sa, NULL) < 0) {
goto filter_failed;
}
@@ -369,33 +412,13 @@ void Sandbox::installFilter(bool quiet) {
Instruction *head =
gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
offsetof(struct arch_seccomp_data, arch),
- gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,
tail =
- // Grab the system call number, so that we can implement jump tables.
- gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
- offsetof(struct arch_seccomp_data, nr)),
+ gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,
+ NULL,
gen->MakeInstruction(BPF_RET+BPF_K,
Kill(
"Invalid audit architecture in BPF filter").err_)));
- // On Intel architectures, verify that system call numbers are in the
- // expected number range. The older i386 and x86-64 APIs clear bit 30
- // on all system calls. The newer x32 API always sets bit 30.
-#if defined(__i386__) || defined(__x86_64__)
- Instruction *invalidX32 =
- gen->MakeInstruction(BPF_RET+BPF_K,
- Kill("Illegal mixing of system call ABIs").err_);
- Instruction *checkX32 =
-#if defined(__x86_64__) && defined(__ILP32__)
- gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);
-#else
- gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);
-#endif
- gen->JoinInstructions(tail, checkX32);
- tail = checkX32;
-#endif
-
-
{
// Evaluate all possible system calls and group their ErrorCodes into
// ranges of identical codes.
@@ -406,6 +429,108 @@ void Sandbox::installFilter(bool quiet) {
Instruction *jumptable =
assembleJumpTable(gen, ranges.begin(), ranges.end());
+ // If there is at least one UnsafeTrap() in our program, the entire sandbox
+ // is unsafe. We need to modify the program so that all non-
+ // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then
+ // allow us to temporarily disable sandboxing rules inside of callbacks to
+ // UnsafeTrap().
+ has_unsafe_traps_ = false;
+ gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);
+
+ // Grab the system call number, so that we can implement jump tables.
+ Instruction *load_nr =
+ gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+ offsetof(struct arch_seccomp_data, nr));
+
+ // If our BPF program has unsafe jumps, enable support for them. This
+ // test happens very early in the BPF filter program. Even before we
+ // consider looking at system call numbers.
+ // As support for unsafe jumps essentially defeats all the security
+ // measures that the sandbox provides, we print a big warning message --
+ // and of course, we make sure to only ever enable this feature if it
+ // is actually requested by the sandbox policy.
+ if (has_unsafe_traps_) {
+ if (Syscall(-1) == -1 && errno == ENOSYS) {
+ SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "
+ "architecture");
+ }
+
+ EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;
+ void *aux = evaluators_.begin()->second;
+ if (!evaluateSyscall(__NR_rt_sigprocmask, aux).
+ Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) ||
+ !evaluateSyscall(__NR_rt_sigreturn, aux).
+ Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
+#if defined(__NR_sigprocmask)
+ || !evaluateSyscall(__NR_sigprocmask, aux).
+ Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
+#endif
+#if defined(__NR_sigreturn)
+ || !evaluateSyscall(__NR_sigreturn, aux).
+ Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
+#endif
+ ) {
+ SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "
+ "unconditionally allow sigreturn() and sigprocmask()");
+ }
+
+ SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes");
+ gen->Traverse(jumptable, RedirectToUserspace, NULL);
+
+ // Allow system calls, if they originate from our magic return address
+ // (which we can query by calling Syscall(-1)).
+ uintptr_t syscall_entry_point = static_cast<uintptr_t>(Syscall(-1));
+ uint32_t low = static_cast<uint32_t>(syscall_entry_point);
+#if __SIZEOF_POINTER__ > 4
+ uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);
+#endif
+
+ // BPF cannot do native 64bit comparisons. On 64bit architectures, we
+ // have to compare both 32bit halves of the instruction pointer. If they
+ // match what we expect, we return ERR_ALLOWED. If either or both don't
+ // match, we continue evaluating the rest of the sandbox policy.
+ Instruction *escape_hatch =
+ gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+ offsetof(struct arch_seccomp_data,
+ instruction_pointer) +
+ (__SIZEOF_POINTER__ > 4 &&
+ __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0),
jln (very slow on Chromium) 2012/11/20 01:08:31 As agreed offline, please add an #error on __BIG_E
+ gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low,
+#if __SIZEOF_POINTER__ > 4
+ gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+ offsetof(struct arch_seccomp_data,
+ instruction_pointer) +
+ (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4),
+ gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi,
+#endif
+ gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)),
+#if __SIZEOF_POINTER__ > 4
+ load_nr)),
+#endif
+ load_nr));
+ gen->JoinInstructions(tail, escape_hatch);
+ } else {
+ gen->JoinInstructions(tail, load_nr);
+ }
+ tail = load_nr;
+
+ // On Intel architectures, verify that system call numbers are in the
+ // expected number range. The older i386 and x86-64 APIs clear bit 30
+ // on all system calls. The newer x32 API always sets bit 30.
+#if defined(__i386__) || defined(__x86_64__)
+ Instruction *invalidX32 =
+ gen->MakeInstruction(BPF_RET+BPF_K,
+ Kill("Illegal mixing of system call ABIs").err_);
+ Instruction *checkX32 =
+#if defined(__x86_64__) && defined(__ILP32__)
+ gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);
+#else
+ gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);
+#endif
+ gen->JoinInstructions(tail, checkX32);
+ tail = checkX32;
+#endif
+
// Append jump table to our pre-amble
gen->JoinInstructions(tail, jumptable);
}
@@ -419,9 +544,20 @@ void Sandbox::installFilter(bool quiet) {
// correctly. Otherwise, there is an internal error in our BPF compiler.
// There is really nothing the caller can do until the bug is fixed.
#ifndef NDEBUG
- const char *err = NULL;
- if (!Verifier::VerifyBPF(*program, evaluators_, &err)) {
- SANDBOX_DIE(err);
+ {
+ // If we previously rewrote the BPF program so that it calls user-space
+ // whenever we return an "errno" value from the filter, then we have to
+ // wrap our system call evaluator to perform the same operation. Otherwise,
+ // the verifier would also report a mismatch in return codes.
+ Evaluators redirected_evaluators;
+ redirected_evaluators.push_back(
+ std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_));
jln (very slow on Chromium) 2012/11/20 01:08:31 nit: two more spaces for indent.
+
+ const char *err = NULL;
+ if (!Verifier::VerifyBPF(*program,
+ has_unsafe_traps_ ? redirected_evaluators : evaluators_, &err)) {
jln (very slow on Chromium) 2012/11/20 01:08:31 Nit: ident arguments together.
+ SANDBOX_DIE(err);
+ }
}
#endif
@@ -444,7 +580,6 @@ void Sandbox::installFilter(bool quiet) {
// Release memory that is no longer needed
evaluators_.clear();
- errMap_.clear();
#if defined(SECCOMP_BPF_VALGRIND_HACKS)
// Valgrind is really not happy about our sandbox. Disable it when running
@@ -561,27 +696,56 @@ void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
goto sigsys_err;
}
- // Copy the seccomp-specific data into a arch_seccomp_data structure. This
- // is what we are showing to TrapFnc callbacks that the system call evaluator
- // registered with the sandbox.
- struct arch_seccomp_data data = {
- sigsys.nr,
- SECCOMP_ARCH,
- reinterpret_cast<uint64_t>(sigsys.ip),
- {
- static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM6(ctx))
+ // We need to tell whether we are performing a "normal" callback, or
+ // whether we were called recursively from within a UnsafeTrap() callback.
+ // This is a little tricky to do, because we need to somehow get access to
+ // per-thread data from within a signal context. Normal TLS storage is not
+ // safely accessible at this time. We could roll our own, but that involves
+ // a lot of complexity. Instead, we co-opt one bit in the signal mask.
+ // If SIGBUS is blocked, we assume that we have been called recursively.
+ // There is a possibility for collision with other code that needs to do
+ // this, but in practice the risks are low.
+ intptr_t rc;
+ if (has_unsafe_traps_ &&
+ sigismember(&ctx->uc_sigmask, SIGBUS)) {
jln (very slow on Chromium) 2012/11/20 01:08:31 This is pretty hack-ish. It's ok-ish since it's a
+ errno = old_errno;
+ if (sigsys.nr == __NR_clone) {
+ SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");
+ }
+ rc = Syscall(sigsys.nr,
+ SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),
+ SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),
+ SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));
+ } else {
+ const ErrorCode& err = trapArray_[info->si_errno - 1];
+ if (!err.safe_) {
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGBUS);
+ sigprocmask(SIG_BLOCK, &mask, NULL);
}
- };
- // Now call the TrapFnc callback associated with this particular instance
- // of SECCOMP_RET_TRAP.
- const ErrorCode& err = trapArray_[info->si_errno - 1];
- intptr_t rc = err.fnc_(data, err.aux_);
+ // Copy the seccomp-specific data into an arch_seccomp_data structure. This
+ // is what we are showing to TrapFnc callbacks that the system call
+ // evaluator registered with the sandbox.
+ struct arch_seccomp_data data = {
+ sigsys.nr,
+ SECCOMP_ARCH,
+ reinterpret_cast<uint64_t>(sigsys.ip),
+ {
+ static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM6(ctx))
+ }
+ };
+
+ // Now call the TrapFnc callback associated with this particular instance
+ // of SECCOMP_RET_TRAP.
+ rc = err.fnc_(data, err.aux_);
+ }
// Update the CPU register that stores the return code of the system call
// that we just handled, and restore "errno" to the value that it had
@@ -592,10 +756,21 @@ void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
return;
}
-ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
+bool Sandbox::TrapKey::operator<(const Sandbox::TrapKey& o) const {
+ if (fnc != o.fnc) {
+ return fnc < o.fnc;
+ } else if (aux != o.aux) {
+ return aux < o.aux;
+ } else {
+ return safe < o.safe;
+ }
+}
+
+ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,
+ bool safe) {
// Each unique pair of TrapFnc and auxiliary data make up a distinct instance
// of a SECCOMP_RET_TRAP.
- std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux);
+ TrapKey key(fnc, aux, safe);
TrapIds::const_iterator iter = trapIds_.find(key);
uint16_t id;
if (iter != trapIds_.end()) {
@@ -618,7 +793,7 @@ ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
}
id = traps_->size() + 1;
- traps_->push_back(ErrorCode(fnc, aux, id));
+ traps_->push_back(ErrorCode(fnc, aux, safe, id));
trapIds_[key] = id;
// We want to access the traps_ vector from our signal handler. But
@@ -629,10 +804,23 @@ ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
// signal handler, where we can safely do so.
trapArray_ = &(*traps_)[0];
trapArraySize_ = id;
+ return traps_->back();
}
- ErrorCode err = ErrorCode(fnc, aux, id);
- return errMap_[err.err()] = err;
+ return ErrorCode(fnc, aux, safe, id);
+}
+
+ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
+ return MakeTrap(fnc, aux, true);
jln (very slow on Chromium) 2012/11/20 01:08:31 Add a comment near true "/* Safe trap */"
+}
+
+ErrorCode Sandbox::UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux) {
+ return MakeTrap(fnc, aux, false);
jln (very slow on Chromium) 2012/11/20 01:08:31 Same here.
+}
+
+intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {
+ int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
+ return -err;
}
intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {
@@ -646,10 +834,10 @@ ErrorCode Sandbox::Kill(const char *msg) {
Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
int Sandbox::proc_fd_ = -1;
Sandbox::Evaluators Sandbox::evaluators_;
-Sandbox::ErrMap Sandbox::errMap_;
Sandbox::Traps *Sandbox::traps_ = NULL;
Sandbox::TrapIds Sandbox::trapIds_;
ErrorCode *Sandbox::trapArray_ = NULL;
size_t Sandbox::trapArraySize_ = 0;
+ bool Sandbox::has_unsafe_traps_ = false;
} // namespace

Powered by Google App Engine
This is Rietveld 408576698