Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Unified Diff: sandbox/linux/seccomp_bpf/sandbox_bpf.cc

Issue 10458040: Initial snapshot of the new BPF-enabled seccomp sandbox. This code is (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « sandbox/linux/seccomp_bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp_bpf/util.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sandbox/linux/seccomp_bpf/sandbox_bpf.cc
===================================================================
--- sandbox/linux/seccomp_bpf/sandbox_bpf.cc (revision 0)
+++ sandbox/linux/seccomp_bpf/sandbox_bpf.cc (revision 0)
@@ -0,0 +1,311 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp_bpf/sandbox_bpf.h"
+
+// The kernel gives us a sandbox, we turn it into a playground :-)
+// This is version 2 of the playground; version 1 was built on top of
+// pre-BPF seccomp mode.
+namespace playground2 {
+
+Sandbox::ErrorCode Sandbox::probeEvaluator(int signo) {
+ switch (signo) {
+ case __NR_getpid:
+ // Return EPERM so that we can check that the filter actually ran.
+ return (ErrorCode)EPERM;
+ case __NR_exit_group:
+ // Allow exit() with a non-default return code.
+ return SB_ALLOWED;
+ default:
+ // Make everything else fail in an easily recognizable way.
+ return (ErrorCode)EINVAL;
+ }
+}
+
+bool Sandbox::kernelSupportSeccompBPF(int proc_fd) {
+ // Block all signals before forking a child process. This prevents an
+ // attacker from manipulating our test by sending us an unexpected signal.
+ sigset_t oldMask, newMask;
+ if (sigfillset(&newMask) ||
+ sigprocmask(SIG_BLOCK, &newMask, &oldMask)) {
+ die("sigprocmask() failed");
+ }
+
+ pid_t pid = fork();
+ if (pid < 0) {
+ // Die if we cannot fork(). We would probably fail a little later
+ // anyway, as the machine is likely very close to running out of
+ // memory.
+ // But what we don't want to do is return "false", as a crafty
+ // attacker might cause fork() to fail at will and could trick us
+ // into running without a sandbox.
+ sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails
+ die("fork() failed unexpectedly");
+ }
+
+ // In the child process
+ if (!pid) {
+ // Test a very simple sandbox policy to verify that we can
+ // successfully turn on sandboxing.
+ suppressLogging_ = true;
+ evaluators_.clear();
+ setSandboxPolicy(probeEvaluator, NULL);
+ setProcFd(proc_fd);
+ startSandbox();
+ if (syscall(__NR_getpid) < 0 && errno == EPERM) {
+ syscall(__NR_exit_group, (intptr_t)100);
+ }
+ die(NULL);
+ }
+
+ // In the parent process
+ if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) {
+ die("sigprocmask() failed");
+ }
+ int status;
+ if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
+ die("waitpid() failed unexpectedly");
+ }
+ return WIFEXITED(status) && WEXITSTATUS(status) == 100;
+}
+
+Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) {
+ // It the sandbox is currently active, we clearly must have support for
+ // sandboxing.
+ if (status_ == STATUS_ENABLED) {
+ return status_;
+ }
+
+ // Even if the sandbox was previously available, something might have
+ // changed in our run-time environment. Check one more time.
+ if (status_ == STATUS_AVAILABLE) {
+ if (!isSingleThreaded(proc_fd)) {
+ status_ = STATUS_UNAVAILABLE;
+ }
+ return status_;
+ }
+
+ if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) {
+ // All state transitions resulting in STATUS_UNAVAILABLE are immediately
+ // preceded by STATUS_AVAILABLE. Furthermore, these transitions all
+ // happen, if and only if they are triggered by the process being multi-
+ // threaded.
+ // In other words, if a single-threaded process is currently in the
+ // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is
+ // actually available.
+ status_ == STATUS_AVAILABLE;
+ return status_;
+ }
+
+ // If we have not previously checked for availability of the sandbox or if
+ // we otherwise don't believe to have a good cached value, we have to
+ // perform a thorough check now.
+ if (status_ == STATUS_UNKNOWN) {
+ status_ = kernelSupportSeccompBPF(proc_fd)
+ ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;
+
+ // As we are performing our tests from a child process, the run-time
+ // environment that is visible to the sandbox is always guaranteed to be
+ // single-threaded. Let's check here whether the caller is single-
+ // threaded. Otherwise, we mark the sandbox as temporarily unavailable.
+ if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) {
+ status_ = STATUS_UNAVAILABLE;
+ }
+ }
+ return status_;
+}
+
+void Sandbox::setProcFd(int proc_fd) {
+ proc_fd_ = proc_fd;
+}
+
+void Sandbox::startSandbox() {
+ if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
+ die("Trying to start sandbox, even though it is known to be unavailable");
+ } else if (status_ == STATUS_ENABLED) {
+ die("Cannot start sandbox recursively. Use multiple calls to "
+ "setSandboxPolicy() to stack policies instead");
+ }
+ if (proc_fd_ < 0) {
+ proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
+ }
+ if (proc_fd_ < 0) {
+ // For now, continue in degraded mode, if we can't access /proc.
+ // In the future, we might want to tighten this requirement.
+ }
+ if (!isSingleThreaded(proc_fd_)) {
+ die("Cannot start sandbox, if process is already multi-threaded");
+ }
+
+ // We no longer need access to any files in /proc. We want to do this
+ // before installing the filters, just in case that our policy denies
+ // close().
+ if (proc_fd_ >= 0) {
+ if (HANDLE_EINTR(close(proc_fd_))) {
+ die("Failed to close file descriptor for /proc");
+ }
+ proc_fd_ = -1;
+ }
+
+ // Install the filters.
+ installFilter();
+
+ // We are now inside the sandbox.
+ status_ = STATUS_ENABLED;
+}
+
+bool Sandbox::isSingleThreaded(int proc_fd) {
+ if (proc_fd < 0) {
+ // Cannot determine whether program is single-threaded. Hope for
+ // the best...
+ return true;
+ }
+
+ struct stat sb;
+ int task = -1;
+ if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
+ fstat(task, &sb) != 0 ||
+ sb.st_nlink != 3 ||
+ HANDLE_EINTR(close(task))) {
+ if (task >= 0) {
+ HANDLE_EINTR(close(task));
+ }
+ return false;
+ }
+ return true;
+}
+
+void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,
+ EvaluateArguments argumentEvaluator) {
+ evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));
+}
+
+void Sandbox::installFilter() {
+ // Verify that the user pushed a policy.
+ if (evaluators_.empty()) {
+ filter_failed:
+ die("Failed to configure system call filters");
+ }
+
+ // Set new SIGSYS handler
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = &sigSys;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSYS, &sa, NULL) < 0) {
+ goto filter_failed;
+ }
+
+ // Unmask SIGSYS
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGSYS);
Chris Evans 2012/06/04 22:21:55 Nit: we took the trouble to check the sigfillset()
+ if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
+ goto filter_failed;
+ }
+
+ // We can't handle stacked evaluators, yet. We'll get there eventually
+ // though. Hang tight.
+ if (evaluators_.size() != 1) {
+ die("Not implemented");
+ }
+
+ // If the architecture doesn't match SECCOMP_ARCH, disallow the
+ // system call.
+ std::vector<struct sock_filter> program;
+ program.push_back((struct sock_filter)
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+ offsetof(struct arch_seccomp_data, arch)));
+ program.push_back((struct sock_filter)
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));
+ program.push_back((struct sock_filter)
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));
+
+ // Grab the system call number, so that we can implement jump tables.
+ program.push_back((struct sock_filter)
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr)));
+
+ // Evaluate all possible system calls and depending on their
+ // exit codes generate a BPF filter.
+ // This is very inefficient right now. We need to be much smarter
+ // eventually.
Jorge Lucangeli Obes 2012/06/04 22:16:14 We'll probably want to fix this (to at least avoid
Chris Evans 2012/06/04 22:21:55 Nit: note the actual run time in a comment. Julien
+ EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;
+ for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL; ++sysnum) {
+ ErrorCode err = evaluateSyscall(sysnum);
+ int ret;
+ switch (err) {
+ case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:
+ die("Not implemented");
+ case SB_TRAP:
+ ret = SECCOMP_RET_TRAP;
+ break;
+ case SB_ALLOWED:
+ ret = SECCOMP_RET_ALLOW;
+ break;
+ default:
+ if (err >= static_cast<ErrorCode>(1) &&
+ err <= static_cast<ErrorCode>(4096)) {
+ // We limit errno values to a reasonable range. In fact, the Linux ABI
+ // doesn't support errno values outside of this range.
+ ret = SECCOMP_RET_ERRNO + err;
+ } else {
+ die("Invalid ErrorCode reported by sandbox system call evaluator");
+ }
+ break;
+ }
+ program.push_back((struct sock_filter)
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));
+ program.push_back((struct sock_filter)
+ BPF_STMT(BPF_RET+BPF_K, ret));
+ }
+
+ // Everything that isn't allowed is forbidden. Eventually, we would
+ // like to have a way to log forbidden calls, when in debug mode.
+ program.push_back((struct sock_filter)
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));
+
+ // Install BPF filter program
+ const struct sock_fprog prog = { program.size(), &program[0] };
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+ prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+ goto filter_failed;
+ }
+
+ return;
+}
+
+void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
+ if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) {
Chris Evans 2012/06/04 22:21:55 Nit: add a comment that die() might call LOG(FATAL
+ die("Unexpected SIGSYS received");
+ }
+ ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);
+ int old_errno = errno;
+
+ // In case of error, set the REG_RESULT CPU register to the default
+ // errno value (i.e. EPERM).
+ // We need to be very careful when doing this, as some of our target
+ // platforms have pointer types and CPU registers that are wider than
+ // ints. Furthermore, the kernel ABI requires us to return a negative
+ // value, but errno values are usually positive. And in fact, it would
+ // be perfectly reasonable for somebody to have defined them as unsigned
+ // properties. This makes the correct incantation of type casts rather
+ // subtle. Sometimes, C++ is just too smart for its own good.
+ void *rc = (void *)(intptr_t)-(int)SECCOMP_DENY_ERRNO;
+
+ // This is where we can add extra code to handle complex system calls.
+ // ...
+
+ ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc);
+ errno = old_errno;
+ return;
+}
+
+
+bool Sandbox::suppressLogging_ = false;
+Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
+int Sandbox::proc_fd_ = -1;
+std::vector<std::pair<Sandbox::EvaluateSyscall,
+ Sandbox::EvaluateArguments> > Sandbox::evaluators_;
+
+} // namespace
Property changes on: sandbox/linux/seccomp_bpf/sandbox_bpf.cc
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « sandbox/linux/seccomp_bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp_bpf/util.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698