sandbox/linux/seccomp_bpf/sandbox_bpf.cc - Issue 10458040: Initial snapshot of the new BPF-enabled seccomp sandbox. This code is

Side by Side Diff: sandbox/linux/seccomp_bpf/sandbox_bpf.cc

Issue 10458040: Initial snapshot of the new BPF-enabled seccomp sandbox. This code is (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "sandbox/linux/seccomp_bpf/sandbox_bpf.h"

	6

	7 // The kernel gives us a sandbox, we turn it into a playground :-)

	8 // This is version 2 of the playground; version 1 was built on top of

	9 // pre-BPF seccomp mode.

	10 namespace playground2 {

	11

	12 Sandbox::ErrorCode Sandbox::probeEvaluator(int signo) {

	13 switch (signo) {

	14 case __NR_getpid:

	15 // Return EPERM so that we can check that the filter actually ran.

	16 return (ErrorCode)EPERM;

	17 case __NR_exit_group:

	18 // Allow exit() with a non-default return code.

	19 return SB_ALLOWED;

	20 default:

	21 // Make everything else fail in an easily recognizable way.

	22 return (ErrorCode)EINVAL;

	23 }

	24 }

	25

	26 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) {

	27 // Block all signals before forking a child process. This prevents an

	28 // attacker from manipulating our test by sending us an unexpected signal.

	29 sigset_t oldMask, newMask;

	30 if (sigfillset(&newMask) \|\|

	31 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) {

	32 die("sigprocmask() failed");

	33 }

	34

	35 pid_t pid = fork();

	36 if (pid < 0) {

	37 // Die if we cannot fork(). We would probably fail a little later

	38 // anyway, as the machine is likely very close to running out of

	39 // memory.

	40 // But what we don't want to do is return "false", as a crafty

	41 // attacker might cause fork() to fail at will and could trick us

	42 // into running without a sandbox.

	43 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails

	44 die("fork() failed unexpectedly");

	45 }

	46

	47 // In the child process

	48 if (!pid) {

	49 // Test a very simple sandbox policy to verify that we can

	50 // successfully turn on sandboxing.

	51 suppressLogging_ = true;

	52 evaluators_.clear();

	53 setSandboxPolicy(probeEvaluator, NULL);

	54 setProcFd(proc_fd);

	55 startSandbox();

	56 if (syscall(__NR_getpid) < 0 && errno == EPERM) {

	57 syscall(__NR_exit_group, (intptr_t)100);

	58 }

	59 die(NULL);

	60 }

	61

	62 // In the parent process

	63 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) {

	64 die("sigprocmask() failed");

	65 }

	66 int status;

	67 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {

	68 die("waitpid() failed unexpectedly");

	69 }

	70 return WIFEXITED(status) && WEXITSTATUS(status) == 100;

	71 }

	72

	73 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) {

	74 // It the sandbox is currently active, we clearly must have support for

	75 // sandboxing.

	76 if (status_ == STATUS_ENABLED) {

	77 return status_;

	78 }

	79

	80 // Even if the sandbox was previously available, something might have

	81 // changed in our run-time environment. Check one more time.

	82 if (status_ == STATUS_AVAILABLE) {

	83 if (!isSingleThreaded(proc_fd)) {

	84 status_ = STATUS_UNAVAILABLE;

	85 }

	86 return status_;

	87 }

	88

	89 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) {

	90 // All state transitions resulting in STATUS_UNAVAILABLE are immediately

	91 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all

	92 // happen, if and only if they are triggered by the process being multi-

	93 // threaded.

	94 // In other words, if a single-threaded process is currently in the

	95 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is

	96 // actually available.

	97 status_ == STATUS_AVAILABLE;

	98 return status_;

	99 }

	100

	101 // If we have not previously checked for availability of the sandbox or if

	102 // we otherwise don't believe to have a good cached value, we have to

	103 // perform a thorough check now.

	104 if (status_ == STATUS_UNKNOWN) {

	105 status_ = kernelSupportSeccompBPF(proc_fd)

	106 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;

	107

	108 // As we are performing our tests from a child process, the run-time

	109 // environment that is visible to the sandbox is always guaranteed to be

	110 // single-threaded. Let's check here whether the caller is single-

	111 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.

	112 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) {

	113 status_ = STATUS_UNAVAILABLE;

	114 }

	115 }

	116 return status_;

	117 }

	118

	119 void Sandbox::setProcFd(int proc_fd) {

	120 proc_fd_ = proc_fd;

	121 }

	122

	123 void Sandbox::startSandbox() {

	124 if (status_ == STATUS_UNSUPPORTED \|\| status_ == STATUS_UNAVAILABLE) {

	125 die("Trying to start sandbox, even though it is known to be unavailable");

	126 } else if (status_ == STATUS_ENABLED) {

	127 die("Cannot start sandbox recursively. Use multiple calls to "

	128 "setSandboxPolicy() to stack policies instead");

	129 }

	130 if (proc_fd_ < 0) {

	131 proc_fd_ = open("/proc", O_RDONLY\|O_DIRECTORY);

	132 }

	133 if (proc_fd_ < 0) {

	134 // For now, continue in degraded mode, if we can't access /proc.

	135 // In the future, we might want to tighten this requirement.

	136 }

	137 if (!isSingleThreaded(proc_fd_)) {

	138 die("Cannot start sandbox, if process is already multi-threaded");

	139 }

	140

	141 // We no longer need access to any files in /proc. We want to do this

	142 // before installing the filters, just in case that our policy denies

	143 // close().

	144 if (proc_fd_ >= 0) {

	145 if (HANDLE_EINTR(close(proc_fd_))) {

	146 die("Failed to close file descriptor for /proc");

	147 }

	148 proc_fd_ = -1;

	149 }

	150

	151 // Install the filters.

	152 installFilter();

	153

	154 // We are now inside the sandbox.

	155 status_ = STATUS_ENABLED;

	156 }

	157

	158 bool Sandbox::isSingleThreaded(int proc_fd) {

	159 if (proc_fd < 0) {

	160 // Cannot determine whether program is single-threaded. Hope for

	161 // the best...

	162 return true;

	163 }

	164

	165 struct stat sb;

	166 int task = -1;

	167 if ((task = openat(proc_fd, "self/task", O_RDONLY\|O_DIRECTORY)) < 0 \|\|

	168 fstat(task, &sb) != 0 \|\|

	169 sb.st_nlink != 3 \|\|

	170 HANDLE_EINTR(close(task))) {

	171 if (task >= 0) {

	172 HANDLE_EINTR(close(task));

	173 }

	174 return false;

	175 }

	176 return true;

	177 }

	178

	179 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,

	180 EvaluateArguments argumentEvaluator) {

	181 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));

	182 }

	183

	184 void Sandbox::installFilter() {

	185 // Verify that the user pushed a policy.

	186 if (evaluators_.empty()) {

	187 filter_failed:

	188 die("Failed to configure system call filters");

	189 }

	190

	191 // Set new SIGSYS handler

	192 struct sigaction sa;

	193 memset(&sa, 0, sizeof(sa));

	194 sa.sa_sigaction = &sigSys;

	195 sa.sa_flags = SA_SIGINFO;

	196 if (sigaction(SIGSYS, &sa, NULL) < 0) {

	197 goto filter_failed;

	198 }

	199

	200 // Unmask SIGSYS

	201 sigset_t mask;

	202 if (sigemptyset(&mask) \|\|

	203 sigaddset(&mask, SIGSYS) \|\|

	204 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {

	205 goto filter_failed;

	206 }

	207

	208 // We can't handle stacked evaluators, yet. We'll get there eventually

	209 // though. Hang tight.

	210 if (evaluators_.size() != 1) {

	211 die("Not implemented");

	212 }

	213

	214 // If the architecture doesn't match SECCOMP_ARCH, disallow the

	215 // system call.

	216 std::vector<struct sock_filter> program;

	217 program.push_back((struct sock_filter)

	218 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,

	219 offsetof(struct arch_seccomp_data, arch)));

	220 program.push_back((struct sock_filter)

	221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));

	222 program.push_back((struct sock_filter)

	223 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));

	224

	225 // Grab the system call number, so that we can implement jump tables.

	226 program.push_back((struct sock_filter)

	227 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr)));

	228

	229 // Evaluate all possible system calls and depending on their

	230 // exit codes generate a BPF filter.

	231 // This is very inefficient right now. We need to be much smarter

	232 // eventually.

	233 // We currently incur a O(N) overhead on each system call, with N

	234 // being the number of system calls. It is easy to get this down to

	235 // O(log_2(M)) with M being the number of system calls that need special

	236 // treatment.

	237 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;

	238 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL; ++sysnum) {

	239 ErrorCode err = evaluateSyscall(sysnum);

	240 int ret;

	241 switch (err) {

	242 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:

	243 die("Not implemented");

	244 case SB_TRAP:

	245 ret = SECCOMP_RET_TRAP;

	246 break;

	247 case SB_ALLOWED:

	248 ret = SECCOMP_RET_ALLOW;

	249 break;

	250 default:

	251 if (err >= static_cast<ErrorCode>(1) &&

	252 err <= static_cast<ErrorCode>(4096)) {

	253 // We limit errno values to a reasonable range. In fact, the Linux ABI

	254 // doesn't support errno values outside of this range.

	255 ret = SECCOMP_RET_ERRNO + err;

	256 } else {

	257 die("Invalid ErrorCode reported by sandbox system call evaluator");

	258 }

	259 break;

	260 }

	261 program.push_back((struct sock_filter)

	262 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));

	263 program.push_back((struct sock_filter)

	264 BPF_STMT(BPF_RET+BPF_K, ret));

	265 }

	266

	267 // Everything that isn't allowed is forbidden. Eventually, we would

	268 // like to have a way to log forbidden calls, when in debug mode.

	269 program.push_back((struct sock_filter)

	270 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));

	271

	272 // Install BPF filter program

	273 const struct sock_fprog prog = { program.size(), &program[0] };

	274 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) \|\|

	275 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

	276 goto filter_failed;

	277 }

	278

	279 return;

	280 }

	281

	282 void Sandbox::sigSys(int nr, siginfo_t info, void void_context) {

	283 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context) {

	284 // die() can call LOG(FATAL). This is not normally async-signal safe

	285 // and can lead to bugs. We should eventually implement a different

	286 // logging and reporting mechanism that is safe to be called from

	287 // the sigSys() handler.

	288 die("Unexpected SIGSYS received");

	289 }

	290 ucontext_t ctx = reinterpret_cast<ucontext_t >(void_context);

	291 int old_errno = errno;

	292

	293 // In case of error, set the REG_RESULT CPU register to the default

	294 // errno value (i.e. EPERM).

	295 // We need to be very careful when doing this, as some of our target

	296 // platforms have pointer types and CPU registers that are wider than

	297 // ints. Furthermore, the kernel ABI requires us to return a negative

	298 // value, but errno values are usually positive. And in fact, it would

	299 // be perfectly reasonable for somebody to have defined them as unsigned

	300 // properties. This makes the correct incantation of type casts rather

	301 // subtle. Sometimes, C++ is just too smart for its own good.

	302 void rc = (void )(intptr_t)-(int)SECCOMP_DENY_ERRNO;

	303

	304 // This is where we can add extra code to handle complex system calls.

	305 // ...

	306

	307 ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc);

	308 errno = old_errno;

	309 return;

	310 }

	311

	312

	313 bool Sandbox::suppressLogging_ = false;

	314 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

	315 int Sandbox::proc_fd_ = -1;

	316 std::vector<std::pair<Sandbox::EvaluateSyscall,

	317 Sandbox::EvaluateArguments> > Sandbox::evaluators_;

	318

	319 } // namespace

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp_bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp_bpf/util.h » ('j') | no next file with comments »