sandbox/linux/seccomp_bpf/sandbox_bpf.cc - Issue 10458040: Initial snapshot of the new BPF-enabled seccomp sandbox. This code is

Side by Side Diff: sandbox/linux/seccomp_bpf/sandbox_bpf.cc

Issue 10458040: Initial snapshot of the new BPF-enabled seccomp sandbox. This code is (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "sandbox_bpf.h"

	6

	7

	8 namespace playground2 {
	jln (very slow on Chromium) 2012/05/31 21:01:05 Don't forget to change it
	9

	10 int Sandbox::supportsSeccompSandbox(int proc_fd) {

	11 if (status_ == STATUS_UNKNOWN) {

	12 if (!isSingleThreaded(proc_fd)) {

	13 status_ = STATUS_UNSUPPORTED;

	14 } else {

	15 pid_t pid = fork();

	16 if (pid < 0) {

	17 die("Failed to check for sandbox support");
	jln (very slow on Chromium) 2012/05/31 21:01:05 I really don't think we should die here. supports should not make you die, that's the point, isn't it? Now, agreed, if we can't fork we're going to have some issues, but I would rather let core Chrome code fail and handle that.
	18 }

	19 if (!pid) {

	20 static const struct sock_filter filter[] = {

	21 // If the architecture doesn't match SECCOMP_ARCH, disallow the

	22 // system call.

	23 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,

	24 offsetof(struct arch_seccomp_data, arch)),

	25 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 0, 3),

	26

	27 // Check the system call number. The only allowed call are getpid()

	28 // and exit_group()

	29 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,

	30 offsetof(struct arch_seccomp_data, nr)),

	31 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_getpid, 2, 1),

	32 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_exit_group, 0, 2),
	jln (very slow on Chromium) 2012/05/31 21:01:05 Should we rather use your normal API to install a filter? Should we block all signals? Merely getting a signal for which there is a handler would get us killed since we don't have *sigreturn.
	33 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),

	34 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | EPERM),

	35 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),

	36 };

	37

	38 // Try to install filter. If we succeed, return success.

	39 const struct sock_fprog prog = {

	40 ARRAYSIZE(filter),

	41 (struct sock_filter *)filter

	42 };

	43 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0 &&

	44 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) == 0 &&

	45 syscall(__NR_getpid) == -1 && errno == EPERM) {

	46 syscall(__NR_exit_group, (intptr_t)0);

	47 }

	48 _exit(1);

	49 }

	50 int status;

	51 TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
	jln (very slow on Chromium) 2012/05/31 21:01:05 Very minor: Chrome seems to use HANDLE_EINTR
	52 status_ = WIFEXITED(status) && !WEXITSTATUS(status)

	53 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;

	54 }

	55 }

	56 return status_ == STATUS_AVAILABLE;

	57 }

	58

	59 void Sandbox::setProcFd(int proc_fd) {

	60 proc_fd_ = proc_fd;

	61 }

	62

	63 void Sandbox::startSandbox() {

	64 if (status_ == STATUS_UNSUPPORTED) {

	65 die("Trying to start sandbox, even though it is known to be unavailable");

	66 }

	67 if (proc_fd_ < 0) {

	68 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);

	69 }

	70 if (proc_fd_ < 0) {

	71 // For now, continue in degraded mode, if we can't access /proc.

	72 // In the future, we might want to tighten this requirement.

	73 }

	74 if (!isSingleThreaded(proc_fd_)) {

	75 die("Cannot start sandbox, if process is already multi-threaded");

	76 }

	77 installFilter();

	78

	79 // We no longer need access to any files in /proc

	80 if (proc_fd_ >= 0) {

	81 if (TEMP_FAILURE_RETRY(close(proc_fd_))) {

	82 die("Failed to close file descriptor for /proc");

	83 }

	84 proc_fd_ = -1;

	85 }

	86 }

	87

	88 bool Sandbox::isSingleThreaded(int proc_fd) {

	89 if (proc_fd < 0) {

	90 // Cannot determine whether program is single-threaded. Hope for

	91 // the best...

	92 return true;

	93 }

	94

	95 struct stat sb;

	96 int task = -1;

	97 if (proc_fd < 0 ||

	98 (task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||

	99 fstat(task, &sb) != 0 ||

	100 sb.st_nlink != 3 ||

	101 TEMP_FAILURE_RETRY(close(task))) {

	102 if (task >= 0) {

	103 TEMP_FAILURE_RETRY(close(task));

	104 }

	105 return false;

	106 }

	107 return true;

	108 }

	109

	110 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,

	111 EvaluateArguments argumentEvaluator) {

	112 evaluators_.push_back(std::make_pair<EvaluateSyscall, EvaluateArguments>(

	113 syscallEvaluator, argumentEvaluator));

	114 }

	115

	116 void Sandbox::installFilter() {

	117 // Verify that the user pushed a policy.

	118 if (evaluators_.empty()) {

	119 filter_failed:
	jln (very slow on Chromium) 2012/05/31 21:01:05 Minor: Do you mind putting this label at the end? I find it much easier to review when stuff that exits without return: is at the end.
	120 die("Failed to configure system call filters");

	121 }

	122

	123 // Set new SIGSYS handler

	124 struct sigaction sa;

	125 memset(&sa, 0, sizeof(sa));

	126 sa.sa_sigaction = &sigSys;

	127 sa.sa_flags = SA_SIGINFO;

	128 if (sigaction(SIGSYS, &sa, NULL) < 0) {

	129 goto filter_failed;

	130 }

	131

	132 // Unmask SIGSYS

	133 sigset_t mask;

	134 sigemptyset(&mask);

	135 sigaddset(&mask, SIGSYS);

	136 if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) {

	137 goto filter_failed;

	138 }

	139

	140 // We can't handle stacked evaluators, yet. We'll get there eventually

	141 // though. Hang tight.

	142 if (evaluators_.size() != 1) {

	143 die("Not implemented");

	144 }

	145

	146 // If the architecture doesn't match SECCOMP_ARCH, disallow the

	147 // system call.

	148 std::vector<struct sock_filter> program;

	149 program.push_back((struct sock_filter)

	150 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,

	151 offsetof(struct arch_seccomp_data, arch)));

	152 program.push_back((struct sock_filter)

	153 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));

	154 program.push_back((struct sock_filter)

	155 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_DENY));

	156

	157 // Grab the system call number, so that we can implement jump tables.

	158 program.push_back((struct sock_filter)

	159 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr)));

	160

	161 // Evaluate all possible system calls and depending on their

	162 // exit codes generate a BPF filter.

	163 // This is very inefficient right now. We need to be much smarter

	164 // eventually.

	165 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;

	166 for (int sysnum = 0; sysnum <= MAX_SYSCALL; ++sysnum) {

	167 ErrorCode err = evaluateSyscall(sysnum);

	168 int ret;

	169 switch (err) {

	170 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:
	jln (very slow on Chromium) 2012/05/31 21:01:05 We'll need SB_INSPECT_ARG1 for Chris' policy because of kill(). If that's the only show stopper, we can probably live without it and allow kill() unrestricted for now.
	171 die("Not implemented");

	172 case SB_TRAP:

	173 ret = SECCOMP_RET_TRAP;

	174 break;

	175 case SB_ALLOWED:

	176 ret = SECCOMP_RET_ALLOW;

	177 break;

	178 default:

	179 ret = SECCOMP_RET_ERRNO + err;

	180 break;

	181 }

	182 program.push_back((struct sock_filter)

	183 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));

	184 program.push_back((struct sock_filter)

	185 BPF_STMT(BPF_RET+BPF_K, ret));

	186 }

	187

	188 // Everything that isn't allowed is forbidden. Eventually, we would

	189 // like to have a way to log forbidden calls, when in debug mode.

	190 program.push_back((struct sock_filter)

	191 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_DENY));
	jln (very slow on Chromium) 2012/05/31 21:01:05 It's not strictly correct to ENOPERM for many system calls, but I think it's a good default. ENOMEM is more generally supported but it may trigger a panic shut down in those rare callers that look for the actual errno.
	192

	193 // Install BPF filter program

	194 const struct sock_fprog prog = { program.size(), &program[0] };

	195 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||

	196 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

	197 goto filter_failed;

	198 }

	199

	200 return;

	201 }

	202

	203 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {

	204 if (info->si_code != SYS_SECCOMP || !void_context) {
	jln (very slow on Chromium) 2012/05/31 21:01:05 I think for now it's best to let the user specify the SIGSYS handler and do nothing in it. For the first iteration, we'll just keep Chris' handler and change it in the next one.
	205 die("Unexpected SIGSYS received");

	206 }

	207 ucontext_t *ctx = (ucontext_t *)void_context;

	208 int old_errno = errno;

	209 void *rc =

	210 (void *)(intptr_t)-(int)(SECCOMP_RET_DENY & SECCOMP_RET_DATA);

	211

	212 // This is where we can add extra code to handle complex system calls.

	213 // ...

	214

	215 if (rc == (void *)(intptr_t)-(int)(SECCOMP_RET_DENY & SECCOMP_RET_DATA)) {

	216 // sprintf() is not technically async-signal safe. But in glibc it

	217 // tends to be much safer than calling fprintf() or any other higher-

	218 // level I/O function.

	219 // We need to eventually have a better solution. But for debugging

	220 // purposes during code development, this is good enough for now.

	221 char buf[80];

	222 sprintf(buf, "Seccomp policy denies system call %ld\n",

	223 (long int)ctx->uc_mcontext.gregs[REG_SYSCALL]);

	224 if (TEMP_FAILURE_RETRY(write(2, buf, strlen(buf)))) {}

	225 }

	226

	227 ctx->uc_mcontext.gregs[REG_RESULT] = (greg_t)rc;

	228 errno = old_errno;

	229 return;

	230 }

	231

	232

	233 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

	234 int Sandbox::proc_fd_ = -1;

	235 std::vector<std::pair<Sandbox::EvaluateSyscall,

	236 Sandbox::EvaluateArguments> > Sandbox::evaluators_;

	237

	238 } // namespace

OLD	NEW

« sandbox/linux/seccomp_bpf/sandbox_bpf.h ('K') | « sandbox/linux/seccomp_bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp_bpf/util.h » ('j') | no next file with comments »