Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(887)

Side by Side Diff: sandbox/linux/seccomp_bpf/sandbox_bpf.cc

Issue 10458040: Initial snapshot of the new BPF-enabled seccomp sandbox. This code is (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "sandbox/linux/seccomp_bpf/sandbox_bpf.h"
6
7 // The kernel gives us a sandbox, we turn it into a playground :-)
8 // This is version 2 of the playground; version 1 was built on top of
9 // pre-BPF seccomp mode.
10 namespace playground2 {
11
Chris Evans 2012/06/01 22:48:23 Redundant newline.
Markus (顧孟勤) 2012/06/01 23:46:33 Done.
12
13 static Sandbox::ErrorCode probeEvaluator(int signo) {
14 switch (signo) {
15 case __NR_getpid:
16 // Return EPERM so that we can check that the filter actually ran.
17 return (Sandbox::ErrorCode)EPERM;
18 case __NR_exit_group:
19 // Allow exit() with a non-default return code.
20 return Sandbox::SB_ALLOWED;
21 default:
22 // Make everything else fail in an easily recognizable way.
23 return (Sandbox::ErrorCode)EINVAL;
24 }
25 }
26
// Reports whether the seccomp sandbox can be used, caching the answer in
// status_ and returning that cached value.
//
// |proc_fd| is handed to isSingleThreaded() and, inside the probe child, to
// setProcFd().
//
// While status_ is STATUS_UNKNOWN or STATUS_UNAVAILABLE a probe is attempted:
//   - a process that is not single-threaded yields STATUS_UNAVAILABLE;
//   - otherwise all signals are blocked and a child is forked. The child
//     installs probeEvaluator as its policy, starts the sandbox and leaves
//     with exit code 100 if getpid() now fails with EPERM; in every other
//     case it ends in die(NULL);
//   - the parent restores its signal mask, waits for the child and records
//     STATUS_UNAVAILABLE if waitpid() did not return the child pid,
//     STATUS_AVAILABLE if the child exited with code 100, and
//     STATUS_UNSUPPORTED otherwise.
// If sigfillset(), sigprocmask(SIG_BLOCK) or fork() fails, status_ is left
// untouched.
//
// While status_ is STATUS_AVAILABLE, a process that is no longer
// single-threaded downgrades it to STATUS_UNAVAILABLE.
//
// NOTE(review): on a re-probe that starts from STATUS_UNAVAILABLE the forked
// child inherits that value, and startSandbox() calls die() for
// STATUS_UNAVAILABLE. The probe would then presumably be recorded as
// STATUS_UNSUPPORTED. TODO confirm, and consider resetting status_ in the
// child before startSandbox().
27 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) {
Chris Evans 2012/06/01 22:48:23 Just wanted to note that this API draws its result
Markus (顧孟勤) 2012/06/01 23:46:33 I think, I explained the rationale in my other e-m
28 if (status_ == STATUS_UNKNOWN || status_ == STATUS_UNAVAILABLE) {
Chris Evans 2012/06/01 22:48:23 Style issue: these nested ifs are getting harder t
Markus (顧孟勤) 2012/06/01 23:46:33 Done.
29 if (!isSingleThreaded(proc_fd)) {
30 status_ = STATUS_UNAVAILABLE;
31 } else {
32 sigset_t oldMask, newMask;
// Block every signal for the duration of fork(); the parent restores
// oldMask on both the success and the failure path below.
33 if (!sigfillset(&newMask) &&
34 !sigprocmask(SIG_BLOCK, &newMask, &oldMask)) {
35 pid_t pid = fork();
36 if (pid >= 0) {
37 if (!pid) {
// Child process: run the probe and never return to the caller.
38 // Test a very simple sandbox policy to verify that we can
39 // successfully turn on sandboxing.
40 suppressLogging_ = true;
41 setSandboxPolicy(probeEvaluator, NULL);
42 setProcFd(proc_fd);
43 startSandbox();
44 if (syscall(__NR_getpid) < 0 && errno == EPERM) {
45 syscall(__NR_exit_group, (intptr_t)100);
46 }
47 die(NULL);
Chris Evans 2012/06/01 22:48:23 die() calls strlen() on the pointer so seems like
Markus (顧孟勤) 2012/06/01 23:46:33 Good call. die() was already safe, when used in st
48 }
// Parent process: undo the signal mask change, then collect the result.
49 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) {
50 die("sigprocmask() failed");
51 }
52 int status;
53 status_ = HANDLE_EINTR(waitpid(pid, &status, 0)) != pid
Chris Evans 2012/06/01 22:48:23 Nested ternarys are a real PITA to read / parse, c
Markus (顧孟勤) 2012/06/01 23:46:33 Done.
54 ? STATUS_UNAVAILABLE
55 : WIFEXITED(status) && WEXITSTATUS(status) == 100
56 ? STATUS_AVAILABLE
57 : STATUS_UNSUPPORTED;
58 } else if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) {
Chris Evans 2012/06/01 22:48:23 It looks like this branch is associated with fork(
Markus (顧孟勤) 2012/06/01 23:46:33 Done.
59 die("sigprocmask() failed");
60 }
61 }
62 }
63 } else if (status_ == STATUS_AVAILABLE &&
64 !isSingleThreaded(proc_fd)) {
65 status_ = STATUS_UNAVAILABLE;
Chris Evans 2012/06/01 22:48:23 Still would prefer a dedicated error, STATUS_BADTH
Markus (顧孟勤) 2012/06/01 23:46:33 I don't really want the caller to have to know too
66 }
67 return status_;
68 }
69
70 void Sandbox::setProcFd(int proc_fd) {
71 proc_fd_ = proc_fd;
72 }
73
74 void Sandbox::startSandbox() {
75 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
76 die("Trying to start sandbox, even though it is known to be unavailable");
77 } else if (status_ == STATUS_ENABLED) {
78 die("Cannot start sandbox recursively. Use multiple calls to "
79 "setSandboxPolicy() to stack policies instead");
80 }
81 if (proc_fd_ < 0) {
82 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
83 }
84 if (proc_fd_ < 0) {
85 // For now, continue in degraded mode, if we can't access /proc.
86 // In the future, we might want to tighten this requirement.
87 }
88 if (!isSingleThreaded(proc_fd_)) {
89 die("Cannot start sandbox, if process is already multi-threaded");
90 }
91
92 // We no longer need access to any files in /proc. We want to do this
93 // before installing the filters, just in case that our policy denies
94 // close().
95 if (proc_fd_ >= 0) {
96 if (HANDLE_EINTR(close(proc_fd_))) {
97 die("Failed to close file descriptor for /proc");
98 }
99 proc_fd_ = -1;
100 }
101
102 // Install the filters.
103 installFilter();
104
105 // We are now inside the sandbox.
106 status_ = STATUS_ENABLED;
107 }
108
109 bool Sandbox::isSingleThreaded(int proc_fd) {
110 if (proc_fd < 0) {
111 // Cannot determine whether program is single-threaded. Hope for
112 // the best...
113 return true;
114 }
115
116 struct stat sb;
117 int task = -1;
118 if (proc_fd < 0 ||
119 (task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
120 fstat(task, &sb) != 0 ||
121 sb.st_nlink != 3 ||
122 HANDLE_EINTR(close(task))) {
123 if (task >= 0) {
124 HANDLE_EINTR(close(task));
125 }
126 return false;
127 }
128 return true;
129 }
130
131 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,
132 EvaluateArguments argumentEvaluator) {
133 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));
134 }
135
136 void Sandbox::installFilter() {
137 // Verify that the user pushed a policy.
138 if (evaluators_.empty()) {
139 filter_failed:
140 die("Failed to configure system call filters");
141 }
142
143 // Set new SIGSYS handler
144 struct sigaction sa;
145 memset(&sa, 0, sizeof(sa));
146 sa.sa_sigaction = &sigSys;
147 sa.sa_flags = SA_SIGINFO;
148 if (sigaction(SIGSYS, &sa, NULL) < 0) {
149 goto filter_failed;
150 }
151
152 // Unmask SIGSYS
153 sigset_t mask;
154 sigemptyset(&mask);
155 sigaddset(&mask, SIGSYS);
156 if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
157 goto filter_failed;
158 }
159
160 // We can't handle stacked evaluators, yet. We'll get there eventually
161 // though. Hang tight.
162 if (evaluators_.size() != 1) {
163 die("Not implemented");
164 }
165
166 // If the architecture doesn't match SECCOMP_ARCH, disallow the
167 // system call.
168 std::vector<struct sock_filter> program;
169 program.push_back((struct sock_filter)
170 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
171 offsetof(struct arch_seccomp_data, arch)));
172 program.push_back((struct sock_filter)
173 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));
174 program.push_back((struct sock_filter)
175 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));
176
177 // Grab the system call number, so that we can implement jump tables.
178 program.push_back((struct sock_filter)
179 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr)));
180
181 // Evaluate all possible system calls and depending on their
182 // exit codes generate a BPF filter.
183 // This is very inefficient right now. We need to be much smarter
184 // eventually.
185 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;
186 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL; ++sysnum) {
187 ErrorCode err = evaluateSyscall(sysnum);
188 int ret;
189 switch (err) {
190 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:
191 die("Not implemented");
192 case SB_TRAP:
193 ret = SECCOMP_RET_TRAP;
194 break;
195 case SB_ALLOWED:
196 ret = SECCOMP_RET_ALLOW;
197 break;
198 default:
199 if (err >= static_cast<ErrorCode>(1) &&
200 err <= static_cast<ErrorCode>(4096)) {
201 // We limit errno values to a reasonable range. In fact, the Linux ABI
202 // doesn't support errno values outside of this range.
203 ret = SECCOMP_RET_ERRNO + err;
204 } else {
205 die("Invalid ErrorCode reported by sandbox system call evaluator");
206 }
207 break;
208 }
209 program.push_back((struct sock_filter)
210 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));
211 program.push_back((struct sock_filter)
212 BPF_STMT(BPF_RET+BPF_K, ret));
213 }
214
215 // Everything that isn't allowed is forbidden. Eventually, we would
216 // like to have a way to log forbidden calls, when in debug mode.
217 program.push_back((struct sock_filter)
218 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));
219
220 // Install BPF filter program
221 const struct sock_fprog prog = { program.size(), &program[0] };
222 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
223 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
224 goto filter_failed;
225 }
226
227 return;
228 }
229
// SIGSYS handler; installFilter() registers it through sigaction() with
// SA_SIGINFO.
//
// |nr| is the delivered signal number, |info| the accompanying siginfo_t
// and |void_context| the ucontext_t of the interrupted code.
//
// The handler calls die() unless the signal is SIGSYS, its si_code is
// SYS_SECCOMP and a context was supplied. Otherwise it stores the negated
// SECCOMP_DENY_ERRNO in the REG_RESULT general purpose register of the
// saved context. errno is saved on entry and restored before returning, so
// the handler itself leaves errno as it found it.
//
// NOTE(review): |info| is dereferenced without a null check, while
// |void_context| is checked. Presumably the kernel always supplies it for
// SA_SIGINFO handlers. TODO confirm.
230 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
231 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) {
232 die("Unexpected SIGSYS received");
Chris Evans 2012/06/01 22:48:23 Bug: die() in the Chromium context uses LOG(), whi
Markus (顧孟勤) 2012/06/01 23:46:33 This is really a tough call. The standalone versio
233 }
234 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);
235 int old_errno = errno;
Chris Evans 2012/06/01 22:48:23 We don't seem to do anything with old_errno?
Markus (顧孟勤) 2012/06/01 23:46:33 That is correct. Non-exiting signal handlers must
236
237 // In case of error, set the REG_RESULT CPU register to the default
238 // errno value (i.e. EPERM).
239 // We need to be very careful when doing this, as some of our target
240 // platforms have pointer types and CPU registers that are wider than
241 // ints. Furthermore, the kernel ABI requires us to return a negative
242 // value, but errno values are usually positive. And in fact, it would
243 // be perfectly reasonable for somebody to have defined them as unsigned
244 // properties. This makes the correct incantation of type casts rather
245 // subtle. Sometimes, C++ is just too smart for its own good.
246 void *rc = (void *)(intptr_t)-(int)SECCOMP_DENY_ERRNO;
247
248 // This is where we can add extra code to handle complex system calls.
249 // ...
250
251 ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc);
252 errno = old_errno;
253 return;
254 }
255
256
// Definitions of the static data members of Sandbox and their initial values.
// Starts out false; the probe child in supportsSeccompSandbox() sets it to
// true before running its self-test.
257 bool Sandbox::suppressLogging_ = false;
// Cached availability of the sandbox; STATUS_UNKNOWN until the first probe.
258 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
// Descriptor for /proc; a negative value means that none is held.
259 int Sandbox::proc_fd_ = -1;
// Policies registered through setSandboxPolicy(), in registration order.
260 std::vector<std::pair<Sandbox::EvaluateSyscall,
261 Sandbox::EvaluateArguments> > Sandbox::evaluators_;
262
263 } // namespace
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698