Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(126)

Side by Side Diff: sandbox/linux/seccomp_bpf/sandbox_bpf.cc

Issue 10458040: Initial snapshot of the new BPF-enabled seccomp sandbox. This code is (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sandbox/linux/seccomp_bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp_bpf/util.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "sandbox_bpf.h"
6
7
8 namespace playground2 {
9
10 int Sandbox::supportsSeccompSandbox(int proc_fd) {
jln (very slow on Chromium) 2012/05/30 20:31:21 It looks like content/common/sandbox_init_linux.cc
11 if (status_ == STATUS_UNKNOWN) {
12 if (!isSingleThreaded(proc_fd)) {
13 status_ = STATUS_UNSUPPORTED;
14 } else {
15 pid_t pid = fork();
16 if (pid < 0) {
17 die("Failed to check for sandbox support");
18 }
19 if (!pid) {
20 static const struct sock_filter filter[] = {
21 // If the architecture doesn't match SECCOMP_ARCH, disallow the
22 // system call.
23 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
24 offsetof(struct arch_seccomp_data, arch)),
25 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 0, 3),
26
27 // Check the system call number. The only allowed call are getpid()
28 // and exit_group()
29 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
30 offsetof(struct arch_seccomp_data, nr)),
31 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_getpid, 2, 1),
32 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_exit_group, 0, 2),
33 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
34 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | EPERM),
35 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
36 };
37
38 // Try to install filter. If we succeed, return success.
39 const struct sock_fprog prog = {
40 ARRAYSIZE(filter),
41 (struct sock_filter *)filter
42 };
43 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0 &&
44 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) == 0 &&
45 syscall(__NR_getpid) == -1 && errno == EPERM) {
46 syscall(__NR_exit_group, (intptr_t)0);
47 }
48 _exit(1);
49 }
50 int status;
51 TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
52 status_ = WIFEXITED(status) && !WEXITSTATUS(status)
53 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;
54 }
55 }
56 return status_ == STATUS_AVAILABLE;
57 }
58
59 void Sandbox::setProcFd(int proc_fd) {
60 proc_fd_ = proc_fd;
61 }
62
63 void Sandbox::startSandbox() {
64 if (status_ == STATUS_UNSUPPORTED) {
65 die("Trying to start sandbox, even though it is known to be unavailable");
66 }
67 if (proc_fd_ < 0) {
68 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
69 }
70 if (proc_fd_ < 0) {
71 die("Cannot access /proc");
72 }
73 if (!isSingleThreaded(proc_fd_)) {
74 die("Cannot start sandbox, if process is already multi-threaded");
75 }
76 disableFilesystem();
77 installFilter();
78
79 // We no longer need access to any files in /proc
80 if (proc_fd_ >= 0) {
81 if (TEMP_FAILURE_RETRY(close(proc_fd_))) {
82 die("Failed to close file descriptor for /proc");
83 }
84 proc_fd_ = -1;
85 }
86 }
87
88 bool Sandbox::isSingleThreaded(int proc_fd) {
89 struct stat sb;
90 int task = -1;
91 if (proc_fd < 0 ||
92 (task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
93 fstat(task, &sb) != 0 ||
94 sb.st_nlink != 3 ||
95 TEMP_FAILURE_RETRY(close(task))) {
96 if (task >= 0) {
97 TEMP_FAILURE_RETRY(close(task));
98 }
99 return false;
100 }
101 return true;
102 }
103
104 bool Sandbox::disableFilesystem() {
jln (very slow on Chromium) 2012/05/30 20:31:21 Looks good, but this should be kept independent of
105 // Some versions of PR_SET_NO_NEW_PRIVS allow unprivileged processes
106 // to call chroot(). If this feature is available in the kernel, move
107 // us into a non-existent directory.
108 // This is slightly more difficult than it sounds. We don't want
109 // to actually create a directory anywhere, as that is difficult
110 // to do securely. Instead, we rely on the /proc filesystem to
111 // give us a directory for our child process. We can then remove
112 // this directory by terminating the process.
113 // Also, pass the file descriptor from the child process to the
114 // parent rather than opening the directory by "/proc/${PID}". The
115 // latter doesn't necessarily work, if somebody already pushed us
116 // into a new pid namespace. Access by "/proc/self" is more reliable.
117 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
118 return false;
119 }
120 int fds[2];
121 pid_t pid;
122 if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, fds) < 0 ||
123 (pid = fork()) < 0) {
124 chroot_failed:
125 die("Failed to isolate file system accesses");
126 }
127 if (!pid) {
128 TEMP_FAILURE_RETRY(close(fds[1]));
129 prctl(PR_SET_DUMPABLE, 1);
130 fds[1] = openat(proc_fd_, "self/fdinfo", O_RDONLY|O_DIRECTORY);
131 if (fds[1] >= 0) {
132 Util::sendFds(fds[0], NULL, 0, fds[1], -1);
133 }
134 _exit(0);
135 }
136 TEMP_FAILURE_RETRY(close(fds[0]));
137 if (!Util::getFds(fds[1], NULL, 0, &fds[0], NULL)) {
138 goto chroot_failed;
139 }
140 bool rc = false;
141 if (fchdir(fds[0]) == 0 && chroot(".") == 0) {
142 rc = true;
143 }
144 TEMP_FAILURE_RETRY(close(fds[0]));
145 TEMP_FAILURE_RETRY(close(fds[1]));
146 TEMP_FAILURE_RETRY(waitpid(pid, NULL, 0));
147 return rc;
148 }
149
150 int Sandbox::jumpTableSize(int numSyscalls, bool recursing) {
151 int ret = 0;
152 if (numSyscalls <= 0) {
153 // Nothing to do
154 } else if (numSyscalls > 160) {
155 for (int i = 0; i < numSyscalls; i += 160) {
156 ret += jumpTableSize(std::min(160, numSyscalls-i));
157 }
158 } else {
159 if (numSyscalls <= 3) {
160 ret += numSyscalls;
161 } else {
162 int m = numSyscalls/2;
163 ret += 1 + jumpTableSize(m, true) + jumpTableSize(numSyscalls-m, true);
164 }
165 if (!recursing) {
166 ++ret;
167 }
168 }
169 return ret;
170 }
171
172 void Sandbox::verifyJumpTable(struct sock_filter *filter, int numInsn,
173 const int *syscallList, int numSyscalls) {
174 if (numSyscalls <= 0) {
175 if (numInsn != 0) {
176 failed:
177 die("Failed to assemble jump table");
178 }
179 return;
180 }
181 int j = 0;
182 for (int i = syscallList[0]-1; i <= syscallList[numSyscalls-1]+1; ++i) {
183 for (; j < numSyscalls && syscallList[j] < i; ++j) { }
184 bool present = j < numSyscalls && syscallList[j] == i;
185 for (int ip = 0; ip < numInsn; ++ip) {
186 if (filter[ip].code == BPF_JMP+BPF_JEQ+BPF_K) {
187 ip += i == (int)filter[ip].k ? filter[ip].jt : filter[ip].jf;
188 } else if (filter[ip].code == BPF_JMP+BPF_JGE+BPF_K) {
189 ip += i >= (int)filter[ip].k ? filter[ip].jt : filter[ip].jf;
190 } else if (filter[ip].code == BPF_RET+BPF_K) {
191 if (!present) {
192 goto failed;
193 } else {
194 goto ok;
195 }
196 } else {
197 goto failed;
198 }
199 if (ip >= numInsn) {
200 goto failed;
201 }
202 }
203 if (present) {
204 goto failed;
205 }
206 ok:;
207 }
208 return;
209 }
210
// qsort() comparator for arrays of int, ascending order.
// Compares instead of subtracting: "a - b" overflows (undefined behavior)
// and can report the wrong sign when the operands are far apart.
static int cmp(const void *a, const void *b) {
  const int lhs = *static_cast<const int *>(a);
  const int rhs = *static_cast<const int *>(b);
  return (lhs > rhs) - (lhs < rhs);
}
214
215 int Sandbox::jumpTable(struct sock_filter *filter, int *idx,
216 const int *syscallList, int numSyscalls,
217 int ret, bool sorted, bool recursing) {
218 const int origIdx = *idx;
219
220 // If the list of system calls is not yet sorted, we have to do that now.
221 const int *list;
222 int l[sorted ? 0 : numSyscalls];
223 if (sorted) {
224 list = syscallList;
225 } else {
226 memcpy(l, syscallList, sizeof(int)*numSyscalls);
227 qsort(l, numSyscalls, sizeof(int), cmp);
228 list = l;
229 }
230
231 // If the list of system calls is too big, we have to split it. That allows
232 // us to avoid jumps that are longer than 256 instructions.
233 if (numSyscalls <= 0) {
234 // Nothing to do
235 } else if (numSyscalls > 160) {
236 for (int i = 0; i < numSyscalls; i += 160) {
237 jumpTable(filter, idx, list+i, std::min(160, numSyscalls-i), ret, true);
238 }
239 verifyJumpTable(filter + origIdx, *idx - origIdx, list, numSyscalls);
240 } else {
241 if (numSyscalls <= 3) {
242 for (int i = 0; i < numSyscalls; ++i) {
243 // If outputting more than one comparison, only mark the very last one
244 // with BPF_JMP. When fixing up jump targets, we use this information
245 // to generate the correct if..else.. sequence of jumps. And at that
246 // point, we add the missing BPF_JMP into the filter.
247 filter[(*idx)++] = (struct sock_filter)
248 BPF_JUMP((i == numSyscalls-1 ? BPF_JMP : 0)+BPF_JEQ+BPF_K,
249 list[i], 0, 0);
250 }
251 } else {
252 int m = numSyscalls/2;
253 int x = (*idx)++;
254 filter[x] = (struct sock_filter)
255 BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, list[m], 0, 0);
256 jumpTable(filter, idx, list, m, ret, true, true);
257 if (*idx - x - 1 > 255) {
258 die("Failed to assemble jump table");
259 }
260 filter[x].jt = *idx - x - 1;
261 jumpTable(filter, idx, list+m, numSyscalls-m, ret, true, true);
262 }
263
264 // If we are done recursing, fix up jump targets and insert the
265 // return statement.
266 if (!recursing) {
267 for (int i = origIdx; i < *idx; ++i) {
268 if (BPF_OP(filter[i].code) == BPF_JEQ) {
269 if (*idx - i > 255) {
270 die("Failed to assemble jump table");
271 }
272 filter[i].jt = *idx - i - 1;
273 if (BPF_CLASS(filter[i].code) == BPF_JMP) {
274 filter[i].jf = *idx - i;
275 } else {
276 filter[i].code += BPF_JMP;
277 }
278 }
279 }
280 filter[(*idx)++] = (struct sock_filter)BPF_STMT(BPF_RET+BPF_K, ret);
281 verifyJumpTable(filter + origIdx, *idx - origIdx, list, numSyscalls);
282 }
283 }
284
285 return *idx - origIdx;
286 }
287
288 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,
289 EvaluateArguments argumentEvaluator) {
290 evaluators_.push_back(std::make_pair<EvaluateSyscall, EvaluateArguments>(
291 syscallEvaluator, argumentEvaluator));
292 }
293
294 void Sandbox::installFilter() {
295 // Set new SIGSYS handler
296 struct sigaction sa;
297 memset(&sa, 0, sizeof(sa));
298 sa.sa_sigaction = &sigSys;
299 sa.sa_flags = SA_SIGINFO;
300 if (sigaction(SIGSYS, &sa, NULL) < 0) {
301 filter_failed:
302 die("Failed to configure system call filters");
303 }
304
305 // Unmask SIGSYS
306 sigset_t mask;
307 sigemptyset(&mask);
308 sigaddset(&mask, SIGSYS);
309 if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
310 goto filter_failed;
311 }
312
313 // Static preamble at the beginning of the filter program
314 // static const struct sock_filter filterPreamble[] = {
315 // // If the architecture doesn't match SECCOMP_ARCH, disallow the
316 // // system call.
317 // BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch)),
318 // BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0),
319 // BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_DENY),
320 //
321 // // Grab the system call number, so that we can implement jump tables.
322 // BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr)),
323 // };
324
325 for (std::vector<std::pair<EvaluateSyscall, EvaluateArguments> >::
326 const_iterator iter = evaluators_.begin();
327 iter != evaluators_.end();
328 ++iter) {
329 EvaluateSyscall evaluateSyscall = iter->first;
330 EvaluateArguments evaluateArgs = iter->second;
331 int oldSysnum = INT32_MIN;
332 ErrorCode oldErr = evaluateSyscall(oldSysnum);
333 if (oldErr != evaluateSyscall(-1) ||
334 (oldErr >= SB_INSPECT_ARG_1 && oldErr <= SB_INSPECT_ARG_6)) {
335 policyErr:
336 die("Invalid sandbox policy");
337 }
338 for (int sysnum = 0; sysnum <= MAX_SYSCALL; ++sysnum) {
339 ErrorCode err = evaluateSyscall(sysnum);
340 if (err != oldErr) {
341 addRange(oldSysnum, sysnum-1, oldErr);
342 oldSysnum = sysnum;
343 oldErr = err;
344 }
345 }
346 if (oldErr != evaluateSyscall(INT32_MAX) ||
347 (oldErr >= SB_INSPECT_ARG_1 && oldErr <= SB_INSPECT_ARG_6)) {
348 goto policyErr;
349 }
350 addRange(oldSysnum, INT32_MAX, oldErr);
351
352 /***/
353
354 // Install BPF filter program
355 const struct sock_fprog prog = { 0 /***/, 0 /***/ };
356 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
357 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
358 goto filter_failed;
359 }
360 }
361
362 return;
363 }
364
365 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
jln (very slow on Chromium) 2012/05/30 20:31:21 For the purpose of merging with Chris code, would
366 if (info->si_code != SYS_SECCOMP || !void_context) {
367 die("Unexpected SIGSYS received");
368 }
369 ucontext_t *ctx = (ucontext_t *)void_context;
370 int old_errno = errno;
371 void *rc =
372 (void *)(intptr_t)-(int)(SECCOMP_RET_DENY & SECCOMP_RET_DATA);
373
374 if (rc == (void *)(intptr_t)-(int)(SECCOMP_RET_DENY & SECCOMP_RET_DATA)) {
375 // sprintf() is not technically async-signal safe. But in glibc it
376 // tends to be much safer than calling fprintf() or any other higher-
377 // level I/O function.
378 /***/
379 char buf[80];
380 sprintf(buf, "Seccomp policy denies system call %ld\n",
381 (long int)ctx->uc_mcontext.gregs[REG_SYSCALL]);
382 if (TEMP_FAILURE_RETRY(write(2, buf, strlen(buf)))) {}
383 }
384
385 ctx->uc_mcontext.gregs[REG_RESULT] = (greg_t)rc;
386 errno = old_errno;
387 return;
388 }
389
390
391 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
392 int Sandbox::proc_fd_ = -1;
393 std::vector<std::pair<Sandbox::EvaluateSyscall,
394 Sandbox::EvaluateArguments> > Sandbox::evaluators_;
395
396 } // namespace
OLDNEW
« no previous file with comments | « sandbox/linux/seccomp_bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp_bpf/util.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698