Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(267)

Side by Side Diff: sandbox/linux/seccomp_bpf/sandbox_bpf.cc

Issue 10458040: Initial snapshot of the new BPF-enabled seccomp sandbox. This code is (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sandbox/linux/seccomp_bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp_bpf/util.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "sandbox/linux/seccomp_bpf/sandbox_bpf.h"
6
7 // The kernel gives us a sandbox, we turn it into a playground :-)
8 // This is version 2 of the playground; version 1 was built on top of
9 // pre-BPF seccomp mode.
10 namespace playground2 {
11
// Syscall policy used by the kernel-support probe: kernelSupportSeccompBPF()
// registers it through setSandboxPolicy() inside a forked child.
// Returns EPERM for getpid, SB_ALLOWED for exit_group, and EINVAL for
// everything else.
// NOTE(review): the parameter is named "signo", but it is compared against
// __NR_* constants, so it carries a system call number rather than a signal
// number.
12 Sandbox::ErrorCode Sandbox::probeEvaluator(int signo) {
13 switch (signo) {
14 case __NR_getpid:
15 // Return EPERM so that we can check that the filter actually ran.
16 return (ErrorCode)EPERM;
17 case __NR_exit_group:
18 // Allow exit() with a non-default return code.
19 return SB_ALLOWED;
20 default:
21 // Make everything else fail in an easily recognizable way.
22 return (ErrorCode)EINVAL;
23 }
24 }
25
// Probes whether a seccomp-BPF sandbox can really be enabled. A child process
// is forked, installs the probeEvaluator policy and starts the sandbox; it
// exits with status 100 only if getpid() then fails with EPERM.
// proc_fd is handed to setProcFd() in the child.
// Returns true if the child exited normally with status 100, false otherwise.
// Calls die() (instead of returning false) if signal masking, fork() or
// waitpid() fails.
26 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) {
27 // Block all signals before forking a child process. This prevents an
28 // attacker from manipulating our test by sending us an unexpected signal.
29 sigset_t oldMask, newMask;
30 if (sigfillset(&newMask) ||
31 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) {
32 die("sigprocmask() failed");
33 }
34
35 pid_t pid = fork();
36 if (pid < 0) {
37 // Die if we cannot fork(). We would probably fail a little later
38 // anyway, as the machine is likely very close to running out of
39 // memory.
40 // But what we don't want to do is return "false", as a crafty
41 // attacker might cause fork() to fail at will and could trick us
42 // into running without a sandbox.
43 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails
44 die("fork() failed unexpectedly");
45 }
46
47 // In the child process
48 if (!pid) {
49 // Test a very simple sandbox policy to verify that we can
50 // successfully turn on sandboxing.
51 suppressLogging_ = true;
// Drop any policies inherited from the parent so that probeEvaluator is the
// only entry in evaluators_.
52 evaluators_.clear();
53 setSandboxPolicy(probeEvaluator, NULL);
54 setProcFd(proc_fd);
55 startSandbox();
// probeEvaluator makes getpid fail with EPERM and allows exit_group, so
// exit status 100 signals to the parent that the filter is active.
56 if (syscall(__NR_getpid) < 0 && errno == EPERM) {
57 syscall(__NR_exit_group, (intptr_t)100);
58 }
// Reached when the getpid() probe did not fail with EPERM, or when the
// exit_group call above returned.
59 die(NULL);
60 }
61
62 // In the parent process
// Restore the signal mask that was saved before the fork().
63 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) {
64 die("sigprocmask() failed");
65 }
66 int status;
67 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
68 die("waitpid() failed unexpectedly");
69 }
70 return WIFEXITED(status) && WEXITSTATUS(status) == 100;
71 }
72
// Reports whether the seccomp-BPF sandbox can be used, updating and returning
// the cached status_.
// proc_fd is forwarded to isSingleThreaded() and kernelSupportSeccompBPF().
// STATUS_ENABLED is returned as is; STATUS_AVAILABLE and STATUS_UNAVAILABLE
// are re-evaluated against the current threading state; STATUS_UNKNOWN
// triggers the full kernel probe. Any other cached value is returned
// unchanged.
73 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) {
74 // If the sandbox is currently active, we clearly must have support for
75 // sandboxing.
76 if (status_ == STATUS_ENABLED) {
77 return status_;
78 }
79
80 // Even if the sandbox was previously available, something might have
81 // changed in our run-time environment. Check one more time.
82 if (status_ == STATUS_AVAILABLE) {
83 if (!isSingleThreaded(proc_fd)) {
84 status_ = STATUS_UNAVAILABLE;
85 }
86 return status_;
87 }
88
89 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) {
90 // All state transitions resulting in STATUS_UNAVAILABLE are immediately
91 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all
92 // happen, if and only if they are triggered by the process being multi-
93 // threaded.
94 // In other words, if a single-threaded process is currently in the
95 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is
96 // actually available.
// This must be an assignment: a comparison here would have no effect and the
// cached state could never leave STATUS_UNAVAILABLE.
97 status_ = STATUS_AVAILABLE;
98 return status_;
99 }
100
101 // If we have not previously checked for availability of the sandbox or if
102 // we otherwise don't believe to have a good cached value, we have to
103 // perform a thorough check now.
104 if (status_ == STATUS_UNKNOWN) {
105 status_ = kernelSupportSeccompBPF(proc_fd)
106 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;
107
108 // As we are performing our tests from a child process, the run-time
109 // environment that is visible to the sandbox is always guaranteed to be
110 // single-threaded. Let's check here whether the caller is single-
111 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.
112 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) {
113 status_ = STATUS_UNAVAILABLE;
114 }
115 }
116 return status_;
117 }
118
// Stores proc_fd in proc_fd_. startSandbox() later passes that descriptor to
// isSingleThreaded() and then closes it.
// NOTE(review): presumably an open descriptor for the /proc directory, since
// startSandbox() opens "/proc" itself when proc_fd_ is negative -- confirm
// against the header.
119 void Sandbox::setProcFd(int proc_fd) {
120 proc_fd_ = proc_fd;
121 }
122
// Enables the sandbox for the current process: validates status_, verifies
// that the process is single-threaded, closes the /proc descriptor, installs
// the filter via installFilter() and finally sets status_ to STATUS_ENABLED.
// Calls die() if the sandbox is known to be unsupported/unavailable, is
// already enabled, the process is multi-threaded, or closing proc_fd_ fails.
123 void Sandbox::startSandbox() {
124 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
125 die("Trying to start sandbox, even though it is known to be unavailable");
126 } else if (status_ == STATUS_ENABLED) {
127 die("Cannot start sandbox recursively. Use multiple calls to "
128 "setSandboxPolicy() to stack policies instead");
129 }
// No descriptor was supplied through setProcFd(); try to open /proc here.
130 if (proc_fd_ < 0) {
131 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
132 }
// Intentionally empty branch: a failed open() is tolerated. With a negative
// descriptor isSingleThreaded() below returns true without checking.
133 if (proc_fd_ < 0) {
134 // For now, continue in degraded mode, if we can't access /proc.
135 // In the future, we might want to tighten this requirement.
136 }
137 if (!isSingleThreaded(proc_fd_)) {
138 die("Cannot start sandbox, if process is already multi-threaded");
139 }
140
141 // We no longer need access to any files in /proc. We want to do this
142 // before installing the filters, just in case that our policy denies
143 // close().
144 if (proc_fd_ >= 0) {
145 if (HANDLE_EINTR(close(proc_fd_))) {
146 die("Failed to close file descriptor for /proc");
147 }
148 proc_fd_ = -1;
149 }
150
151 // Install the filters.
152 installFilter();
153
154 // We are now inside the sandbox.
155 status_ = STATUS_ENABLED;
156 }
157
// Decides whether the calling process is single-threaded by inspecting the
// link count of the "self/task" directory, opened relative to proc_fd.
// Returns true if proc_fd is negative (no check is possible) or if the
// directory reports st_nlink == 3. Returns false if the directory cannot be
// opened, fstat() fails, the link count differs, or close() fails.
158 bool Sandbox::isSingleThreaded(int proc_fd) {
159 if (proc_fd < 0) {
160 // Cannot determine whether program is single-threaded. Hope for
161 // the best...
162 return true;
163 }
164
165 struct stat sb;
166 int task = -1;
// NOTE(review): a link count of 3 presumably corresponds to ".", ".." and a
// single per-thread subdirectory -- confirm against procfs behaviour.
// NOTE(review): if the close() inside this condition reports failure, the
// error branch below calls close() on the same descriptor a second time.
167 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
168 fstat(task, &sb) != 0 ||
169 sb.st_nlink != 3 ||
170 HANDLE_EINTR(close(task))) {
// Release the descriptor when a check after a successful openat() failed.
171 if (task >= 0) {
172 HANDLE_EINTR(close(task));
173 }
174 return false;
175 }
176 return true;
177 }
178
// Appends a (syscallEvaluator, argumentEvaluator) pair to evaluators_.
// installFilter() currently requires exactly one registered pair and only
// uses the syscall evaluator of that pair.
179 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,
180 EvaluateArguments argumentEvaluator) {
181 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));
182 }
183
// Builds a BPF program from the single registered syscall evaluator and
// installs it as the seccomp filter for this process. Before that it installs
// sigSys() as the SIGSYS handler and unblocks SIGSYS.
// Calls die() if no policy was registered, if more than one policy is
// registered, if the evaluator returns an unsupported ErrorCode, or if any of
// sigaction(), sigprocmask() or prctl() fails.
184 void Sandbox::installFilter() {
185 // Verify that the user pushed a policy.
186 if (evaluators_.empty()) {
// Shared failure exit: the error paths further down jump back to this label.
187 filter_failed:
188 die("Failed to configure system call filters");
189 }
190
191 // Set new SIGSYS handler
192 struct sigaction sa;
193 memset(&sa, 0, sizeof(sa));
194 sa.sa_sigaction = &sigSys;
195 sa.sa_flags = SA_SIGINFO;
196 if (sigaction(SIGSYS, &sa, NULL) < 0) {
197 goto filter_failed;
198 }
199
200 // Unmask SIGSYS
// NOTE(review): the return values of sigemptyset() and sigaddset() are not
// checked here.
201 sigset_t mask;
202 sigemptyset(&mask);
203 sigaddset(&mask, SIGSYS);
Chris Evans 2012/06/04 22:21:55 Nit: we took the trouble to check the sigfillset()
204 if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
205 goto filter_failed;
206 }
207
208 // We can't handle stacked evaluators, yet. We'll get there eventually
209 // though. Hang tight.
210 if (evaluators_.size() != 1) {
211 die("Not implemented");
212 }
213
214 // If the architecture doesn't match SECCOMP_ARCH, disallow the
215 // system call.
// The jump skips the following deny instruction when the architecture matches.
216 std::vector<struct sock_filter> program;
217 program.push_back((struct sock_filter)
218 BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
219 offsetof(struct arch_seccomp_data, arch)));
220 program.push_back((struct sock_filter)
221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));
222 program.push_back((struct sock_filter)
223 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));
224
225 // Grab the system call number, so that we can implement jump tables.
226 program.push_back((struct sock_filter)
227 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr)));
228
229 // Evaluate all possible system calls and depending on their
230 // exit codes generate a BPF filter.
231 // This is very inefficient right now. We need to be much smarter
232 // eventually.
// Each system call number contributes two instructions: a compare and a
// return of the action chosen by the evaluator.
Jorge Lucangeli Obes 2012/06/04 22:16:14 We'll probably want to fix this (to at least avoid
Chris Evans 2012/06/04 22:21:55 Nit: note the actual run time in a comment. Julien
233 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;
234 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL; ++sysnum) {
235 ErrorCode err = evaluateSyscall(sysnum);
236 int ret;
237 switch (err) {
// Argument inspection is not supported yet.
238 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:
239 die("Not implemented");
240 case SB_TRAP:
241 ret = SECCOMP_RET_TRAP;
242 break;
243 case SB_ALLOWED:
244 ret = SECCOMP_RET_ALLOW;
245 break;
246 default:
247 if (err >= static_cast<ErrorCode>(1) &&
248 err <= static_cast<ErrorCode>(4096)) {
249 // We limit errno values to a reasonable range. In fact, the Linux ABI
250 // doesn't support errno values outside of this range.
251 ret = SECCOMP_RET_ERRNO + err;
252 } else {
253 die("Invalid ErrorCode reported by sandbox system call evaluator");
254 }
255 break;
256 }
// On a match, fall through to the return below; otherwise skip over it.
257 program.push_back((struct sock_filter)
258 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));
259 program.push_back((struct sock_filter)
260 BPF_STMT(BPF_RET+BPF_K, ret));
261 }
262
263 // Everything that isn't allowed is forbidden. Eventually, we would
264 // like to have a way to log forbidden calls, when in debug mode.
265 program.push_back((struct sock_filter)
266 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));
267
268 // Install BPF filter program
// PR_SET_NO_NEW_PRIVS is set first; the filter is installed only if that
// call succeeds.
269 const struct sock_fprog prog = { program.size(), &program[0] };
270 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
271 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
272 goto filter_failed;
273 }
274
275 return;
276 }
277
// SIGSYS handler registered by installFilter() with SA_SIGINFO.
// nr is the delivered signal number, info the accompanying siginfo_t and
// void_context the saved ucontext_t of the interrupted code.
// Calls die() unless this is a SIGSYS with si_code SYS_SECCOMP and a non-NULL
// context. Otherwise it stores the negated SECCOMP_DENY_ERRNO in the
// REG_RESULT register of the saved context and leaves errno as it found it.
278 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
279 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) {
Chris Evans 2012/06/04 22:21:55 Nit: add a comment that die() might call LOG(FATAL
280 die("Unexpected SIGSYS received");
281 }
282 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);
// Saved so that the handler can restore errno before returning.
283 int old_errno = errno;
284
285 // In case of error, set the REG_RESULT CPU register to the default
286 // errno value (i.e. EPERM).
287 // We need to be very careful when doing this, as some of our target
288 // platforms have pointer types and CPU registers that are wider than
289 // ints. Furthermore, the kernel ABI requires us to return a negative
290 // value, but errno values are usually positive. And in fact, it would
291 // be perfectly reasonable for somebody to have defined them as unsigned
292 // properties. This makes the correct incantation of type casts rather
293 // subtle. Sometimes, C++ is just too smart for its own good.
294 void *rc = (void *)(intptr_t)-(int)SECCOMP_DENY_ERRNO;
295
296 // This is where we can add extra code to handle complex system calls.
297 // ...
298
299 ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc);
300 errno = old_errno;
301 return;
302 }
303
304
// Definitions of Sandbox's static data members with their initial values:
// logging is not suppressed, the sandbox status is unknown, no /proc
// descriptor is held (-1), and no policy has been registered.
305 bool Sandbox::suppressLogging_ = false;
306 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
307 int Sandbox::proc_fd_ = -1;
308 std::vector<std::pair<Sandbox::EvaluateSyscall,
309 Sandbox::EvaluateArguments> > Sandbox::evaluators_;
310
311 } // namespace
OLDNEW
« no previous file with comments | « sandbox/linux/seccomp_bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp_bpf/util.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698