|
OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "sandbox/linux/seccomp_bpf/sandbox_bpf.h" | |
6 | |
7 // The kernel gives us a sandbox, we turn it into a playground :-) | |
8 // This is version 2 of the playground; version 1 was built on top of | |
9 // pre-BPF seccomp mode. | |
10 namespace playground2 { | |
11 | |
12 Sandbox::ErrorCode Sandbox::probeEvaluator(int signo) { | |
13 switch (signo) { | |
14 case __NR_getpid: | |
15 // Return EPERM so that we can check that the filter actually ran. | |
16 return (ErrorCode)EPERM; | |
17 case __NR_exit_group: | |
18 // Allow exit() with a non-default return code. | |
19 return SB_ALLOWED; | |
20 default: | |
21 // Make everything else fail in an easily recognizable way. | |
22 return (ErrorCode)EINVAL; | |
23 } | |
24 } | |
25 | |
26 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) { | |
27 // Block all signals before forking a child process. This prevents an | |
28 // attacker from manipulating our test by sending us an unexpected signal. | |
29 sigset_t oldMask, newMask; | |
30 if (sigfillset(&newMask) || | |
31 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { | |
32 die("sigprocmask() failed"); | |
33 } | |
34 | |
35 pid_t pid = fork(); | |
36 if (pid < 0) { | |
37 // Die if we cannot fork(). We would probably fail a little later | |
38 // anyway, as the machine is likely very close to running out of | |
39 // memory. | |
40 // But what we don't want to do is return "false", as a crafty | |
41 // attacker might cause fork() to fail at will and could trick us | |
42 // into running without a sandbox. | |
43 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails | |
44 die("fork() failed unexpectedly"); | |
45 } | |
46 | |
47 // In the child process | |
48 if (!pid) { | |
49 // Test a very simple sandbox policy to verify that we can | |
50 // successfully turn on sandboxing. | |
51 suppressLogging_ = true; | |
52 evaluators_.clear(); | |
53 setSandboxPolicy(probeEvaluator, NULL); | |
54 setProcFd(proc_fd); | |
55 startSandbox(); | |
56 if (syscall(__NR_getpid) < 0 && errno == EPERM) { | |
57 syscall(__NR_exit_group, (intptr_t)100); | |
58 } | |
59 die(NULL); | |
60 } | |
61 | |
62 // In the parent process | |
63 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { | |
64 die("sigprocmask() failed"); | |
65 } | |
66 int status; | |
67 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { | |
68 die("waitpid() failed unexpectedly"); | |
69 } | |
70 return WIFEXITED(status) && WEXITSTATUS(status) == 100; | |
71 } | |
72 | |
73 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) { | |
74 // It the sandbox is currently active, we clearly must have support for | |
75 // sandboxing. | |
76 if (status_ == STATUS_ENABLED) { | |
77 return status_; | |
78 } | |
79 | |
80 // Even if the sandbox was previously available, something might have | |
81 // changed in our run-time environment. Check one more time. | |
82 if (status_ == STATUS_AVAILABLE) { | |
83 if (!isSingleThreaded(proc_fd)) { | |
84 status_ = STATUS_UNAVAILABLE; | |
85 } | |
86 return status_; | |
87 } | |
88 | |
89 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) { | |
90 // All state transitions resulting in STATUS_UNAVAILABLE are immediately | |
91 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all | |
92 // happen, if and only if they are triggered by the process being multi- | |
93 // threaded. | |
94 // In other words, if a single-threaded process is currently in the | |
95 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is | |
96 // actually available. | |
97 status_ == STATUS_AVAILABLE; | |
98 return status_; | |
99 } | |
100 | |
101 // If we have not previously checked for availability of the sandbox or if | |
102 // we otherwise don't believe to have a good cached value, we have to | |
103 // perform a thorough check now. | |
104 if (status_ == STATUS_UNKNOWN) { | |
105 status_ = kernelSupportSeccompBPF(proc_fd) | |
106 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; | |
107 | |
108 // As we are performing our tests from a child process, the run-time | |
109 // environment that is visible to the sandbox is always guaranteed to be | |
110 // single-threaded. Let's check here whether the caller is single- | |
111 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. | |
112 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) { | |
113 status_ = STATUS_UNAVAILABLE; | |
114 } | |
115 } | |
116 return status_; | |
117 } | |
118 | |
119 void Sandbox::setProcFd(int proc_fd) { | |
120 proc_fd_ = proc_fd; | |
121 } | |
122 | |
123 void Sandbox::startSandbox() { | |
124 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { | |
125 die("Trying to start sandbox, even though it is known to be unavailable"); | |
126 } else if (status_ == STATUS_ENABLED) { | |
127 die("Cannot start sandbox recursively. Use multiple calls to " | |
128 "setSandboxPolicy() to stack policies instead"); | |
129 } | |
130 if (proc_fd_ < 0) { | |
131 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); | |
132 } | |
133 if (proc_fd_ < 0) { | |
134 // For now, continue in degraded mode, if we can't access /proc. | |
135 // In the future, we might want to tighten this requirement. | |
136 } | |
137 if (!isSingleThreaded(proc_fd_)) { | |
138 die("Cannot start sandbox, if process is already multi-threaded"); | |
139 } | |
140 | |
141 // We no longer need access to any files in /proc. We want to do this | |
142 // before installing the filters, just in case that our policy denies | |
143 // close(). | |
144 if (proc_fd_ >= 0) { | |
145 if (HANDLE_EINTR(close(proc_fd_))) { | |
146 die("Failed to close file descriptor for /proc"); | |
147 } | |
148 proc_fd_ = -1; | |
149 } | |
150 | |
151 // Install the filters. | |
152 installFilter(); | |
153 | |
154 // We are now inside the sandbox. | |
155 status_ = STATUS_ENABLED; | |
156 } | |
157 | |
158 bool Sandbox::isSingleThreaded(int proc_fd) { | |
159 if (proc_fd < 0) { | |
160 // Cannot determine whether program is single-threaded. Hope for | |
161 // the best... | |
162 return true; | |
163 } | |
164 | |
165 struct stat sb; | |
166 int task = -1; | |
167 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || | |
168 fstat(task, &sb) != 0 || | |
169 sb.st_nlink != 3 || | |
170 HANDLE_EINTR(close(task))) { | |
171 if (task >= 0) { | |
172 HANDLE_EINTR(close(task)); | |
173 } | |
174 return false; | |
175 } | |
176 return true; | |
177 } | |
178 | |
179 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, | |
180 EvaluateArguments argumentEvaluator) { | |
181 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); | |
182 } | |
183 | |
184 void Sandbox::installFilter() { | |
185 // Verify that the user pushed a policy. | |
186 if (evaluators_.empty()) { | |
187 filter_failed: | |
188 die("Failed to configure system call filters"); | |
189 } | |
190 | |
191 // Set new SIGSYS handler | |
192 struct sigaction sa; | |
193 memset(&sa, 0, sizeof(sa)); | |
194 sa.sa_sigaction = &sigSys; | |
195 sa.sa_flags = SA_SIGINFO; | |
196 if (sigaction(SIGSYS, &sa, NULL) < 0) { | |
197 goto filter_failed; | |
198 } | |
199 | |
200 // Unmask SIGSYS | |
201 sigset_t mask; | |
202 sigemptyset(&mask); | |
203 sigaddset(&mask, SIGSYS); | |
Chris Evans
2012/06/04 22:21:55
Nit: we took the trouble to check the sigfillset()
| |
204 if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | |
205 goto filter_failed; | |
206 } | |
207 | |
208 // We can't handle stacked evaluators, yet. We'll get there eventually | |
209 // though. Hang tight. | |
210 if (evaluators_.size() != 1) { | |
211 die("Not implemented"); | |
212 } | |
213 | |
214 // If the architecture doesn't match SECCOMP_ARCH, disallow the | |
215 // system call. | |
216 std::vector<struct sock_filter> program; | |
217 program.push_back((struct sock_filter) | |
218 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, | |
219 offsetof(struct arch_seccomp_data, arch))); | |
220 program.push_back((struct sock_filter) | |
221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); | |
222 program.push_back((struct sock_filter) | |
223 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
224 | |
225 // Grab the system call number, so that we can implement jump tables. | |
226 program.push_back((struct sock_filter) | |
227 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr))); | |
228 | |
229 // Evaluate all possible system calls and depending on their | |
230 // exit codes generate a BPF filter. | |
231 // This is very inefficient right now. We need to be much smarter | |
232 // eventually. | |
Jorge Lucangeli Obes
2012/06/04 22:16:14
We'll probably want to fix this (to at least avoid
Chris Evans
2012/06/04 22:21:55
Nit: note the actual run time in a comment. Julien
| |
233 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | |
234 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL; ++sysnum) { | |
235 ErrorCode err = evaluateSyscall(sysnum); | |
236 int ret; | |
237 switch (err) { | |
238 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: | |
239 die("Not implemented"); | |
240 case SB_TRAP: | |
241 ret = SECCOMP_RET_TRAP; | |
242 break; | |
243 case SB_ALLOWED: | |
244 ret = SECCOMP_RET_ALLOW; | |
245 break; | |
246 default: | |
247 if (err >= static_cast<ErrorCode>(1) && | |
248 err <= static_cast<ErrorCode>(4096)) { | |
249 // We limit errno values to a reasonable range. In fact, the Linux ABI | |
250 // doesn't support errno values outside of this range. | |
251 ret = SECCOMP_RET_ERRNO + err; | |
252 } else { | |
253 die("Invalid ErrorCode reported by sandbox system call evaluator"); | |
254 } | |
255 break; | |
256 } | |
257 program.push_back((struct sock_filter) | |
258 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1)); | |
259 program.push_back((struct sock_filter) | |
260 BPF_STMT(BPF_RET+BPF_K, ret)); | |
261 } | |
262 | |
263 // Everything that isn't allowed is forbidden. Eventually, we would | |
264 // like to have a way to log forbidden calls, when in debug mode. | |
265 program.push_back((struct sock_filter) | |
266 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
267 | |
268 // Install BPF filter program | |
269 const struct sock_fprog prog = { program.size(), &program[0] }; | |
270 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
271 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
272 goto filter_failed; | |
273 } | |
274 | |
275 return; | |
276 } | |
277 | |
278 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | |
279 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { | |
Chris Evans
2012/06/04 22:21:55
Nit: add a comment that die() might call LOG(FATAL
| |
280 die("Unexpected SIGSYS received"); | |
281 } | |
282 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context); | |
283 int old_errno = errno; | |
284 | |
285 // In case of error, set the REG_RESULT CPU register to the default | |
286 // errno value (i.e. EPERM). | |
287 // We need to be very careful when doing this, as some of our target | |
288 // platforms have pointer types and CPU registers that are wider than | |
289 // ints. Furthermore, the kernel ABI requires us to return a negative | |
290 // value, but errno values are usually positive. And in fact, it would | |
291 // be perfectly reasonable for somebody to have defined them as unsigned | |
292 // properties. This makes the correct incantation of type casts rather | |
293 // subtle. Sometimes, C++ is just too smart for its own good. | |
294 void *rc = (void *)(intptr_t)-(int)SECCOMP_DENY_ERRNO; | |
295 | |
296 // This is where we can add extra code to handle complex system calls. | |
297 // ... | |
298 | |
299 ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc); | |
300 errno = old_errno; | |
301 return; | |
302 } | |
303 | |
304 | |
305 bool Sandbox::suppressLogging_ = false; | |
306 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | |
307 int Sandbox::proc_fd_ = -1; | |
308 std::vector<std::pair<Sandbox::EvaluateSyscall, | |
309 Sandbox::EvaluateArguments> > Sandbox::evaluators_; | |
310 | |
311 } // namespace | |
OLD | NEW |