OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <endian.h> | |
6 #if __BYTE_ORDER == __BIG_ENDIAN | |
7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit | |
8 // values that need to be inspected by a virtual machine that only ever | |
9 // operates on 32bit values. The kernel developers decided how values | |
10 // should be split into two 32bit words to achieve this goal. But at this | |
11 // time, there is no existing BPF implementation in the kernel that uses | |
12 // 64bit big endian values. So, all we have to go by is the consensus | |
13 // from a discussion on LKML. Actual implementations, if and when they | |
14 // happen, might very well differ. | |
15 // If this code is ever going to be used with such a kernel, you should | |
16 // disable the "#error" and carefully test the code (e.g. run the unit | |
17 // tests). If things don't work, search for all occurrences of __BYTE_ORDER | |
18 // and verify that the proposed implementation agrees with what the kernel | |
19 // actually does. | |
20 #error Big endian operation is untested and expected to be broken | |
21 #endif | |
22 | |
23 #ifndef SECCOMP_BPF_STANDALONE | 5 #ifndef SECCOMP_BPF_STANDALONE |
24 #include "base/logging.h" | 6 #include "base/logging.h" |
25 #include "base/posix/eintr_wrapper.h" | 7 #include "base/posix/eintr_wrapper.h" |
26 #endif | 8 #endif |
27 | 9 |
28 #include "sandbox/linux/seccomp-bpf/codegen.h" | 10 #include "sandbox/linux/seccomp-bpf/codegen.h" |
29 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" | 11 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
30 #include "sandbox/linux/seccomp-bpf/syscall.h" | 12 #include "sandbox/linux/seccomp-bpf/syscall.h" |
31 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" | 13 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" |
32 #include "sandbox/linux/seccomp-bpf/verifier.h" | 14 #include "sandbox/linux/seccomp-bpf/verifier.h" |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
72 | 54 |
73 // The kernel gives us a sandbox, we turn it into a playground :-) | 55 // The kernel gives us a sandbox, we turn it into a playground :-) |
74 // This is version 2 of the playground; version 1 was built on top of | 56 // This is version 2 of the playground; version 1 was built on top of |
75 // pre-BPF seccomp mode. | 57 // pre-BPF seccomp mode. |
76 namespace playground2 { | 58 namespace playground2 { |
77 | 59 |
78 const int kExpectedExitCode = 100; | 60 const int kExpectedExitCode = 100; |
79 | 61 |
80 // We define a really simple sandbox policy. It is just good enough for us | 62 // We define a really simple sandbox policy. It is just good enough for us |
81 // to tell that the sandbox has actually been activated. | 63 // to tell that the sandbox has actually been activated. |
82 ErrorCode Sandbox::probeEvaluator(int sysnum, void *) { | 64 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) { |
83 switch (sysnum) { | 65 switch (sysnum) { |
84 case __NR_getpid: | 66 case __NR_getpid: |
85 // Return EPERM so that we can check that the filter actually ran. | 67 // Return EPERM so that we can check that the filter actually ran. |
86 return ErrorCode(EPERM); | 68 return ErrorCode(EPERM); |
87 case __NR_exit_group: | 69 case __NR_exit_group: |
88 // Allow exit() with a non-default return code. | 70 // Allow exit() with a non-default return code. |
89 return ErrorCode(ErrorCode::ERR_ALLOWED); | 71 return ErrorCode(ErrorCode::ERR_ALLOWED); |
90 default: | 72 default: |
91 // Make everything else fail in an easily recognizable way. | 73 // Make everything else fail in an easily recognizable way. |
92 return ErrorCode(EINVAL); | 74 return ErrorCode(EINVAL); |
93 } | 75 } |
94 } | 76 } |
95 | 77 |
96 void Sandbox::probeProcess(void) { | 78 void Sandbox::ProbeProcess(void) { |
97 if (syscall(__NR_getpid) < 0 && errno == EPERM) { | 79 if (syscall(__NR_getpid) < 0 && errno == EPERM) { |
98 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); | 80 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); |
99 } | 81 } |
100 } | 82 } |
101 | 83 |
102 bool Sandbox::isValidSyscallNumber(int sysnum) { | 84 bool Sandbox::IsValidSyscallNumber(int sysnum) { |
103 return SyscallIterator::IsValid(sysnum); | 85 return SyscallIterator::IsValid(sysnum); |
104 } | 86 } |
105 | 87 |
106 ErrorCode Sandbox::allowAllEvaluator(int sysnum, void *) { | 88 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) { |
107 if (!isValidSyscallNumber(sysnum)) { | 89 if (!IsValidSyscallNumber(sysnum)) { |
108 return ErrorCode(ENOSYS); | 90 return ErrorCode(ENOSYS); |
109 } | 91 } |
110 return ErrorCode(ErrorCode::ERR_ALLOWED); | 92 return ErrorCode(ErrorCode::ERR_ALLOWED); |
111 } | 93 } |
112 | 94 |
113 void Sandbox::tryVsyscallProcess(void) { | 95 void Sandbox::TryVsyscallProcess(void) { |
114 time_t current_time; | 96 time_t current_time; |
115 // time() is implemented as a vsyscall. With an older glibc, with | 97 // time() is implemented as a vsyscall. With an older glibc, with |
116 // vsyscall=emulate and some versions of the seccomp BPF patch | 98 // vsyscall=emulate and some versions of the seccomp BPF patch |
117 // we may get SIGKILL-ed. Detect this! | 99 // we may get SIGKILL-ed. Detect this! |
118 if (time(&current_time) != static_cast<time_t>(-1)) { | 100 if (time(&current_time) != static_cast<time_t>(-1)) { |
119 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); | 101 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); |
120 } | 102 } |
121 } | 103 } |
122 | 104 |
123 bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(), | 105 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(), |
124 EvaluateSyscall syscallEvaluator, | 106 EvaluateSyscall syscall_evaluator, |
125 void *aux, | 107 void *aux, |
126 int proc_fd) { | 108 int proc_fd) { |
127 // Block all signals before forking a child process. This prevents an | 109 // Block all signals before forking a child process. This prevents an |
128 // attacker from manipulating our test by sending us an unexpected signal. | 110 // attacker from manipulating our test by sending us an unexpected signal. |
129 sigset_t oldMask, newMask; | 111 sigset_t old_mask, new_mask; |
130 if (sigfillset(&newMask) || | 112 if (sigfillset(&new_mask) || |
131 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { | 113 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) { |
132 SANDBOX_DIE("sigprocmask() failed"); | 114 SANDBOX_DIE("sigprocmask() failed"); |
133 } | 115 } |
134 int fds[2]; | 116 int fds[2]; |
135 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { | 117 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { |
136 SANDBOX_DIE("pipe() failed"); | 118 SANDBOX_DIE("pipe() failed"); |
137 } | 119 } |
138 | 120 |
139 if (fds[0] <= 2 || fds[1] <= 2) { | 121 if (fds[0] <= 2 || fds[1] <= 2) { |
140 SANDBOX_DIE("Process started without standard file descriptors"); | 122 SANDBOX_DIE("Process started without standard file descriptors"); |
141 } | 123 } |
142 | 124 |
143 pid_t pid = fork(); | 125 pid_t pid = fork(); |
144 if (pid < 0) { | 126 if (pid < 0) { |
145 // Die if we cannot fork(). We would probably fail a little later | 127 // Die if we cannot fork(). We would probably fail a little later |
146 // anyway, as the machine is likely very close to running out of | 128 // anyway, as the machine is likely very close to running out of |
147 // memory. | 129 // memory. |
148 // But what we don't want to do is return "false", as a crafty | 130 // But what we don't want to do is return "false", as a crafty |
149 // attacker might cause fork() to fail at will and could trick us | 131 // attacker might cause fork() to fail at will and could trick us |
150 // into running without a sandbox. | 132 // into running without a sandbox. |
151 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails | 133 sigprocmask(SIG_SETMASK, &old_mask, NULL); // OK, if it fails |
152 SANDBOX_DIE("fork() failed unexpectedly"); | 134 SANDBOX_DIE("fork() failed unexpectedly"); |
153 } | 135 } |
154 | 136 |
155 // In the child process | 137 // In the child process |
156 if (!pid) { | 138 if (!pid) { |
157 // Test a very simple sandbox policy to verify that we can | 139 // Test a very simple sandbox policy to verify that we can |
158 // successfully turn on sandboxing. | 140 // successfully turn on sandboxing. |
159 Die::EnableSimpleExit(); | 141 Die::EnableSimpleExit(); |
160 | 142 |
161 errno = 0; | 143 errno = 0; |
(...skipping 22 matching lines...) Expand all Loading... |
184 if (HANDLE_EINTR(close(fds[1]))) { | 166 if (HANDLE_EINTR(close(fds[1]))) { |
185 // This call to close() has been failing in strange ways. See | 167 // This call to close() has been failing in strange ways. See |
186 // crbug.com/152530. So we only fail in debug mode now. | 168 // crbug.com/152530. So we only fail in debug mode now. |
187 #if !defined(NDEBUG) | 169 #if !defined(NDEBUG) |
188 WriteFailedStderrSetupMessage(fds[1]); | 170 WriteFailedStderrSetupMessage(fds[1]); |
189 SANDBOX_DIE(NULL); | 171 SANDBOX_DIE(NULL); |
190 #endif | 172 #endif |
191 } | 173 } |
192 | 174 |
193 evaluators_.clear(); | 175 evaluators_.clear(); |
194 setSandboxPolicy(syscallEvaluator, aux); | 176 SetSandboxPolicy(syscall_evaluator, aux); |
195 setProcFd(proc_fd); | 177 set_proc_fd(proc_fd); |
196 | 178 |
197 // By passing "quiet=true" to "startSandboxInternal()" we suppress | 179 // By passing "quiet=true" to "startSandboxInternal()" we suppress |
198 // messages for expected and benign failures (e.g. if the current | 180 // messages for expected and benign failures (e.g. if the current |
199 // kernel lacks support for BPF filters). | 181 // kernel lacks support for BPF filters). |
200 startSandboxInternal(true); | 182 StartSandboxInternal(true); |
201 | 183 |
202 // Run our code in the sandbox. | 184 // Run our code in the sandbox. |
203 CodeInSandbox(); | 185 code_in_sandbox(); |
204 | 186 |
205 // CodeInSandbox() is not supposed to return here. | 187 // code_in_sandbox() is not supposed to return here. |
206 SANDBOX_DIE(NULL); | 188 SANDBOX_DIE(NULL); |
207 } | 189 } |
208 | 190 |
209 // In the parent process. | 191 // In the parent process. |
210 if (HANDLE_EINTR(close(fds[1]))) { | 192 if (HANDLE_EINTR(close(fds[1]))) { |
211 SANDBOX_DIE("close() failed"); | 193 SANDBOX_DIE("close() failed"); |
212 } | 194 } |
213 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { | 195 if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) { |
214 SANDBOX_DIE("sigprocmask() failed"); | 196 SANDBOX_DIE("sigprocmask() failed"); |
215 } | 197 } |
216 int status; | 198 int status; |
217 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { | 199 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { |
218 SANDBOX_DIE("waitpid() failed unexpectedly"); | 200 SANDBOX_DIE("waitpid() failed unexpectedly"); |
219 } | 201 } |
220 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode; | 202 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode; |
221 | 203 |
222 // If we fail to support sandboxing, there might be an additional | 204 // If we fail to support sandboxing, there might be an additional |
223 // error message. If so, this was an entirely unexpected and fatal | 205 // error message. If so, this was an entirely unexpected and fatal |
(...skipping 11 matching lines...) Expand all Loading... |
235 SANDBOX_DIE(buf); | 217 SANDBOX_DIE(buf); |
236 } | 218 } |
237 } | 219 } |
238 if (HANDLE_EINTR(close(fds[0]))) { | 220 if (HANDLE_EINTR(close(fds[0]))) { |
239 SANDBOX_DIE("close() failed"); | 221 SANDBOX_DIE("close() failed"); |
240 } | 222 } |
241 | 223 |
242 return rc; | 224 return rc; |
243 } | 225 } |
244 | 226 |
245 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) { | 227 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) { |
246 #if defined(SECCOMP_BPF_VALGRIND_HACKS) | 228 #if defined(SECCOMP_BPF_VALGRIND_HACKS) |
247 if (RUNNING_ON_VALGRIND) { | 229 if (RUNNING_ON_VALGRIND) { |
248 // Valgrind doesn't like our run-time test. Disable testing and assume we | 230 // Valgrind doesn't like our run-time test. Disable testing and assume we |
249 // always support sandboxing. This feature should only ever be enabled when | 231 // always support sandboxing. This feature should only ever be enabled when |
250 // debugging. | 232 // debugging. |
251 return true; | 233 return true; |
252 } | 234 } |
253 #endif | 235 #endif |
254 | 236 |
255 return | 237 return |
256 RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, 0, proc_fd) && | 238 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) && |
257 RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, 0, | 239 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0, |
258 proc_fd); | 240 proc_fd); |
259 } | 241 } |
260 | 242 |
261 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) { | 243 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) { |
262 // If the sandbox is currently active, we clearly must have support for | 244 // If the sandbox is currently active, we clearly must have support for |
263 // sandboxing. | 245 // sandboxing. |
264 if (status_ == STATUS_ENABLED) { | 246 if (status_ == STATUS_ENABLED) { |
265 return status_; | 247 return status_; |
266 } | 248 } |
267 | 249 |
268 // Even if the sandbox was previously available, something might have | 250 // Even if the sandbox was previously available, something might have |
269 // changed in our run-time environment. Check one more time. | 251 // changed in our run-time environment. Check one more time. |
270 if (status_ == STATUS_AVAILABLE) { | 252 if (status_ == STATUS_AVAILABLE) { |
271 if (!isSingleThreaded(proc_fd)) { | 253 if (!IsSingleThreaded(proc_fd)) { |
272 status_ = STATUS_UNAVAILABLE; | 254 status_ = STATUS_UNAVAILABLE; |
273 } | 255 } |
274 return status_; | 256 return status_; |
275 } | 257 } |
276 | 258 |
277 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) { | 259 if (status_ == STATUS_UNAVAILABLE && IsSingleThreaded(proc_fd)) { |
278 // All state transitions resulting in STATUS_UNAVAILABLE are immediately | 260 // All state transitions resulting in STATUS_UNAVAILABLE are immediately |
279 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all | 261 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all |
280 // happen, if and only if they are triggered by the process being multi- | 262 // happen, if and only if they are triggered by the process being multi- |
281 // threaded. | 263 // threaded. |
282 // In other words, if a single-threaded process is currently in the | 264 // In other words, if a single-threaded process is currently in the |
283 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is | 265 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is |
284 // actually available. | 266 // actually available. |
285 status_ = STATUS_AVAILABLE; | 267 status_ = STATUS_AVAILABLE; |
286 return status_; | 268 return status_; |
287 } | 269 } |
288 | 270 |
289 // If we have not previously checked for availability of the sandbox or if | 271 // If we have not previously checked for availability of the sandbox or if |
290 // we otherwise don't believe to have a good cached value, we have to | 272 // we otherwise don't believe to have a good cached value, we have to |
291 // perform a thorough check now. | 273 // perform a thorough check now. |
292 if (status_ == STATUS_UNKNOWN) { | 274 if (status_ == STATUS_UNKNOWN) { |
293 status_ = kernelSupportSeccompBPF(proc_fd) | 275 status_ = KernelSupportSeccompBPF(proc_fd) |
294 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; | 276 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; |
295 | 277 |
296 // As we are performing our tests from a child process, the run-time | 278 // As we are performing our tests from a child process, the run-time |
297 // environment that is visible to the sandbox is always guaranteed to be | 279 // environment that is visible to the sandbox is always guaranteed to be |
298 // single-threaded. Let's check here whether the caller is single- | 280 // single-threaded. Let's check here whether the caller is single- |
299 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. | 281 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. |
300 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) { | 282 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) { |
301 status_ = STATUS_UNAVAILABLE; | 283 status_ = STATUS_UNAVAILABLE; |
302 } | 284 } |
303 } | 285 } |
304 return status_; | 286 return status_; |
305 } | 287 } |
306 | 288 |
307 void Sandbox::setProcFd(int proc_fd) { | 289 void Sandbox::set_proc_fd(int proc_fd) { |
308 proc_fd_ = proc_fd; | 290 proc_fd_ = proc_fd; |
309 } | 291 } |
310 | 292 |
311 void Sandbox::startSandboxInternal(bool quiet) { | 293 void Sandbox::StartSandboxInternal(bool quiet) { |
312 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { | 294 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { |
313 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " | 295 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " |
314 "unavailable"); | 296 "unavailable"); |
315 } else if (status_ == STATUS_ENABLED) { | 297 } else if (status_ == STATUS_ENABLED) { |
316 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " | 298 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " |
317 "setSandboxPolicy() to stack policies instead"); | 299 "setSandboxPolicy() to stack policies instead"); |
318 } | 300 } |
319 if (proc_fd_ < 0) { | 301 if (proc_fd_ < 0) { |
320 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); | 302 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); |
321 } | 303 } |
322 if (proc_fd_ < 0) { | 304 if (proc_fd_ < 0) { |
323 // For now, continue in degraded mode, if we can't access /proc. | 305 // For now, continue in degraded mode, if we can't access /proc. |
324 // In the future, we might want to tighten this requirement. | 306 // In the future, we might want to tighten this requirement. |
325 } | 307 } |
326 if (!isSingleThreaded(proc_fd_)) { | 308 if (!IsSingleThreaded(proc_fd_)) { |
327 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); | 309 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); |
328 } | 310 } |
329 | 311 |
330 // We no longer need access to any files in /proc. We want to do this | 312 // We no longer need access to any files in /proc. We want to do this |
331 // before installing the filters, just in case that our policy denies | 313 // before installing the filters, just in case that our policy denies |
332 // close(). | 314 // close(). |
333 if (proc_fd_ >= 0) { | 315 if (proc_fd_ >= 0) { |
334 if (HANDLE_EINTR(close(proc_fd_))) { | 316 if (HANDLE_EINTR(close(proc_fd_))) { |
335 SANDBOX_DIE("Failed to close file descriptor for /proc"); | 317 SANDBOX_DIE("Failed to close file descriptor for /proc"); |
336 } | 318 } |
337 proc_fd_ = -1; | 319 proc_fd_ = -1; |
338 } | 320 } |
339 | 321 |
340 // Install the filters. | 322 // Install the filters. |
341 installFilter(quiet); | 323 InstallFilter(quiet); |
342 | 324 |
343 // We are now inside the sandbox. | 325 // We are now inside the sandbox. |
344 status_ = STATUS_ENABLED; | 326 status_ = STATUS_ENABLED; |
345 } | 327 } |
346 | 328 |
347 bool Sandbox::isSingleThreaded(int proc_fd) { | 329 bool Sandbox::IsSingleThreaded(int proc_fd) { |
348 if (proc_fd < 0) { | 330 if (proc_fd < 0) { |
349 // Cannot determine whether program is single-threaded. Hope for | 331 // Cannot determine whether program is single-threaded. Hope for |
350 // the best... | 332 // the best... |
351 return true; | 333 return true; |
352 } | 334 } |
353 | 335 |
354 struct stat sb; | 336 struct stat sb; |
355 int task = -1; | 337 int task = -1; |
356 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || | 338 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || |
357 fstat(task, &sb) != 0 || | 339 fstat(task, &sb) != 0 || |
358 sb.st_nlink != 3 || | 340 sb.st_nlink != 3 || |
359 HANDLE_EINTR(close(task))) { | 341 HANDLE_EINTR(close(task))) { |
360 if (task >= 0) { | 342 if (task >= 0) { |
361 if (HANDLE_EINTR(close(task))) { } | 343 if (HANDLE_EINTR(close(task))) { } |
362 } | 344 } |
363 return false; | 345 return false; |
364 } | 346 } |
365 return true; | 347 return true; |
366 } | 348 } |
367 | 349 |
368 bool Sandbox::isDenied(const ErrorCode& code) { | 350 bool Sandbox::IsDenied(const ErrorCode& code) { |
369 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP || | 351 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP || |
370 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) && | 352 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) && |
371 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO)); | 353 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO)); |
372 } | 354 } |
373 | 355 |
374 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, | 356 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator, |
375 void *aux) { | 357 void *aux) { |
376 for (SyscallIterator iter(true); !iter.Done(); ) { | 358 for (SyscallIterator iter(true); !iter.Done(); ) { |
377 uint32_t sysnum = iter.Next(); | 359 uint32_t sysnum = iter.Next(); |
378 if (!isDenied(syscallEvaluator(sysnum, aux))) { | 360 if (!IsDenied(syscall_evaluator(sysnum, aux))) { |
379 SANDBOX_DIE("Policies should deny system calls that are outside the " | 361 SANDBOX_DIE("Policies should deny system calls that are outside the " |
380 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); | 362 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); |
381 } | 363 } |
382 } | 364 } |
383 return; | 365 return; |
384 } | 366 } |
385 | 367 |
386 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { | 368 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { |
387 if (BPF_CLASS(insn->code) == BPF_RET && | 369 if (BPF_CLASS(insn->code) == BPF_RET && |
388 insn->k > SECCOMP_RET_TRAP && | 370 insn->k > SECCOMP_RET_TRAP && |
389 insn->k - SECCOMP_RET_TRAP <= trapArraySize_) { | 371 insn->k - SECCOMP_RET_TRAP <= trap_array_size_) { |
390 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1]; | 372 const ErrorCode& err = trap_array_[insn->k - SECCOMP_RET_TRAP - 1]; |
391 if (!err.safe_) { | 373 if (!err.safe_) { |
392 bool *is_unsafe = static_cast<bool *>(aux); | 374 bool *is_unsafe = static_cast<bool *>(aux); |
393 *is_unsafe = true; | 375 *is_unsafe = true; |
394 } | 376 } |
395 } | 377 } |
396 } | 378 } |
397 | 379 |
398 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) { | 380 void Sandbox::RedirectToUserspace(Instruction *insn, void *) { |
399 // When inside an UnsafeTrap() callback, we want to allow all system calls. | 381 // When inside an UnsafeTrap() callback, we want to allow all system calls. |
400 // This means, we must conditionally disable the sandbox -- and that's not | 382 // This means, we must conditionally disable the sandbox -- and that's not |
401 // something that kernel-side BPF filters can do, as they cannot inspect | 383 // something that kernel-side BPF filters can do, as they cannot inspect |
402 // any state other than the syscall arguments. | 384 // any state other than the syscall arguments. |
403 // But if we redirect all error handlers to user-space, then we can easily | 385 // But if we redirect all error handlers to user-space, then we can easily |
404 // make this decision. | 386 // make this decision. |
405 // The performance penalty for this extra round-trip to user-space is not | 387 // The performance penalty for this extra round-trip to user-space is not |
406 // actually that bad, as we only ever pay it for denied system calls; and a | 388 // actually that bad, as we only ever pay it for denied system calls; and a |
407 // typical program has very few of these. | 389 // typical program has very few of these. |
408 if (BPF_CLASS(insn->code) == BPF_RET && | 390 if (BPF_CLASS(insn->code) == BPF_RET && |
409 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { | 391 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { |
410 insn->k = Trap(ReturnErrno, | 392 insn->k = Trap(ReturnErrno, |
411 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); | 393 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); |
412 } | 394 } |
413 } | 395 } |
414 | 396 |
415 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { | 397 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { |
416 // We need to replicate the behavior of RedirectToUserspace(), so that our | 398 // We need to replicate the behavior of RedirectToUserspace(), so that our |
417 // Verifier can still work correctly. | 399 // Verifier can still work correctly. |
418 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); | 400 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); |
419 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); | 401 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); |
420 ErrorCode err = evaluator.first(sysnum, evaluator.second); | 402 ErrorCode err = evaluator.first(sysnum, evaluator.second); |
421 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { | 403 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { |
422 return Trap(ReturnErrno, | 404 return Trap(ReturnErrno, |
423 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); | 405 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); |
424 } | 406 } |
425 return err; | 407 return err; |
426 } | 408 } |
427 | 409 |
428 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { | 410 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) { |
429 if (status_ == STATUS_ENABLED) { | 411 if (status_ == STATUS_ENABLED) { |
430 SANDBOX_DIE("Cannot change policy after sandbox has started"); | 412 SANDBOX_DIE("Cannot change policy after sandbox has started"); |
431 } | 413 } |
432 policySanityChecks(syscallEvaluator, aux); | 414 PolicySanityChecks(syscall_evaluator, aux); |
433 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); | 415 evaluators_.push_back(std::make_pair(syscall_evaluator, aux)); |
434 } | 416 } |
435 | 417 |
436 void Sandbox::installFilter(bool quiet) { | 418 void Sandbox::InstallFilter(bool quiet) { |
437 // Verify that the user pushed a policy. | 419 // Verify that the user pushed a policy. |
438 if (evaluators_.empty()) { | 420 if (evaluators_.empty()) { |
439 filter_failed: | 421 filter_failed: |
440 SANDBOX_DIE("Failed to configure system call filters"); | 422 SANDBOX_DIE("Failed to configure system call filters"); |
441 } | 423 } |
442 | 424 |
443 // Set new SIGSYS handler | 425 // Set new SIGSYS handler |
444 struct sigaction sa; | 426 struct sigaction sa; |
445 memset(&sa, 0, sizeof(sa)); | 427 memset(&sa, 0, sizeof(sa)); |
446 sa.sa_sigaction = sigSys; | 428 sa.sa_sigaction = SigSys; |
447 sa.sa_flags = SA_SIGINFO | SA_NODEFER; | 429 sa.sa_flags = SA_SIGINFO | SA_NODEFER; |
448 if (sigaction(SIGSYS, &sa, NULL) < 0) { | 430 if (sigaction(SIGSYS, &sa, NULL) < 0) { |
449 goto filter_failed; | 431 goto filter_failed; |
450 } | 432 } |
451 | 433 |
452 // Unmask SIGSYS | 434 // Unmask SIGSYS |
453 sigset_t mask; | 435 sigset_t mask; |
454 if (sigemptyset(&mask) || | 436 if (sigemptyset(&mask) || |
455 sigaddset(&mask, SIGSYS) || | 437 sigaddset(&mask, SIGSYS) || |
456 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | 438 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { |
457 goto filter_failed; | 439 goto filter_failed; |
458 } | 440 } |
459 | 441 |
460 // We can't handle stacked evaluators, yet. We'll get there eventually | 442 // We can't handle stacked evaluators, yet. We'll get there eventually |
461 // though. Hang tight. | 443 // though. Hang tight. |
462 if (evaluators_.size() != 1) { | 444 if (evaluators_.size() != 1) { |
463 SANDBOX_DIE("Not implemented"); | 445 SANDBOX_DIE("Not implemented"); |
464 } | 446 } |
465 | 447 |
466 // Assemble the BPF filter program. | 448 // Assemble the BPF filter program. |
467 CodeGen *gen = new CodeGen(); | 449 CodeGen *gen = new CodeGen(); |
468 if (!gen) { | 450 if (!gen) { |
469 SANDBOX_DIE("Out of memory"); | 451 SANDBOX_DIE("Out of memory"); |
470 } | 452 } |
471 | 453 |
472 // If the architecture doesn't match SECCOMP_ARCH, disallow the | 454 // If the architecture doesn't match SECCOMP_ARCH, disallow the |
473 // system call. | 455 // system call. |
474 Instruction *tail; | 456 Instruction *tail; |
475 Instruction *head = | 457 Instruction *head = |
476 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | 458 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_ARCH_IDX, |
477 offsetof(struct arch_seccomp_data, arch), | |
478 tail = | 459 tail = |
479 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, | 460 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, |
480 NULL, | 461 NULL, |
481 gen->MakeInstruction(BPF_RET+BPF_K, | 462 gen->MakeInstruction(BPF_RET+BPF_K, |
482 Kill( | 463 Kill("Invalid audit architecture in BPF filter")))); |
483 "Invalid audit architecture in BPF filter").err_))); | |
484 | 464 |
485 { | 465 { |
486 // Evaluate all possible system calls and group their ErrorCodes into | 466 // Evaluate all possible system calls and group their ErrorCodes into |
487 // ranges of identical codes. | 467 // ranges of identical codes. |
488 Ranges ranges; | 468 Ranges ranges; |
489 findRanges(&ranges); | 469 FindRanges(&ranges); |
490 | 470 |
491 // Compile the system call ranges to an optimized BPF jumptable | 471 // Compile the system call ranges to an optimized BPF jumptable |
492 Instruction *jumptable = | 472 Instruction *jumptable = |
493 assembleJumpTable(gen, ranges.begin(), ranges.end()); | 473 AssembleJumpTable(gen, ranges.begin(), ranges.end()); |
494 | 474 |
495 // If there is at least one UnsafeTrap() in our program, the entire sandbox | 475 // If there is at least one UnsafeTrap() in our program, the entire sandbox |
496 // is unsafe. We need to modify the program so that all non- | 476 // is unsafe. We need to modify the program so that all non- |
497 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then | 477 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then |
498 // allow us to temporarily disable sandboxing rules inside of callbacks to | 478 // allow us to temporarily disable sandboxing rules inside of callbacks to |
499 // UnsafeTrap(). | 479 // UnsafeTrap(). |
500 has_unsafe_traps_ = false; | 480 has_unsafe_traps_ = false; |
501 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); | 481 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); |
502 | 482 |
503 // Grab the system call number, so that we can implement jump tables. | 483 // Grab the system call number, so that we can implement jump tables. |
504 Instruction *load_nr = | 484 Instruction *load_nr = |
505 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | 485 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_NR_IDX); |
506 offsetof(struct arch_seccomp_data, nr)); | |
507 | 486 |
508 // If our BPF program has unsafe jumps, enable support for them. This | 487 // If our BPF program has unsafe jumps, enable support for them. This |
509 // test happens very early in the BPF filter program. Even before we | 488 // test happens very early in the BPF filter program. Even before we |
510 // consider looking at system call numbers. | 489 // consider looking at system call numbers. |
511 // As support for unsafe jumps essentially defeats all the security | 490 // As support for unsafe jumps essentially defeats all the security |
512 // measures that the sandbox provides, we print a big warning message -- | 491 // measures that the sandbox provides, we print a big warning message -- |
513 // and of course, we make sure to only ever enable this feature if it | 492 // and of course, we make sure to only ever enable this feature if it |
514 // is actually requested by the sandbox policy. | 493 // is actually requested by the sandbox policy. |
515 if (has_unsafe_traps_) { | 494 if (has_unsafe_traps_) { |
516 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) { | 495 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) { |
(...skipping 26 matching lines...) Expand all Loading... |
543 // Allow system calls, if they originate from our magic return address | 522 // Allow system calls, if they originate from our magic return address |
544 // (which we can query by calling SandboxSyscall(-1)). | 523 // (which we can query by calling SandboxSyscall(-1)). |
545 uintptr_t syscall_entry_point = | 524 uintptr_t syscall_entry_point = |
546 static_cast<uintptr_t>(SandboxSyscall(-1)); | 525 static_cast<uintptr_t>(SandboxSyscall(-1)); |
547 uint32_t low = static_cast<uint32_t>(syscall_entry_point); | 526 uint32_t low = static_cast<uint32_t>(syscall_entry_point); |
548 #if __SIZEOF_POINTER__ > 4 | 527 #if __SIZEOF_POINTER__ > 4 |
549 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); | 528 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); |
550 #endif | 529 #endif |
551 | 530 |
552 // BPF cannot do native 64bit comparisons. On 64bit architectures, we | 531 // BPF cannot do native 64bit comparisons. On 64bit architectures, we |
553 // have to compare both 32bit halfs of the instruction pointer. If they | 532 // have to compare both 32bit halves of the instruction pointer. If they |
554 // match what we expect, we return ERR_ALLOWED. If either or both don't | 533 // match what we expect, we return ERR_ALLOWED. If either or both don't |
555 // match, we continue evaluating the rest of the sandbox policy. | 534 // match, we continue evaluating the rest of the sandbox policy. |
556 Instruction *escape_hatch = | 535 Instruction *escape_hatch = |
557 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | 536 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_IP_LSB_IDX, |
558 offsetof(struct arch_seccomp_data, | |
559 instruction_pointer) + | |
560 (__SIZEOF_POINTER__ > 4 && | |
561 __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0), | |
562 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low, | 537 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low, |
563 #if __SIZEOF_POINTER__ > 4 | 538 #if __SIZEOF_POINTER__ > 4 |
564 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | 539 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_IP_MSB_IDX, |
565 offsetof(struct arch_seccomp_data, | |
566 instruction_pointer) + | |
567 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4), | |
568 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi, | 540 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi, |
569 #endif | 541 #endif |
570 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)), | 542 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)), |
571 #if __SIZEOF_POINTER__ > 4 | 543 #if __SIZEOF_POINTER__ > 4 |
572 load_nr)), | 544 load_nr)), |
573 #endif | 545 #endif |
574 load_nr)); | 546 load_nr)); |
575 gen->JoinInstructions(tail, escape_hatch); | 547 gen->JoinInstructions(tail, escape_hatch); |
576 } else { | 548 } else { |
577 gen->JoinInstructions(tail, load_nr); | 549 gen->JoinInstructions(tail, load_nr); |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
639 // system memory allocator that is in effect, these operators can result | 611 // system memory allocator that is in effect, these operators can result |
640 // in system calls to things like munmap() or brk(). | 612 // in system calls to things like munmap() or brk(). |
641 struct sock_filter bpf[program->size()]; | 613 struct sock_filter bpf[program->size()]; |
642 const struct sock_fprog prog = { | 614 const struct sock_fprog prog = { |
643 static_cast<unsigned short>(program->size()), bpf }; | 615 static_cast<unsigned short>(program->size()), bpf }; |
644 memcpy(bpf, &(*program)[0], sizeof(bpf)); | 616 memcpy(bpf, &(*program)[0], sizeof(bpf)); |
645 delete program; | 617 delete program; |
646 | 618 |
647 // Release memory that is no longer needed | 619 // Release memory that is no longer needed |
648 evaluators_.clear(); | 620 evaluators_.clear(); |
| 621 conds_.clear(); |
649 | 622 |
650 #if defined(SECCOMP_BPF_VALGRIND_HACKS) | 623 #if defined(SECCOMP_BPF_VALGRIND_HACKS) |
651 // Valgrind is really not happy about our sandbox. Disable it when running | 624 // Valgrind is really not happy about our sandbox. Disable it when running |
652 // in Valgrind. This feature is dangerous and should never be enabled by | 625 // in Valgrind. This feature is dangerous and should never be enabled by |
653 // default. We protect it behind a pre-processor option. | 626 // default. We protect it behind a pre-processor option. |
654 if (!RUNNING_ON_VALGRIND) | 627 if (!RUNNING_ON_VALGRIND) |
655 #endif | 628 #endif |
656 { | 629 { |
657 // Install BPF filter program | 630 // Install BPF filter program |
658 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { | 631 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
659 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs"); | 632 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs"); |
660 } else { | 633 } else { |
661 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | 634 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { |
662 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters"); | 635 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters"); |
663 } | 636 } |
664 } | 637 } |
665 } | 638 } |
666 | 639 |
667 return; | 640 return; |
668 } | 641 } |
669 | 642 |
670 void Sandbox::findRanges(Ranges *ranges) { | 643 void Sandbox::FindRanges(Ranges *ranges) { |
671 // Please note that "struct seccomp_data" defines system calls as a signed | 644 // Please note that "struct seccomp_data" defines system calls as a signed |
672 // int32_t, but BPF instructions always operate on unsigned quantities. We | 645 // int32_t, but BPF instructions always operate on unsigned quantities. We |
673 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, | 646 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, |
674 // and then verifying that the rest of the number range (both positive and | 647 // and then verifying that the rest of the number range (both positive and |
675 // negative) all return the same ErrorCode. | 648 // negative) all return the same ErrorCode. |
676 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | 649 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first; |
677 void *aux = evaluators_.begin()->second; | 650 void *aux = evaluators_.begin()->second; |
678 uint32_t oldSysnum = 0; | 651 uint32_t old_sysnum = 0; |
679 ErrorCode oldErr = evaluateSyscall(oldSysnum, aux); | 652 ErrorCode old_err = evaluate_syscall(old_sysnum, aux); |
680 ErrorCode invalidErr = evaluateSyscall(MIN_SYSCALL - 1, aux); | 653 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux); |
681 for (SyscallIterator iter(false); !iter.Done(); ) { | 654 for (SyscallIterator iter(false); !iter.Done(); ) { |
682 uint32_t sysnum = iter.Next(); | 655 uint32_t sysnum = iter.Next(); |
683 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum), aux); | 656 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux); |
684 if (!iter.IsValid(sysnum) && !invalidErr.Equals(err)) { | 657 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) { |
685 // A proper sandbox policy should always treat system calls outside of | 658 // A proper sandbox policy should always treat system calls outside of |
686 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns | 659 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns |
687 // "false" for SyscallIterator::IsValid()) identically. Typically, all | 660 // "false" for SyscallIterator::IsValid()) identically. Typically, all |
688 // of these system calls would be denied with the same ErrorCode. | 661 // of these system calls would be denied with the same ErrorCode. |
689 SANDBOX_DIE("Invalid seccomp policy"); | 662 SANDBOX_DIE("Invalid seccomp policy"); |
690 } | 663 } |
691 if (!err.Equals(oldErr) || iter.Done()) { | 664 if (!err.Equals(old_err) || iter.Done()) { |
692 ranges->push_back(Range(oldSysnum, sysnum - 1, oldErr)); | 665 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err)); |
693 oldSysnum = sysnum; | 666 old_sysnum = sysnum; |
694 oldErr = err; | 667 old_err = err; |
695 } | 668 } |
696 } | 669 } |
697 } | 670 } |
698 | 671 |
699 Instruction *Sandbox::assembleJumpTable(CodeGen *gen, | 672 Instruction *Sandbox::AssembleJumpTable(CodeGen *gen, |
700 Ranges::const_iterator start, | 673 Ranges::const_iterator start, |
701 Ranges::const_iterator stop) { | 674 Ranges::const_iterator stop) { |
702 // We convert the list of system call ranges into a jump table that performs | 675 // We convert the list of system call ranges into a jump table that performs |
703 // a binary search over the ranges. | 676 // a binary search over the ranges. |
704 // As a sanity check, we need to have at least one distinct range for us | 677 // As a sanity check, we need to have at least one distinct range for us |
705 // to be able to build a jump table. | 678 // to be able to build a jump table. |
706 if (stop - start <= 0) { | 679 if (stop - start <= 0) { |
707 SANDBOX_DIE("Invalid set of system call ranges"); | 680 SANDBOX_DIE("Invalid set of system call ranges"); |
708 } else if (stop - start == 1) { | 681 } else if (stop - start == 1) { |
709 // If we have narrowed things down to a single range object, we can | 682 // If we have narrowed things down to a single range object, we can |
710 // return from the BPF filter program. | 683 // return from the BPF filter program. |
711 return gen->MakeInstruction(BPF_RET+BPF_K, start->err); | 684 return RetExpression(gen, start->err); |
712 } | 685 } |
713 | 686 |
714 // Pick the range object that is located at the mid point of our list. | 687 // Pick the range object that is located at the mid point of our list. |
715 // We compare our system call number against the lowest valid system call | 688 // We compare our system call number against the lowest valid system call |
716 // number in this range object. If our number is lower, it is outside of | 689 // number in this range object. If our number is lower, it is outside of |
717 // this range object. If it is greater or equal, it might be inside. | 690 // this range object. If it is greater or equal, it might be inside. |
718 Ranges::const_iterator mid = start + (stop - start)/2; | 691 Ranges::const_iterator mid = start + (stop - start)/2; |
719 | 692 |
720 // Sub-divide the list of ranges and continue recursively. | 693 // Sub-divide the list of ranges and continue recursively. |
721 Instruction *jf = assembleJumpTable(gen, start, mid); | 694 Instruction *jf = AssembleJumpTable(gen, start, mid); |
722 Instruction *jt = assembleJumpTable(gen, mid, stop); | 695 Instruction *jt = AssembleJumpTable(gen, mid, stop); |
723 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf); | 696 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf); |
724 } | 697 } |
725 | 698 |
726 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | 699 Instruction *Sandbox::RetExpression(CodeGen *gen, const ErrorCode& cond) { |
| 700 if (cond.error_type_ == ErrorCode::ET_COND) { |
| 701 return CondExpression(gen, cond); |
| 702 } else { |
| 703 return gen->MakeInstruction(BPF_RET+BPF_K, cond); |
| 704 } |
| 705 } |
| 706 |
| 707 Instruction *Sandbox::CondExpression(CodeGen *gen, const ErrorCode& cond) { |
| 708 // We can only inspect the six system call arguments that are passed in |
| 709 // CPU registers. |
| 710 if (cond.argno_ < 0 || cond.argno_ >= 6) { |
| 711 SANDBOX_DIE("Internal compiler error; invalid argument number " |
| 712 "encountered"); |
| 713 } |
| 714 |
| 715 // BPF programs operate on 32bit entities. Load both halves of the 64bit |
| 716 // system call argument and then generate suitable conditional statements. |
| 717 Instruction *msb_head = |
| 718 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| 719 SECCOMP_ARG_MSB_IDX(cond.argno_)); |
| 720 Instruction *msb_tail = msb_head; |
| 721 Instruction *lsb_head = |
| 722 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| 723 SECCOMP_ARG_LSB_IDX(cond.argno_)); |
| 724 Instruction *lsb_tail = lsb_head; |
| 725 |
| 726 // Emit a suitable comparison statement. |
| 727 switch (cond.op_) { |
| 728 case ErrorCode::OP_EQUAL: |
| 729 // Compare the least significant bits for equality |
| 730 lsb_tail = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, |
| 731 static_cast<uint32_t>(cond.value_), |
| 732 RetExpression(gen, *cond.passed_), |
| 733 RetExpression(gen, *cond.failed_)); |
| 734 gen->JoinInstructions(lsb_head, lsb_tail); |
| 735 |
| 736 // If we are looking at a 64bit argument, we need to also compare the |
| 737 // most significant bits. |
| 738 if (cond.width_ == ErrorCode::TP_64BIT) { |
| 739 msb_tail = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, |
| 740 static_cast<uint32_t>(cond.value_ >> 32), |
| 741 NULL, |
| 742 RetExpression(gen, *cond.failed_)); |
| 743 gen->JoinInstructions(msb_head, msb_tail); |
| 744 } |
| 745 break; |
| 746 default: |
| 747 // TODO(markus): We can only check for equality so far. |
| 748 SANDBOX_DIE("Not implemented"); |
| 749 break; |
| 750 } |
| 751 |
| 752 // Ensure that we never pass a 64bit value, when we only expect a 32bit |
| 753 // value. This is somewhat complicated by the fact that on 64bit systems, |
| 754 // callers could legitimately pass in a non-zero value in the MSB, iff the |
| 755 // LSB has been sign-extended into the MSB. |
| 756 if (cond.width_ == ErrorCode::TP_32BIT) { |
| 757 if (cond.value_ >> 32) { |
| 758 SANDBOX_DIE("Invalid comparison of a 32bit system call argument " |
| 759 "against a 64bit constant; this test is always false."); |
| 760 } |
| 761 |
| 762 Instruction *invalid_64bit = RetExpression(gen, Unexpected64bitArgument()); |
| 763 #if __SIZEOF_POINTER__ > 4 |
| 764 invalid_64bit = |
| 765 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0xFFFFFFFF, |
| 766 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| 767 SECCOMP_ARG_LSB_IDX(cond.argno_), |
| 768 gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, 0x80000000, |
| 769 lsb_head, |
| 770 invalid_64bit)), |
| 771 invalid_64bit); |
| 772 #endif |
| 773 gen->JoinInstructions( |
| 774 msb_tail, |
| 775 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0, |
| 776 lsb_head, |
| 777 invalid_64bit)); |
| 778 } else { |
| 779 gen->JoinInstructions(msb_tail, lsb_head); |
| 780 } |
| 781 |
| 782 return msb_head; |
| 783 } |
| 784 |
| 785 ErrorCode Sandbox::Unexpected64bitArgument() { |
| 786 return Kill("Unexpected 64bit argument detected"); |
| 787 } |
| 788 |
| 789 void Sandbox::SigSys(int nr, siginfo_t *info, void *void_context) { |
727 // Various sanity checks to make sure we actually received a signal | 790 // Various sanity checks to make sure we actually received a signal |
728 // triggered by a BPF filter. If something else triggered SIGSYS | 791 // triggered by a BPF filter. If something else triggered SIGSYS |
729 // (e.g. kill()), there is really nothing we can do with this signal. | 792 // (e.g. kill()), there is really nothing we can do with this signal. |
730 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context || | 793 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context || |
731 info->si_errno <= 0 || | 794 info->si_errno <= 0 || |
732 static_cast<size_t>(info->si_errno) > trapArraySize_) { | 795 static_cast<size_t>(info->si_errno) > trap_array_size_) { |
733 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal | 796 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal |
734 // safe and can lead to bugs. We should eventually implement a different | 797 // safe and can lead to bugs. We should eventually implement a different |
735 // logging and reporting mechanism that is safe to be called from | 798 // logging and reporting mechanism that is safe to be called from |
736 // the sigSys() handler. | 799 // the sigSys() handler. |
737 // TODO: If we feel confident that our code otherwise works correctly, we | 800 // TODO: If we feel confident that our code otherwise works correctly, we |
738 // could actually make an argument that spurious SIGSYS should | 801 // could actually make an argument that spurious SIGSYS should |
739 // just get silently ignored. TBD | 802 // just get silently ignored. TBD |
740 sigsys_err: | 803 sigsys_err: |
741 SANDBOX_DIE("Unexpected SIGSYS received"); | 804 SANDBOX_DIE("Unexpected SIGSYS received"); |
742 } | 805 } |
(...skipping 23 matching lines...) Expand all Loading... |
766 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { | 829 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { |
767 errno = old_errno; | 830 errno = old_errno; |
768 if (sigsys.nr == __NR_clone) { | 831 if (sigsys.nr == __NR_clone) { |
769 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); | 832 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); |
770 } | 833 } |
771 rc = SandboxSyscall(sigsys.nr, | 834 rc = SandboxSyscall(sigsys.nr, |
772 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), | 835 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), |
773 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), | 836 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), |
774 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); | 837 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); |
775 } else { | 838 } else { |
776 const ErrorCode& err = trapArray_[info->si_errno - 1]; | 839 const ErrorCode& err = trap_array_[info->si_errno - 1]; |
777 if (!err.safe_) { | 840 if (!err.safe_) { |
778 SetIsInSigHandler(); | 841 SetIsInSigHandler(); |
779 } | 842 } |
780 | 843 |
781 // Copy the seccomp-specific data into an arch_seccomp_data structure. This | 844 // Copy the seccomp-specific data into an arch_seccomp_data structure. This |
782 // is what we are showing to TrapFnc callbacks that the system call | 845 // is what we are showing to TrapFnc callbacks that the system call |
783 // evaluator registered with the sandbox. | 846 // evaluator registered with the sandbox. |
784 struct arch_seccomp_data data = { | 847 struct arch_seccomp_data data = { |
785 sigsys.nr, | 848 sigsys.nr, |
786 SECCOMP_ARCH, | 849 SECCOMP_ARCH, |
(...skipping 30 matching lines...) Expand all Loading... |
817 } else { | 880 } else { |
818 return safe < o.safe; | 881 return safe < o.safe; |
819 } | 882 } |
820 } | 883 } |
821 | 884 |
822 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, | 885 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, |
823 bool safe) { | 886 bool safe) { |
824 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance | 887 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance |
825 // of a SECCOMP_RET_TRAP. | 888 // of a SECCOMP_RET_TRAP. |
826 TrapKey key(fnc, aux, safe); | 889 TrapKey key(fnc, aux, safe); |
827 TrapIds::const_iterator iter = trapIds_.find(key); | 890 TrapIds::const_iterator iter = trap_ids_.find(key); |
828 uint16_t id; | 891 uint16_t id; |
829 if (iter != trapIds_.end()) { | 892 if (iter != trap_ids_.end()) { |
830 // We have seen this pair before. Return the same id that we assigned | 893 // We have seen this pair before. Return the same id that we assigned |
831 // earlier. | 894 // earlier. |
832 id = iter->second; | 895 id = iter->second; |
833 } else { | 896 } else { |
834 // This is a new pair. Remember it and assign a new id. | 897 // This is a new pair. Remember it and assign a new id. |
835 // Please note that we have to store traps in memory that doesn't get | 898 // Please note that we have to store traps in memory that doesn't get |
836 // deallocated when the program is shutting down. A memory leak is | 899 // deallocated when the program is shutting down. A memory leak is |
837 // intentional, because we might otherwise not be able to execute | 900 // intentional, because we might otherwise not be able to execute |
838 // system calls part way through the program shutting down | 901 // system calls part way through the program shutting down |
839 if (!traps_) { | 902 if (!traps_) { |
840 traps_ = new Traps(); | 903 traps_ = new Traps(); |
841 } | 904 } |
842 if (traps_->size() >= SECCOMP_RET_DATA) { | 905 if (traps_->size() >= SECCOMP_RET_DATA) { |
843 // In practice, this is pretty much impossible to trigger, as there | 906 // In practice, this is pretty much impossible to trigger, as there |
844 // are other kernel limitations that restrict overall BPF program sizes. | 907 // are other kernel limitations that restrict overall BPF program sizes. |
845 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); | 908 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); |
846 } | 909 } |
847 id = traps_->size() + 1; | 910 id = traps_->size() + 1; |
848 | 911 |
849 traps_->push_back(ErrorCode(fnc, aux, safe, id)); | 912 traps_->push_back(ErrorCode(fnc, aux, safe, id)); |
850 trapIds_[key] = id; | 913 trap_ids_[key] = id; |
851 | 914 |
852 // We want to access the traps_ vector from our signal handler. But | 915 // We want to access the traps_ vector from our signal handler. But |
853 // we are not assured that doing so is async-signal safe. On the other | 916 // we are not assured that doing so is async-signal safe. On the other |
854 // hand, C++ guarantees that the contents of a vector are stored in a | 917 // hand, C++ guarantees that the contents of a vector are stored in a |
855 // contiguous C-style array. | 918 // contiguous C-style array. |
856 // So, we look up the address and size of this array outside of the | 919 // So, we look up the address and size of this array outside of the |
857 // signal handler, where we can safely do so. | 920 // signal handler, where we can safely do so. |
858 trapArray_ = &(*traps_)[0]; | 921 trap_array_ = &(*traps_)[0]; |
859 trapArraySize_ = id; | 922 trap_array_size_ = id; |
860 return traps_->back(); | 923 return traps_->back(); |
861 } | 924 } |
862 | 925 |
863 return ErrorCode(fnc, aux, safe, id); | 926 return ErrorCode(fnc, aux, safe, id); |
864 } | 927 } |
865 | 928 |
866 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { | 929 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { |
867 return MakeTrap(fnc, aux, true /* Safe Trap */); | 930 return MakeTrap(fnc, aux, true /* Safe Trap */); |
868 } | 931 } |
869 | 932 |
(...skipping 13 matching lines...) Expand all Loading... |
883 | 946 |
884 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { | 947 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { |
885 // TrapFnc functions report error by following the native kernel convention | 948 // TrapFnc functions report error by following the native kernel convention |
886 // of returning an exit code in the range of -1..-4096. They do not try to | 949 // of returning an exit code in the range of -1..-4096. They do not try to |
887 // set errno themselves. The glibc wrapper that triggered the SIGSYS will | 950 // set errno themselves. The glibc wrapper that triggered the SIGSYS will |
888 // ultimately do so for us. | 951 // ultimately do so for us. |
889 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; | 952 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; |
890 return -err; | 953 return -err; |
891 } | 954 } |
892 | 955 |
893 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { | 956 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width, |
| 957 ErrorCode::Operation op, uint64_t value, |
| 958 const ErrorCode& passed, const ErrorCode& failed) { |
| 959 return ErrorCode(argno, width, op, value, |
| 960 &*conds_.insert(passed).first, |
| 961 &*conds_.insert(failed).first); |
| 962 } |
| 963 |
| 964 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) { |
894 SANDBOX_DIE(static_cast<char *>(aux)); | 965 SANDBOX_DIE(static_cast<char *>(aux)); |
895 } | 966 } |
896 | 967 |
897 ErrorCode Sandbox::Kill(const char *msg) { | 968 ErrorCode Sandbox::Kill(const char *msg) { |
898 return Trap(bpfFailure, const_cast<char *>(msg)); | 969 return Trap(BpfFailure, const_cast<char *>(msg)); |
899 } | 970 } |
900 | 971 |
901 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 972 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
902 int Sandbox::proc_fd_ = -1; | 973 int Sandbox::proc_fd_ = -1; |
903 Sandbox::Evaluators Sandbox::evaluators_; | 974 Sandbox::Evaluators Sandbox::evaluators_; |
904 Sandbox::Traps *Sandbox::traps_ = NULL; | 975 Sandbox::Traps *Sandbox::traps_ = NULL; |
905 Sandbox::TrapIds Sandbox::trapIds_; | 976 Sandbox::TrapIds Sandbox::trap_ids_; |
906 ErrorCode *Sandbox::trapArray_ = NULL; | 977 ErrorCode *Sandbox::trap_array_ = NULL; |
907 size_t Sandbox::trapArraySize_ = 0; | 978 size_t Sandbox::trap_array_size_ = 0; |
908 bool Sandbox::has_unsafe_traps_ = false; | 979 bool Sandbox::has_unsafe_traps_ = false; |
| 980 Sandbox::Conds Sandbox::conds_; |
909 | 981 |
910 } // namespace | 982 } // namespace |
OLD | NEW |