OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <time.h> | 5 #include <time.h> |
6 | 6 |
7 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" | 7 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
8 #include "sandbox/linux/seccomp-bpf/verifier.h" | 8 #include "sandbox/linux/seccomp-bpf/verifier.h" |
9 | 9 |
10 // The kernel gives us a sandbox, we turn it into a playground :-) | 10 // The kernel gives us a sandbox, we turn it into a playground :-) |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
53 } | 53 } |
54 | 54 |
// Forks a child process, installs the policy described by syscallEvaluator
// in the child, starts the sandbox there and then runs CodeInSandbox().
// proc_fd is handed to setProcFd() in the child before the sandbox starts.
//
// Returns true only if the child exited normally with exit status 100.
// If it did not, and the child left any text in the pipe that replaces its
// stderr, the parent dies with that text instead of returning. Any failure
// of the plumbing itself (sigprocmask, pipe2, fork, close, waitpid) is
// fatal via SANDBOX_DIE() (die() on the OLD side).
55 bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(), | 55 bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(), |
56 EvaluateSyscall syscallEvaluator, | 56 EvaluateSyscall syscallEvaluator, |
57 int proc_fd) { | 57 int proc_fd) { |
58 // Block all signals before forking a child process. This prevents an | 58 // Block all signals before forking a child process. This prevents an |
59 // attacker from manipulating our test by sending us an unexpected signal. | 59 // attacker from manipulating our test by sending us an unexpected signal. |
60 sigset_t oldMask, newMask; | 60 sigset_t oldMask, newMask; |
61 if (sigfillset(&newMask) || | 61 if (sigfillset(&newMask) || |
62 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { | 62 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { |
63 die("sigprocmask() failed"); | 63 SANDBOX_DIE("sigprocmask() failed"); |
64 } | 64 } |
// This pipe becomes the child's stderr. Both ends are non-blocking and
// close-on-exec; the parent reads from fds[0] after the child has exited.
65 int fds[2]; | 65 int fds[2]; |
66 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { | 66 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { |
67 die("pipe() failed"); | 67 SANDBOX_DIE("pipe() failed"); |
68 } | 68 } |
69 | 69 |
70 pid_t pid = fork(); | 70 pid_t pid = fork(); |
71 if (pid < 0) { | 71 if (pid < 0) { |
72 // Die if we cannot fork(). We would probably fail a little later | 72 // Die if we cannot fork(). We would probably fail a little later |
73 // anyway, as the machine is likely very close to running out of | 73 // anyway, as the machine is likely very close to running out of |
74 // memory. | 74 // memory. |
75 // But what we don't want to do is return "false", as a crafty | 75 // But what we don't want to do is return "false", as a crafty |
76 // attacker might cause fork() to fail at will and could trick us | 76 // attacker might cause fork() to fail at will and could trick us |
77 // into running without a sandbox. | 77 // into running without a sandbox. |
78 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails | 78 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails |
79 die("fork() failed unexpectedly"); | 79 SANDBOX_DIE("fork() failed unexpectedly"); |
80 } | 80 } |
81 | 81 |
82 // In the child process | 82 // In the child process |
83 if (!pid) { | 83 if (!pid) { |
84 // Test a very simple sandbox policy to verify that we can | 84 // Test a very simple sandbox policy to verify that we can |
85 // successfully turn on sandboxing. | 85 // successfully turn on sandboxing. |
86 dryRun_ = true; | 86 Die::EnableSimpleExit(); |
// Close the read end of the pipe and make its write end file descriptor 2
// (stderr), then drop the now-duplicated original write end.
87 if (HANDLE_EINTR(close(fds[0])) || | 87 if (HANDLE_EINTR(close(fds[0])) || |
88 dup2(fds[1], 2) != 2 || | 88 dup2(fds[1], 2) != 2 || |
89 HANDLE_EINTR(close(fds[1]))) { | 89 HANDLE_EINTR(close(fds[1]))) { |
90 static const char msg[] = "Failed to set up stderr\n"; | 90 static const char msg[] = "Failed to set up stderr\n"; |
// The empty if-body only consumes write()'s return value; the result is
// deliberately not acted upon.
91 if (HANDLE_EINTR(write(fds[1], msg, sizeof(msg)-1))) { } | 91 if (HANDLE_EINTR(write(fds[1], msg, sizeof(msg)-1))) { } |
92 } else { | 92 } else { |
// Discard any previously registered policies so that only
// syscallEvaluator is installed in this child.
93 evaluators_.clear(); | 93 evaluators_.clear(); |
94 setSandboxPolicy(syscallEvaluator, NULL); | 94 setSandboxPolicy(syscallEvaluator, NULL); |
95 setProcFd(proc_fd); | 95 setProcFd(proc_fd); |
96 startSandbox(); | 96 |
| 97 // By passing "quiet=true" to "startSandboxInternal()" we suppress |
| 98 // messages for expected and benign failures (e.g. if the current |
| 99 // kernel lacks support for BPF filters). |
| 100 startSandboxInternal(true); |
| 101 |
97 // Run our code in the sandbox | 102 // Run our code in the sandbox |
98 CodeInSandbox(); | 103 CodeInSandbox(); |
99 } | 104 } |
// Reached if stderr setup failed or if CodeInSandbox() returned.
// NOTE(review): the parent only accepts exit status 100 as success, so
// CodeInSandbox() is presumably expected to exit with 100 itself -- confirm
// against the probe functions passed in by callers.
100 die(NULL); | 105 SANDBOX_DIE(NULL); |
101 } | 106 } |
102 | 107 |
103 // In the parent process. | 108 // In the parent process. |
104 if (HANDLE_EINTR(close(fds[1]))) { | 109 if (HANDLE_EINTR(close(fds[1]))) { |
105 die("close() failed"); | 110 SANDBOX_DIE("close() failed"); |
106 } | 111 } |
// Restore the signal mask that was in effect before the fork.
107 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { | 112 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { |
108 die("sigprocmask() failed"); | 113 SANDBOX_DIE("sigprocmask() failed"); |
109 } | 114 } |
110 int status; | 115 int status; |
111 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { | 116 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { |
112 die("waitpid() failed unexpectedly"); | 117 SANDBOX_DIE("waitpid() failed unexpectedly"); |
113 } | 118 } |
// Success means: normal termination with exit status exactly 100.
114 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == 100; | 119 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == 100; |
115 | 120 |
116 // If we fail to support sandboxing, there might be an additional | 121 // If we fail to support sandboxing, there might be an additional |
117 // error message. If so, this was an entirely unexpected and fatal | 122 // error message. If so, this was an entirely unexpected and fatal |
118 // failure. We should report the failure and somebody most fix | 123 // failure. We should report the failure and somebody must fix |
119 // things. This is probably a security-critical bug in the sandboxing | 124 // things. This is probably a security-critical bug in the sandboxing |
120 // code. | 125 // code. |
121 if (!rc) { | 126 if (!rc) { |
// One byte of buf is reserved for the terminating NUL written below.
122 char buf[4096]; | 127 char buf[4096]; |
123 ssize_t len = HANDLE_EINTR(read(fds[0], buf, sizeof(buf) - 1)); | 128 ssize_t len = HANDLE_EINTR(read(fds[0], buf, sizeof(buf) - 1)); |
124 if (len > 0) { | 129 if (len > 0) { |
// Strip trailing newlines, but always keep at least one character.
125 while (len > 1 && buf[len-1] == '\n') { | 130 while (len > 1 && buf[len-1] == '\n') { |
126 --len; | 131 --len; |
127 } | 132 } |
128 buf[len] = '\000'; | 133 buf[len] = '\000'; |
129 die(buf); | 134 SANDBOX_DIE(buf); |
130 } | 135 } |
131 } | 136 } |
132 if (HANDLE_EINTR(close(fds[0]))) { | 137 if (HANDLE_EINTR(close(fds[0]))) { |
133 die("close() failed"); | 138 SANDBOX_DIE("close() failed"); |
134 } | 139 } |
135 | 140 |
136 return rc; | 141 return rc; |
137 | 142 |
138 } | 143 } |
139 | 144 |
140 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) { | 145 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) { |
141 return RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, proc_fd) && | 146 return RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, proc_fd) && |
142 RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, | 147 RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, |
143 proc_fd); | 148 proc_fd); |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
186 status_ = STATUS_UNAVAILABLE; | 191 status_ = STATUS_UNAVAILABLE; |
187 } | 192 } |
188 } | 193 } |
189 return status_; | 194 return status_; |
190 } | 195 } |
191 | 196 |
// Stores proc_fd in proc_fd_. The sandbox start-up code in this file uses
// proc_fd_ as its descriptor for /proc: it opens /proc itself when the
// stored value is negative, and closes the descriptor before installing the
// filter.
192 void Sandbox::setProcFd(int proc_fd) { | 197 void Sandbox::setProcFd(int proc_fd) { |
193 proc_fd_ = proc_fd; | 198 proc_fd_ = proc_fd; |
194 } | 199 } |
195 | 200 |
// Turns on the sandbox for the current process. In order, it:
//   1. dies if status_ says the sandbox is unsupported, unavailable or
//      already enabled;
//   2. opens /proc if no descriptor was supplied in proc_fd_;
//   3. dies if isSingleThreaded() reports more than one thread;
//   4. closes the /proc descriptor and resets proc_fd_ to -1;
//   5. calls installFilter() and sets status_ to STATUS_ENABLED.
// On the NEW side the function takes a "quiet" flag that is forwarded
// unchanged to installFilter(); the OLD side (startSandbox) has no such
// parameter.
196 void Sandbox::startSandbox() { | 201 void Sandbox::startSandboxInternal(bool quiet) { |
197 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { | 202 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { |
198 die("Trying to start sandbox, even though it is known to be unavailable"); | 203 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " |
| 204 "unavailable"); |
199 } else if (status_ == STATUS_ENABLED) { | 205 } else if (status_ == STATUS_ENABLED) { |
200 die("Cannot start sandbox recursively. Use multiple calls to " | 206 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " |
201 "setSandboxPolicy() to stack policies instead"); | 207 "setSandboxPolicy() to stack policies instead"); |
202 } | 208 } |
// A negative proc_fd_ means the caller did not supply a descriptor.
203 if (proc_fd_ < 0) { | 209 if (proc_fd_ < 0) { |
204 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); | 210 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); |
205 } | 211 } |
// Intentionally empty branch: a failed open() is tolerated here.
206 if (proc_fd_ < 0) { | 212 if (proc_fd_ < 0) { |
207 // For now, continue in degraded mode, if we can't access /proc. | 213 // For now, continue in degraded mode, if we can't access /proc. |
208 // In the future, we might want to tighten this requirement. | 214 // In the future, we might want to tighten this requirement. |
209 } | 215 } |
// isSingleThreaded() is still called when proc_fd_ is negative.
210 if (!isSingleThreaded(proc_fd_)) { | 216 if (!isSingleThreaded(proc_fd_)) { |
211 die("Cannot start sandbox, if process is already multi-threaded"); | 217 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); |
212 } | 218 } |
213 | 219 |
214 // We no longer need access to any files in /proc. We want to do this | 220 // We no longer need access to any files in /proc. We want to do this |
215 // before installing the filters, just in case that our policy denies | 221 // before installing the filters, just in case that our policy denies |
216 // close(). | 222 // close(). |
217 if (proc_fd_ >= 0) { | 223 if (proc_fd_ >= 0) { |
218 if (HANDLE_EINTR(close(proc_fd_))) { | 224 if (HANDLE_EINTR(close(proc_fd_))) { |
219 die("Failed to close file descriptor for /proc"); | 225 SANDBOX_DIE("Failed to close file descriptor for /proc"); |
220 } | 226 } |
221 proc_fd_ = -1; | 227 proc_fd_ = -1; |
222 } | 228 } |
223 | 229 |
224 // Install the filters. | 230 // Install the filters. |
225 installFilter(); | 231 installFilter(quiet); |
226 | 232 |
227 // We are now inside the sandbox. | 233 // We are now inside the sandbox. |
228 status_ = STATUS_ENABLED; | 234 status_ = STATUS_ENABLED; |
229 } | 235 } |
230 | 236 |
231 bool Sandbox::isSingleThreaded(int proc_fd) { | 237 bool Sandbox::isSingleThreaded(int proc_fd) { |
232 if (proc_fd < 0) { | 238 if (proc_fd < 0) { |
233 // Cannot determine whether program is single-threaded. Hope for | 239 // Cannot determine whether program is single-threaded. Hope for |
234 // the best... | 240 // the best... |
235 return true; | 241 return true; |
(...skipping 19 matching lines...) Expand all Loading... |
255 code <= (SECCOMP_RET_ERRNO + 4095)); | 261 code <= (SECCOMP_RET_ERRNO + 4095)); |
256 } | 262 } |
257 | 263 |
// Validates a system call policy and dies via SANDBOX_DIE() (die() on the
// OLD side) if it is unsafe. A policy passes only if isDenied() holds for
// the evaluator's result on:
//   - system call number -1;
//   - in debug builds on x86: every number in the range of the x86-64 ABI
//     that this build does NOT target (x32 vs. non-x32);
//   - the boundary values INT_MAX, INT_MIN, -1, MIN_SYSCALL - 1 and
//     MAX_SYSCALL + 1.
// The second parameter (EvaluateArguments) is unnamed and not used here.
258 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, | 264 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, |
259 EvaluateArguments) { | 265 EvaluateArguments) { |
260 // Do some sanity checks on the policy. This will warn users if they do | 266 // Do some sanity checks on the policy. This will warn users if they do |
261 // things that are likely unsafe and unintended. | 267 // things that are likely unsafe and unintended. |
262 // We also have similar checks later, when we actually compile the BPF | 268 // We also have similar checks later, when we actually compile the BPF |
263 // program. That catches problems with incorrectly stacked evaluators. | 269 // program. That catches problems with incorrectly stacked evaluators. |
264 if (!isDenied(syscallEvaluator(-1))) { | 270 if (!isDenied(syscallEvaluator(-1))) { |
265 die("Negative system calls should always be disallowed by policy"); | 271 SANDBOX_DIE("Negative system calls should always be disallowed by policy"); |
266 } | 272 } |
// The scans below run only when NDEBUG is not defined and only on x86.
// Bit 0x40000000 is what separates x32 system call numbers from the
// regular ones: it is cleared to derive the non-x32 range in an x32 build
// and set to derive the x32 range otherwise.
267 #ifndef NDEBUG | 273 #ifndef NDEBUG |
268 #if defined(__i386__) || defined(__x86_64__) | 274 #if defined(__i386__) || defined(__x86_64__) |
269 #if defined(__x86_64__) && defined(__ILP32__) | 275 #if defined(__x86_64__) && defined(__ILP32__) |
270 for (unsigned int sysnum = MIN_SYSCALL & ~0x40000000u; | 276 for (unsigned int sysnum = MIN_SYSCALL & ~0x40000000u; |
271 sysnum <= (MAX_SYSCALL & ~0x40000000u); | 277 sysnum <= (MAX_SYSCALL & ~0x40000000u); |
272 ++sysnum) { | 278 ++sysnum) { |
273 if (!isDenied(syscallEvaluator(sysnum))) { | 279 if (!isDenied(syscallEvaluator(sysnum))) { |
274 die("In x32 mode, you should not allow any non-x32 system calls"); | 280 SANDBOX_DIE("In x32 mode, you should not allow any non-x32 system calls"); |
275 } | 281 } |
276 } | 282 } |
277 #else | 283 #else |
278 for (unsigned int sysnum = MIN_SYSCALL | 0x40000000u; | 284 for (unsigned int sysnum = MIN_SYSCALL | 0x40000000u; |
279 sysnum <= (MAX_SYSCALL | 0x40000000u); | 285 sysnum <= (MAX_SYSCALL | 0x40000000u); |
280 ++sysnum) { | 286 ++sysnum) { |
281 if (!isDenied(syscallEvaluator(sysnum))) { | 287 if (!isDenied(syscallEvaluator(sysnum))) { |
282 die("x32 system calls should be explicitly disallowed"); | 288 SANDBOX_DIE("x32 system calls should be explicitly disallowed"); |
283 } | 289 } |
284 } | 290 } |
285 #endif | 291 #endif |
286 #endif | 292 #endif |
287 #endif | 293 #endif |
288 // Check interesting boundary values just outside of the valid system call | 294 // Check interesting boundary values just outside of the valid system call |
289 // range: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF, MIN_SYSCALL-1, MAX_SYSCALL+1. | 295 // range: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF, MIN_SYSCALL-1, MAX_SYSCALL+1. |
290 // They all should be denied. | 296 // They all should be denied. |
// Note: -1 is evaluated a second time here; it was already checked above.
291 if (!isDenied(syscallEvaluator(std::numeric_limits<int>::max())) || | 297 if (!isDenied(syscallEvaluator(std::numeric_limits<int>::max())) || |
292 !isDenied(syscallEvaluator(std::numeric_limits<int>::min())) || | 298 !isDenied(syscallEvaluator(std::numeric_limits<int>::min())) || |
293 !isDenied(syscallEvaluator(-1)) || | 299 !isDenied(syscallEvaluator(-1)) || |
294 !isDenied(syscallEvaluator(static_cast<int>(MIN_SYSCALL) - 1)) || | 300 !isDenied(syscallEvaluator(static_cast<int>(MIN_SYSCALL) - 1)) || |
295 !isDenied(syscallEvaluator(static_cast<int>(MAX_SYSCALL) + 1))) { | 301 !isDenied(syscallEvaluator(static_cast<int>(MAX_SYSCALL) + 1))) { |
296 die("Even for default-allow policies, you must never allow system calls " | 302 SANDBOX_DIE("Even for default-allow policies, you must never allow system " |
297 "outside of the standard system call range"); | 303 "calls outside of the standard system call range"); |
298 } | 304 } |
299 return; | 305 return; |
300 } | 306 } |
301 | 307 |
// Registers a sandbox policy. Dies if status_ is already STATUS_ENABLED.
// Otherwise the evaluator is validated by policySanityChecks() (which dies
// on an unsafe policy) and the (syscallEvaluator, argumentEvaluator) pair is
// appended to evaluators_.
302 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, | 308 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, |
303 EvaluateArguments argumentEvaluator) { | 309 EvaluateArguments argumentEvaluator) { |
304 if (status_ == STATUS_ENABLED) { | 310 if (status_ == STATUS_ENABLED) { |
305 die("Cannot change policy after sandbox has started"); | 311 SANDBOX_DIE("Cannot change policy after sandbox has started"); |
306 } | 312 } |
307 policySanityChecks(syscallEvaluator, argumentEvaluator); | 313 policySanityChecks(syscallEvaluator, argumentEvaluator); |
308 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); | 314 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); |
309 } | 315 } |
310 | 316 |
311 void Sandbox::installFilter() { | 317 void Sandbox::installFilter(bool quiet) { |
312 // Verify that the user pushed a policy. | 318 // Verify that the user pushed a policy. |
313 if (evaluators_.empty()) { | 319 if (evaluators_.empty()) { |
314 filter_failed: | 320 filter_failed: |
315 die("Failed to configure system call filters"); | 321 SANDBOX_DIE("Failed to configure system call filters"); |
316 } | 322 } |
317 | 323 |
318 // Set new SIGSYS handler | 324 // Set new SIGSYS handler |
319 struct sigaction sa; | 325 struct sigaction sa; |
320 memset(&sa, 0, sizeof(sa)); | 326 memset(&sa, 0, sizeof(sa)); |
321 sa.sa_sigaction = &sigSys; | 327 sa.sa_sigaction = &sigSys; |
322 sa.sa_flags = SA_SIGINFO; | 328 sa.sa_flags = SA_SIGINFO; |
323 if (sigaction(SIGSYS, &sa, NULL) < 0) { | 329 if (sigaction(SIGSYS, &sa, NULL) < 0) { |
324 goto filter_failed; | 330 goto filter_failed; |
325 } | 331 } |
326 | 332 |
327 // Unmask SIGSYS | 333 // Unmask SIGSYS |
328 sigset_t mask; | 334 sigset_t mask; |
329 if (sigemptyset(&mask) || | 335 if (sigemptyset(&mask) || |
330 sigaddset(&mask, SIGSYS) || | 336 sigaddset(&mask, SIGSYS) || |
331 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | 337 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { |
332 goto filter_failed; | 338 goto filter_failed; |
333 } | 339 } |
334 | 340 |
335 // We can't handle stacked evaluators, yet. We'll get there eventually | 341 // We can't handle stacked evaluators, yet. We'll get there eventually |
336 // though. Hang tight. | 342 // though. Hang tight. |
337 if (evaluators_.size() != 1) { | 343 if (evaluators_.size() != 1) { |
338 die("Not implemented"); | 344 SANDBOX_DIE("Not implemented"); |
339 } | 345 } |
340 | 346 |
341 // Assemble the BPF filter program. | 347 // Assemble the BPF filter program. |
342 Program *program = new Program(); | 348 Program *program = new Program(); |
343 if (!program) { | 349 if (!program) { |
344 die("Out of memory"); | 350 SANDBOX_DIE("Out of memory"); |
345 } | 351 } |
346 | 352 |
347 // If the architecture doesn't match SECCOMP_ARCH, disallow the | 353 // If the architecture doesn't match SECCOMP_ARCH, disallow the |
348 // system call. | 354 // system call. |
349 program->push_back((struct sock_filter) | 355 program->push_back((struct sock_filter) |
350 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch))); | 356 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch))); |
351 program->push_back((struct sock_filter) | 357 program->push_back((struct sock_filter) |
352 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); | 358 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); |
353 | 359 |
354 program->push_back((struct sock_filter) | 360 program->push_back((struct sock_filter) |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
389 emitJumpStatements(program, &rets, ranges.begin(), ranges.end()); | 395 emitJumpStatements(program, &rets, ranges.begin(), ranges.end()); |
390 emitReturnStatements(program, rets); | 396 emitReturnStatements(program, rets); |
391 } | 397 } |
392 | 398 |
393 // Make sure compilation resulted in BPF program that executes | 399 // Make sure compilation resulted in BPF program that executes |
394 // correctly. Otherwise, there is an internal error in our BPF compiler. | 400 // correctly. Otherwise, there is an internal error in our BPF compiler. |
395 // There is really nothing the caller can do until the bug is fixed. | 401 // There is really nothing the caller can do until the bug is fixed. |
396 #ifndef NDEBUG | 402 #ifndef NDEBUG |
397 const char *err = NULL; | 403 const char *err = NULL; |
398 if (!Verifier::verifyBPF(*program, evaluators_, &err)) { | 404 if (!Verifier::verifyBPF(*program, evaluators_, &err)) { |
399 die(err); | 405 SANDBOX_DIE(err); |
400 } | 406 } |
401 #endif | 407 #endif |
402 | 408 |
403 // We want to be very careful in not imposing any requirements on the | 409 // We want to be very careful in not imposing any requirements on the |
404 // policies that are set with setSandboxPolicy(). This means, as soon as | 410 // policies that are set with setSandboxPolicy(). This means, as soon as |
405 // the sandbox is active, we shouldn't be relying on libraries that could | 411 // the sandbox is active, we shouldn't be relying on libraries that could |
406 // be making system calls. This, for example, means we should avoid | 412 // be making system calls. This, for example, means we should avoid |
407 // using the heap and we should avoid using STL functions. | 413 // using the heap and we should avoid using STL functions. |
408 // Temporarily copy the contents of the "program" vector into a | 414 // Temporarily copy the contents of the "program" vector into a |
409 // stack-allocated array; and then explicitly destroy that object. | 415 // stack-allocated array; and then explicitly destroy that object. |
410 // This makes sure we don't ex- or implicitly call new/delete after we | 416 // This makes sure we don't ex- or implicitly call new/delete after we |
411 // installed the BPF filter program in the kernel. Depending on the | 417 // installed the BPF filter program in the kernel. Depending on the |
412 // system memory allocator that is in effect, these operators can result | 418 // system memory allocator that is in effect, these operators can result |
413 // in system calls to things like munmap() or brk(). | 419 // in system calls to things like munmap() or brk(). |
414 struct sock_filter bpf[program->size()]; | 420 struct sock_filter bpf[program->size()]; |
415 const struct sock_fprog prog = { | 421 const struct sock_fprog prog = { |
416 static_cast<unsigned short>(program->size()), bpf }; | 422 static_cast<unsigned short>(program->size()), bpf }; |
417 memcpy(bpf, &(*program)[0], sizeof(bpf)); | 423 memcpy(bpf, &(*program)[0], sizeof(bpf)); |
418 delete program; | 424 delete program; |
419 | 425 |
420 // Release memory that is no longer needed | 426 // Release memory that is no longer needed |
421 evaluators_.clear(); | 427 evaluators_.clear(); |
422 | 428 |
423 // Install BPF filter program | 429 // Install BPF filter program |
424 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { | 430 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
425 die(dryRun_ ? NULL : "Kernel refuses to enable no-new-privs"); | 431 SANDBOX_DIE(quiet |
| 432 ? NULL : "Kernel refuses to enable no-new-privs"); |
426 } else { | 433 } else { |
427 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | 434 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { |
428 die(dryRun_ ? NULL : "Kernel refuses to turn on BPF filters"); | 435 SANDBOX_DIE(quiet |
| 436 ? NULL : "Kernel refuses to turn on BPF filters"); |
429 } | 437 } |
430 } | 438 } |
431 | 439 |
432 return; | 440 return; |
433 } | 441 } |
434 | 442 |
435 void Sandbox::findRanges(Ranges *ranges) { | 443 void Sandbox::findRanges(Ranges *ranges) { |
436 // Please note that "struct seccomp_data" defines system calls as a signed | 444 // Please note that "struct seccomp_data" defines system calls as a signed |
437 // int32_t, but BPF instructions always operate on unsigned quantities. We | 445 // int32_t, but BPF instructions always operate on unsigned quantities. We |
438 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, | 446 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, |
(...skipping 17 matching lines...) Expand all Loading... |
456 // "oldErr" should at this point be the "default" policy for all system call | 464 // "oldErr" should at this point be the "default" policy for all system call |
457 // numbers that don't have an explicit handler in the system call evaluator. | 465 // numbers that don't have an explicit handler in the system call evaluator. |
458 // But as we are quite paranoid, we perform some more sanity checks to verify | 466 // But as we are quite paranoid, we perform some more sanity checks to verify |
459 // that there actually is a consistent "default" policy in the first place. | 467 // that there actually is a consistent "default" policy in the first place. |
460 // We don't actually iterate over all possible 2^32 values, though. We just | 468 // We don't actually iterate over all possible 2^32 values, though. We just |
461 // perform spot checks at the boundaries. | 469 // perform spot checks at the boundaries. |
462 // The cases that we test are: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF. | 470 // The cases that we test are: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF. |
463 if (oldErr != evaluateSyscall(std::numeric_limits<int>::max()) || | 471 if (oldErr != evaluateSyscall(std::numeric_limits<int>::max()) || |
464 oldErr != evaluateSyscall(std::numeric_limits<int>::min()) || | 472 oldErr != evaluateSyscall(std::numeric_limits<int>::min()) || |
465 oldErr != evaluateSyscall(-1)) { | 473 oldErr != evaluateSyscall(-1)) { |
466 die("Invalid seccomp policy"); | 474 SANDBOX_DIE("Invalid seccomp policy"); |
467 } | 475 } |
468 ranges->push_back( | 476 ranges->push_back( |
469 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr)); | 477 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr)); |
470 } | 478 } |
471 | 479 |
472 void Sandbox::emitJumpStatements(Program *program, RetInsns *rets, | 480 void Sandbox::emitJumpStatements(Program *program, RetInsns *rets, |
473 Ranges::const_iterator start, | 481 Ranges::const_iterator start, |
474 Ranges::const_iterator stop) { | 482 Ranges::const_iterator stop) { |
475 // We convert the list of system call ranges into jump table that performs | 483 // We convert the list of system call ranges into jump table that performs |
476 // a binary search over the ranges. | 484 // a binary search over the ranges. |
477 // As a sanity check, we need to have at least two distinct ranges for us | 485 // As a sanity check, we need to have at least two distinct ranges for us |
478 // to be able to build a jump table. | 486 // to be able to build a jump table. |
479 if (stop - start <= 1) { | 487 if (stop - start <= 1) { |
480 die("Invalid set of system call ranges"); | 488 SANDBOX_DIE("Invalid set of system call ranges"); |
481 } | 489 } |
482 | 490 |
483 // Pick the range object that is located at the mid point of our list. | 491 // Pick the range object that is located at the mid point of our list. |
484 // We compare our system call number against the lowest valid system call | 492 // We compare our system call number against the lowest valid system call |
485 // number in this range object. If our number is lower, it is outside of | 493 // number in this range object. If our number is lower, it is outside of |
486 // this range object. If it is greater or equal, it might be inside. | 494 // this range object. If it is greater or equal, it might be inside. |
487 Ranges::const_iterator mid = start + (stop - start)/2; | 495 Ranges::const_iterator mid = start + (stop - start)/2; |
488 Program::size_type jmp = program->size(); | 496 Program::size_type jmp = program->size(); |
489 if (jmp >= SECCOMP_MAX_PROGRAM_SIZE) { | 497 if (jmp >= SECCOMP_MAX_PROGRAM_SIZE) { |
490 compiler_err: | 498 compiler_err: |
491 die("Internal compiler error; failed to compile jump table"); | 499 SANDBOX_DIE("Internal compiler error; failed to compile jump table"); |
492 } | 500 } |
493 program->push_back((struct sock_filter) | 501 program->push_back((struct sock_filter) |
494 BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, mid->from, | 502 BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, mid->from, |
495 // Jump targets are place-holders that will be fixed up later. | 503 // Jump targets are place-holders that will be fixed up later. |
496 0, 0)); | 504 0, 0)); |
497 | 505 |
498 // The comparison turned out to be false; i.e. our system call number is | 506 // The comparison turned out to be false; i.e. our system call number is |
499 // less than the range object at the mid point of the list. | 507 // less than the range object at the mid point of the list. |
500 if (mid - start == 1) { | 508 if (mid - start == 1) { |
501 // If we have narrowed things down to a single range object, we can | 509 // If we have narrowed things down to a single range object, we can |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
535 } | 543 } |
536 | 544 |
// Appends the return instructions of the BPF program and resolves the jumps
// that target them.
//
// For every entry in rets, one BPF_RET+BPF_K instruction carrying the
// entry's key (ret_iter->first) is appended to program. Every FixUp in the
// entry's list (ret_iter->second) names a previously emitted jump
// instruction by index (addr) and says whether its "true" (jt) or "false"
// (jf) target must be patched; that field is set to the relative distance
// to the new return instruction.
//
// Dies if the program has already reached SECCOMP_MAX_PROGRAM_SIZE or if a
// computed distance falls outside 0..255.
537 void Sandbox::emitReturnStatements(Program *program, const RetInsns& rets) { | 545 void Sandbox::emitReturnStatements(Program *program, const RetInsns& rets) { |
538 // Iterate over the list of distinct exit codes from our BPF filter | 546 // Iterate over the list of distinct exit codes from our BPF filter |
539 // program and emit the BPF_RET statements. | 547 // program and emit the BPF_RET statements. |
540 for (RetInsns::const_iterator ret_iter = rets.begin(); | 548 for (RetInsns::const_iterator ret_iter = rets.begin(); |
541 ret_iter != rets.end(); | 549 ret_iter != rets.end(); |
542 ++ret_iter) { | 550 ++ret_iter) { |
// ip is captured before push_back(), so it is the index of the BPF_RET
// instruction appended just below.
543 Program::size_type ip = program->size(); | 551 Program::size_type ip = program->size(); |
544 if (ip >= SECCOMP_MAX_PROGRAM_SIZE) { | 552 if (ip >= SECCOMP_MAX_PROGRAM_SIZE) { |
545 die("Internal compiler error; failed to compile jump table"); | 553 SANDBOX_DIE("Internal compiler error; failed to compile jump table"); |
546 } | 554 } |
547 program->push_back((struct sock_filter) | 555 program->push_back((struct sock_filter) |
548 BPF_STMT(BPF_RET+BPF_K, ret_iter->first)); | 556 BPF_STMT(BPF_RET+BPF_K, ret_iter->first)); |
549 | 557 |
550 // Iterate over the instruction pointers for the BPF_JMP instructions | 558 // Iterate over the instruction pointers for the BPF_JMP instructions |
551 // that need to be patched up. | 559 // that need to be patched up. |
552 for (std::vector<FixUp>::const_iterator insn_iter=ret_iter->second.begin(); | 560 for (std::vector<FixUp>::const_iterator insn_iter=ret_iter->second.begin(); |
553 insn_iter != ret_iter->second.end(); | 561 insn_iter != ret_iter->second.end(); |
554 ++insn_iter) { | 562 ++insn_iter) { |
555 // Jumps are always relative and they are always forward. | 563 // Jumps are always relative and they are always forward. |
// The -1 is there because classic BPF counts a jump offset from the
// instruction following the jump. The 255 limit matches the 8-bit jt/jf
// fields of struct sock_filter.
556 int distance = ip - insn_iter->addr - 1; | 564 int distance = ip - insn_iter->addr - 1; |
557 if (distance < 0 || distance > 255) { | 565 if (distance < 0 || distance > 255) { |
558 die("Internal compiler error; failed to compile jump table"); | 566 SANDBOX_DIE("Internal compiler error; failed to compile jump table"); |
559 } | 567 } |
560 | 568 |
561 // Decide whether we need to patch up the "true" or the "false" jump | 569 // Decide whether we need to patch up the "true" or the "false" jump |
562 // target. | 570 // target. |
563 if (insn_iter->jt) { | 571 if (insn_iter->jt) { |
564 (*program)[insn_iter->addr].jt = distance; | 572 (*program)[insn_iter->addr].jt = distance; |
565 } else { | 573 } else { |
566 (*program)[insn_iter->addr].jf = distance; | 574 (*program)[insn_iter->addr].jf = distance; |
567 } | 575 } |
568 } | 576 } |
569 } | 577 } |
570 } | 578 } |
571 | 579 |
// SIGSYS handler for the sandbox (signature matches an SA_SIGINFO-style handler: signal number, siginfo, opaque context).
// Visible behavior: rejects any signal that is not a seccomp-generated SIGSYS, that lacks a context, or whose
// info->si_errno is not in the range 1..trapArraySize_, by calling SANDBOX_DIE(); otherwise it saves errno, obtains the
// ucontext_t, stores the handled call's result "rc" into SECCOMP_RESULT(ctx), and restores errno before returning.
// NOTE(review): the part that computes "rc" (and any jump to the sigsys_err label) is elided by the diff viewer below,
// so it is not described here — confirm against the full file.
572 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | 580 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { |
573 // Various sanity checks to make sure we actually received a signal | 581 // Various sanity checks to make sure we actually received a signal |
574 // triggered by a BPF filter. If something else triggered SIGSYS | 582 // triggered by a BPF filter. If something else triggered SIGSYS |
575 // (e.g. kill()), there is really nothing we can do with this signal. | 583 // (e.g. kill()), there is really nothing we can do with this signal. |
576 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context || | 584 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context || |
577 info->si_errno <= 0 || | 585 info->si_errno <= 0 || |
578 static_cast<size_t>(info->si_errno) > trapArraySize_) { | 586 static_cast<size_t>(info->si_errno) > trapArraySize_) { |
579 // die() can call LOG(FATAL). This is not normally async-signal safe | 587 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal safe |
580 // and can lead to bugs. We should eventually implement a different | 588 // and can lead to bugs. We should eventually implement a different |
581 // logging and reporting mechanism that is safe to be called from | 589 // logging and reporting mechanism that is safe to be called from |
582 // the sigSys() handler. | 590 // the sigSys() handler. |
583 // TODO: If we feel confident that our code otherwise works correctly, we | 591 // TODO: If we feel confident that our code otherwise works correctly, we |
584 // could actually make an argument that spurious SIGSYS should | 592 // could actually make an argument that spurious SIGSYS should |
585 // just get silently ignored. TBD | 593 // just get silently ignored. TBD |
586 sigsys_err: | 594 sigsys_err: |
587 die("Unexpected SIGSYS received"); | 595 SANDBOX_DIE("Unexpected SIGSYS received"); |
588 } | 596 } |
589 | 597 |
590 // Signal handlers should always preserve "errno". Otherwise, we could | 598 // Signal handlers should always preserve "errno". Otherwise, we could |
591 // trigger really subtle bugs. | 599 // trigger really subtle bugs. |
592 int old_errno = errno; | 600 int old_errno = errno; |
593 | 601 |
594 // Obtain the signal context. This, most notably, gives us access to | 602 // Obtain the signal context. This, most notably, gives us access to |
595 // all CPU registers at the time of the signal. | 603 // all CPU registers at the time of the signal. |
596 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context); | 604 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context); |
597 | 605 |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
633 // Update the CPU register that stores the return code of the system call | 641 // Update the CPU register that stores the return code of the system call |
634 // that we just handled, and restore "errno" to the value that it had | 642 // that we just handled, and restore "errno" to the value that it had |
635 // before entering the signal handler. | 643 // before entering the signal handler. |
636 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc); | 644 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc); |
637 errno = old_errno; | 645 errno = old_errno; |
638 | 646 |
639 return; | 647 return; |
640 } | 648 } |
641 | 649 |
642 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { | 650 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { |
643 die(static_cast<char *>(aux)); | 651 SANDBOX_DIE(static_cast<char *>(aux)); |
644 } | 652 } |
645 | 653 |
646 int Sandbox::getTrapId(Sandbox::TrapFnc fnc, const void *aux) { | 654 int Sandbox::getTrapId(Sandbox::TrapFnc fnc, const void *aux) { |
647 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance | 655 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance |
648 // of a SECCOMP_RET_TRAP. | 656 // of a SECCOMP_RET_TRAP. |
649 std::pair<TrapFnc, const void *> key(fnc, aux); | 657 std::pair<TrapFnc, const void *> key(fnc, aux); |
650 TrapIds::const_iterator iter = trapIds_.find(key); | 658 TrapIds::const_iterator iter = trapIds_.find(key); |
651 if (iter != trapIds_.end()) { | 659 if (iter != trapIds_.end()) { |
652 // We have seen this pair before. Return the same id that we assigned | 660 // We have seen this pair before. Return the same id that we assigned |
653 // earlier. | 661 // earlier. |
654 return iter->second; | 662 return iter->second; |
655 } else { | 663 } else { |
656 // This is a new pair. Remember it and assign a new id. | 664 // This is a new pair. Remember it and assign a new id. |
657 // Please note that we have to store traps in memory that doesn't get | 665 // Please note that we have to store traps in memory that doesn't get |
658 // deallocated when the program is shutting down. A memory leak is | 666 // deallocated when the program is shutting down. A memory leak is |
659 // intentional, because we might otherwise not be able to execute | 667 // intentional, because we might otherwise not be able to execute |
660 // system calls part way through the program shutting down | 668 // system calls part way through the program shutting down |
661 if (!traps_) { | 669 if (!traps_) { |
662 traps_ = new Traps(); | 670 traps_ = new Traps(); |
663 } | 671 } |
664 Traps::size_type id = traps_->size() + 1; | 672 Traps::size_type id = traps_->size() + 1; |
665 if (id > SECCOMP_RET_DATA) { | 673 if (id > SECCOMP_RET_DATA) { |
666 // In practice, this is pretty much impossible to trigger, as there | 674 // In practice, this is pretty much impossible to trigger, as there |
667 // are other kernel limitations that restrict overall BPF program sizes. | 675 // are other kernel limitations that restrict overall BPF program sizes. |
668 die("Too many SECCOMP_RET_TRAP callback instances"); | 676 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); |
669 } | 677 } |
670 | 678 |
671 traps_->push_back(ErrorCode(fnc, aux, id)); | 679 traps_->push_back(ErrorCode(fnc, aux, id)); |
672 trapIds_[key] = id; | 680 trapIds_[key] = id; |
673 | 681 |
674 // We want to access the traps_ vector from our signal handler. But | 682 // We want to access the traps_ vector from our signal handler. But |
675 // we are not assured that doing so is async-signal safe. On the other | 683 // we are not assured that doing so is async-signal safe. On the other |
676 // hand, C++ guarantees that the contents of a vector is stored in a | 684 // hand, C++ guarantees that the contents of a vector is stored in a |
677 // contiguous C-style array. | 685 // contiguous C-style array. |
678 // So, we look up the address and size of this array outside of the | 686 // So, we look up the address and size of this array outside of the |
679 // signal handler, where we can safely do so. | 687 // signal handler, where we can safely do so. |
680 trapArray_ = &(*traps_)[0]; | 688 trapArray_ = &(*traps_)[0]; |
681 trapArraySize_ = id; | 689 trapArraySize_ = id; |
682 return id; | 690 return id; |
683 } | 691 } |
684 } | 692 } |
685 | 693 |
// Definitions of the Sandbox class's static data members.
// NOTE(review): the dryRun_ row below has no NEW-side counterpart, i.e. this change removes that definition.
// traps_ starts out NULL and is allocated on first use by getTrapId(), which intentionally never frees it.
// trapArray_ / trapArraySize_ are updated by getTrapId() to point at the storage of *traps_ and hold its element count;
// sigSys() uses trapArraySize_ to range-check the trap id it receives.
686 bool Sandbox::dryRun_ = false; | |
687 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 694 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
688 int Sandbox::proc_fd_ = -1; | 695 int Sandbox::proc_fd_ = -1; |
689 Sandbox::Evaluators Sandbox::evaluators_; | 696 Sandbox::Evaluators Sandbox::evaluators_; |
690 Sandbox::Traps *Sandbox::traps_ = NULL; | 697 Sandbox::Traps *Sandbox::traps_ = NULL; |
691 Sandbox::TrapIds Sandbox::trapIds_; | 698 Sandbox::TrapIds Sandbox::trapIds_; |
692 Sandbox::ErrorCode *Sandbox::trapArray_ = NULL; | 699 Sandbox::ErrorCode *Sandbox::trapArray_ = NULL; |
693 size_t Sandbox::trapArraySize_ = 0; | 700 size_t Sandbox::trapArraySize_ = 0; |
694 | 701 |
695 } // namespace | 702 } // namespace |
OLD | NEW |