Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(357)

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 11411254: SECCOMP-BPF: Added support for inspecting system call arguments from BPF filters. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Addressed comments and fixed death tests Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <endian.h>
6 #if __BYTE_ORDER == __BIG_ENDIAN
jln (very slow on Chromium) 2012/12/14 02:28:02 You don't want to keep something around since it w
7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit
8 // values that need to be inspected by a virtual machine that only ever
9 // operates on 32bit values. The kernel developers decided how values
10 // should be split into two 32bit words to achieve this goal. But at this
11 // time, there is no existing BPF implementation in the kernel that uses
12 // 64bit big endian values. So, all we have to go by is the consensus
13 // from a discussion on LKML. Actual implementations, if and when they
14 // happen, might very well differ.
15 // If this code is ever going to be used with such a kernel, you should
16 // disable the "#error" and carefully test the code (e.g. run the unit
17 // tests). If things don't work, search for all occurrences of __BYTE_ORDER
18 // and verify that the proposed implementation agrees with what the kernel
19 // actually does.
20 #error Big endian operation is untested and expected to be broken
21 #endif
22
23 #ifndef SECCOMP_BPF_STANDALONE 5 #ifndef SECCOMP_BPF_STANDALONE
24 #include "base/logging.h" 6 #include "base/logging.h"
25 #include "base/posix/eintr_wrapper.h" 7 #include "base/posix/eintr_wrapper.h"
26 #endif 8 #endif
27 9
28 #include "sandbox/linux/seccomp-bpf/codegen.h" 10 #include "sandbox/linux/seccomp-bpf/codegen.h"
29 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" 11 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
30 #include "sandbox/linux/seccomp-bpf/syscall.h" 12 #include "sandbox/linux/seccomp-bpf/syscall.h"
31 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" 13 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
32 #include "sandbox/linux/seccomp-bpf/verifier.h" 14 #include "sandbox/linux/seccomp-bpf/verifier.h"
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
72 54
73 // The kernel gives us a sandbox, we turn it into a playground :-) 55 // The kernel gives us a sandbox, we turn it into a playground :-)
74 // This is version 2 of the playground; version 1 was built on top of 56 // This is version 2 of the playground; version 1 was built on top of
75 // pre-BPF seccomp mode. 57 // pre-BPF seccomp mode.
76 namespace playground2 { 58 namespace playground2 {
77 59
78 const int kExpectedExitCode = 100; 60 const int kExpectedExitCode = 100;
79 61
80 // We define a really simple sandbox policy. It is just good enough for us 62 // We define a really simple sandbox policy. It is just good enough for us
81 // to tell that the sandbox has actually been activated. 63 // to tell that the sandbox has actually been activated.
82 ErrorCode Sandbox::probeEvaluator(int sysnum, void *) { 64 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) {
83 switch (sysnum) { 65 switch (sysnum) {
84 case __NR_getpid: 66 case __NR_getpid:
85 // Return EPERM so that we can check that the filter actually ran. 67 // Return EPERM so that we can check that the filter actually ran.
86 return ErrorCode(EPERM); 68 return ErrorCode(EPERM);
87 case __NR_exit_group: 69 case __NR_exit_group:
88 // Allow exit() with a non-default return code. 70 // Allow exit() with a non-default return code.
89 return ErrorCode(ErrorCode::ERR_ALLOWED); 71 return ErrorCode(ErrorCode::ERR_ALLOWED);
90 default: 72 default:
91 // Make everything else fail in an easily recognizable way. 73 // Make everything else fail in an easily recognizable way.
92 return ErrorCode(EINVAL); 74 return ErrorCode(EINVAL);
93 } 75 }
94 } 76 }
95 77
96 void Sandbox::probeProcess(void) { 78 void Sandbox::ProbeProcess(void) {
97 if (syscall(__NR_getpid) < 0 && errno == EPERM) { 79 if (syscall(__NR_getpid) < 0 && errno == EPERM) {
98 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); 80 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
99 } 81 }
100 } 82 }
101 83
102 bool Sandbox::isValidSyscallNumber(int sysnum) { 84 bool Sandbox::IsValidSyscallNumber(int sysnum) {
103 return SyscallIterator::IsValid(sysnum); 85 return SyscallIterator::IsValid(sysnum);
104 } 86 }
105 87
106 ErrorCode Sandbox::allowAllEvaluator(int sysnum, void *) { 88 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) {
107 if (!isValidSyscallNumber(sysnum)) { 89 if (!IsValidSyscallNumber(sysnum)) {
108 return ErrorCode(ENOSYS); 90 return ErrorCode(ENOSYS);
109 } 91 }
110 return ErrorCode(ErrorCode::ERR_ALLOWED); 92 return ErrorCode(ErrorCode::ERR_ALLOWED);
111 } 93 }
112 94
113 void Sandbox::tryVsyscallProcess(void) { 95 void Sandbox::TryVsyscallProcess(void) {
114 time_t current_time; 96 time_t current_time;
115 // time() is implemented as a vsyscall. With an older glibc, with 97 // time() is implemented as a vsyscall. With an older glibc, with
116 // vsyscall=emulate and some versions of the seccomp BPF patch 98 // vsyscall=emulate and some versions of the seccomp BPF patch
117 // we may get SIGKILL-ed. Detect this! 99 // we may get SIGKILL-ed. Detect this!
118 if (time(&current_time) != static_cast<time_t>(-1)) { 100 if (time(&current_time) != static_cast<time_t>(-1)) {
119 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); 101 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
120 } 102 }
121 } 103 }
122 104
123 bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(), 105 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),
124 EvaluateSyscall syscallEvaluator, 106 EvaluateSyscall syscall_evaluator,
125 void *aux, 107 void *aux,
126 int proc_fd) { 108 int proc_fd) {
127 // Block all signals before forking a child process. This prevents an 109 // Block all signals before forking a child process. This prevents an
128 // attacker from manipulating our test by sending us an unexpected signal. 110 // attacker from manipulating our test by sending us an unexpected signal.
129 sigset_t oldMask, newMask; 111 sigset_t old_mask, new_mask;
130 if (sigfillset(&newMask) || 112 if (sigfillset(&new_mask) ||
131 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { 113 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
132 SANDBOX_DIE("sigprocmask() failed"); 114 SANDBOX_DIE("sigprocmask() failed");
133 } 115 }
134 int fds[2]; 116 int fds[2];
135 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { 117 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {
136 SANDBOX_DIE("pipe() failed"); 118 SANDBOX_DIE("pipe() failed");
137 } 119 }
138 120
139 if (fds[0] <= 2 || fds[1] <= 2) { 121 if (fds[0] <= 2 || fds[1] <= 2) {
140 SANDBOX_DIE("Process started without standard file descriptors"); 122 SANDBOX_DIE("Process started without standard file descriptors");
141 } 123 }
142 124
143 pid_t pid = fork(); 125 pid_t pid = fork();
144 if (pid < 0) { 126 if (pid < 0) {
145 // Die if we cannot fork(). We would probably fail a little later 127 // Die if we cannot fork(). We would probably fail a little later
146 // anyway, as the machine is likely very close to running out of 128 // anyway, as the machine is likely very close to running out of
147 // memory. 129 // memory.
148 // But what we don't want to do is return "false", as a crafty 130 // But what we don't want to do is return "false", as a crafty
149 // attacker might cause fork() to fail at will and could trick us 131 // attacker might cause fork() to fail at will and could trick us
150 // into running without a sandbox. 132 // into running without a sandbox.
151 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails 133 sigprocmask(SIG_SETMASK, &old_mask, NULL); // OK, if it fails
152 SANDBOX_DIE("fork() failed unexpectedly"); 134 SANDBOX_DIE("fork() failed unexpectedly");
153 } 135 }
154 136
155 // In the child process 137 // In the child process
156 if (!pid) { 138 if (!pid) {
157 // Test a very simple sandbox policy to verify that we can 139 // Test a very simple sandbox policy to verify that we can
158 // successfully turn on sandboxing. 140 // successfully turn on sandboxing.
159 Die::EnableSimpleExit(); 141 Die::EnableSimpleExit();
160 142
161 errno = 0; 143 errno = 0;
(...skipping 22 matching lines...) Expand all
184 if (HANDLE_EINTR(close(fds[1]))) { 166 if (HANDLE_EINTR(close(fds[1]))) {
185 // This call to close() has been failing in strange ways. See 167 // This call to close() has been failing in strange ways. See
186 // crbug.com/152530. So we only fail in debug mode now. 168 // crbug.com/152530. So we only fail in debug mode now.
187 #if !defined(NDEBUG) 169 #if !defined(NDEBUG)
188 WriteFailedStderrSetupMessage(fds[1]); 170 WriteFailedStderrSetupMessage(fds[1]);
189 SANDBOX_DIE(NULL); 171 SANDBOX_DIE(NULL);
190 #endif 172 #endif
191 } 173 }
192 174
193 evaluators_.clear(); 175 evaluators_.clear();
194 setSandboxPolicy(syscallEvaluator, aux); 176 SetSandboxPolicy(syscall_evaluator, aux);
195 setProcFd(proc_fd); 177 SetProcFd(proc_fd);
196 178
197 // By passing "quiet=true" to "startSandboxInternal()" we suppress 179 // By passing "quiet=true" to "startSandboxInternal()" we suppress
198 // messages for expected and benign failures (e.g. if the current 180 // messages for expected and benign failures (e.g. if the current
199 // kernel lacks support for BPF filters). 181 // kernel lacks support for BPF filters).
200 startSandboxInternal(true); 182 StartSandboxInternal(true);
201 183
202 // Run our code in the sandbox. 184 // Run our code in the sandbox.
203 CodeInSandbox(); 185 code_in_sandbox();
204 186
205 // CodeInSandbox() is not supposed to return here. 187 // code_in_sandbox() is not supposed to return here.
206 SANDBOX_DIE(NULL); 188 SANDBOX_DIE(NULL);
207 } 189 }
208 190
209 // In the parent process. 191 // In the parent process.
210 if (HANDLE_EINTR(close(fds[1]))) { 192 if (HANDLE_EINTR(close(fds[1]))) {
211 SANDBOX_DIE("close() failed"); 193 SANDBOX_DIE("close() failed");
212 } 194 }
213 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { 195 if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {
214 SANDBOX_DIE("sigprocmask() failed"); 196 SANDBOX_DIE("sigprocmask() failed");
215 } 197 }
216 int status; 198 int status;
217 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { 199 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
218 SANDBOX_DIE("waitpid() failed unexpectedly"); 200 SANDBOX_DIE("waitpid() failed unexpectedly");
219 } 201 }
220 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode; 202 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;
221 203
222 // If we fail to support sandboxing, there might be an additional 204 // If we fail to support sandboxing, there might be an additional
223 // error message. If so, this was an entirely unexpected and fatal 205 // error message. If so, this was an entirely unexpected and fatal
(...skipping 11 matching lines...) Expand all
235 SANDBOX_DIE(buf); 217 SANDBOX_DIE(buf);
236 } 218 }
237 } 219 }
238 if (HANDLE_EINTR(close(fds[0]))) { 220 if (HANDLE_EINTR(close(fds[0]))) {
239 SANDBOX_DIE("close() failed"); 221 SANDBOX_DIE("close() failed");
240 } 222 }
241 223
242 return rc; 224 return rc;
243 } 225 }
244 226
245 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) { 227 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) {
246 #if defined(SECCOMP_BPF_VALGRIND_HACKS) 228 #if defined(SECCOMP_BPF_VALGRIND_HACKS)
247 if (RUNNING_ON_VALGRIND) { 229 if (RUNNING_ON_VALGRIND) {
248 // Valgrind doesn't like our run-time test. Disable testing and assume we 230 // Valgrind doesn't like our run-time test. Disable testing and assume we
249 // always support sandboxing. This feature should only ever be enabled when 231 // always support sandboxing. This feature should only ever be enabled when
250 // debugging. 232 // debugging.
251 return true; 233 return true;
252 } 234 }
253 #endif 235 #endif
254 236
255 return 237 return
256 RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, 0, proc_fd) && 238 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) &&
257 RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, 0, 239 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0,
258 proc_fd); 240 proc_fd);
259 } 241 }
260 242
261 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) { 243 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {
262 // If the sandbox is currently active, we clearly must have support for 244 // If the sandbox is currently active, we clearly must have support for
263 // sandboxing. 245 // sandboxing.
264 if (status_ == STATUS_ENABLED) { 246 if (status_ == STATUS_ENABLED) {
265 return status_; 247 return status_;
266 } 248 }
267 249
268 // Even if the sandbox was previously available, something might have 250 // Even if the sandbox was previously available, something might have
269 // changed in our run-time environment. Check one more time. 251 // changed in our run-time environment. Check one more time.
270 if (status_ == STATUS_AVAILABLE) { 252 if (status_ == STATUS_AVAILABLE) {
271 if (!isSingleThreaded(proc_fd)) { 253 if (!IsSingleThreaded(proc_fd)) {
272 status_ = STATUS_UNAVAILABLE; 254 status_ = STATUS_UNAVAILABLE;
273 } 255 }
274 return status_; 256 return status_;
275 } 257 }
276 258
277 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) { 259 if (status_ == STATUS_UNAVAILABLE && IsSingleThreaded(proc_fd)) {
278 // All state transitions resulting in STATUS_UNAVAILABLE are immediately 260 // All state transitions resulting in STATUS_UNAVAILABLE are immediately
279 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all 261 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all
280 // happen, if and only if they are triggered by the process being multi- 262 // happen, if and only if they are triggered by the process being multi-
281 // threaded. 263 // threaded.
282 // In other words, if a single-threaded process is currently in the 264 // In other words, if a single-threaded process is currently in the
283 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is 265 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is
284 // actually available. 266 // actually available.
285 status_ = STATUS_AVAILABLE; 267 status_ = STATUS_AVAILABLE;
286 return status_; 268 return status_;
287 } 269 }
288 270
289 // If we have not previously checked for availability of the sandbox or if 271 // If we have not previously checked for availability of the sandbox or if
290 // we otherwise don't believe to have a good cached value, we have to 272 // we otherwise don't believe to have a good cached value, we have to
291 // perform a thorough check now. 273 // perform a thorough check now.
292 if (status_ == STATUS_UNKNOWN) { 274 if (status_ == STATUS_UNKNOWN) {
293 status_ = kernelSupportSeccompBPF(proc_fd) 275 status_ = KernelSupportSeccompBPF(proc_fd)
294 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; 276 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;
295 277
296 // As we are performing our tests from a child process, the run-time 278 // As we are performing our tests from a child process, the run-time
297 // environment that is visible to the sandbox is always guaranteed to be 279 // environment that is visible to the sandbox is always guaranteed to be
298 // single-threaded. Let's check here whether the caller is single- 280 // single-threaded. Let's check here whether the caller is single-
299 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. 281 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.
300 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) { 282 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {
301 status_ = STATUS_UNAVAILABLE; 283 status_ = STATUS_UNAVAILABLE;
302 } 284 }
303 } 285 }
304 return status_; 286 return status_;
305 } 287 }
306 288
307 void Sandbox::setProcFd(int proc_fd) { 289 void Sandbox::SetProcFd(int proc_fd) {
jln (very slow on Chromium) 2012/12/14 02:28:02 Should be set_proc_fd() since it's a mutator.
308 proc_fd_ = proc_fd; 290 proc_fd_ = proc_fd;
309 } 291 }
310 292
311 void Sandbox::startSandboxInternal(bool quiet) { 293 void Sandbox::StartSandboxInternal(bool quiet) {
312 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { 294 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
313 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " 295 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "
314 "unavailable"); 296 "unavailable");
315 } else if (status_ == STATUS_ENABLED) { 297 } else if (status_ == STATUS_ENABLED) {
316 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " 298 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "
317 "setSandboxPolicy() to stack policies instead"); 299 "setSandboxPolicy() to stack policies instead");
318 } 300 }
319 if (proc_fd_ < 0) { 301 if (proc_fd_ < 0) {
320 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); 302 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
321 } 303 }
322 if (proc_fd_ < 0) { 304 if (proc_fd_ < 0) {
323 // For now, continue in degraded mode, if we can't access /proc. 305 // For now, continue in degraded mode, if we can't access /proc.
324 // In the future, we might want to tighten this requirement. 306 // In the future, we might want to tighten this requirement.
325 } 307 }
326 if (!isSingleThreaded(proc_fd_)) { 308 if (!IsSingleThreaded(proc_fd_)) {
327 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); 309 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");
328 } 310 }
329 311
330 // We no longer need access to any files in /proc. We want to do this 312 // We no longer need access to any files in /proc. We want to do this
331 // before installing the filters, just in case that our policy denies 313 // before installing the filters, just in case that our policy denies
332 // close(). 314 // close().
333 if (proc_fd_ >= 0) { 315 if (proc_fd_ >= 0) {
334 if (HANDLE_EINTR(close(proc_fd_))) { 316 if (HANDLE_EINTR(close(proc_fd_))) {
335 SANDBOX_DIE("Failed to close file descriptor for /proc"); 317 SANDBOX_DIE("Failed to close file descriptor for /proc");
336 } 318 }
337 proc_fd_ = -1; 319 proc_fd_ = -1;
338 } 320 }
339 321
340 // Install the filters. 322 // Install the filters.
341 installFilter(quiet); 323 InstallFilter(quiet);
342 324
343 // We are now inside the sandbox. 325 // We are now inside the sandbox.
344 status_ = STATUS_ENABLED; 326 status_ = STATUS_ENABLED;
345 } 327 }
346 328
347 bool Sandbox::isSingleThreaded(int proc_fd) { 329 bool Sandbox::IsSingleThreaded(int proc_fd) {
348 if (proc_fd < 0) { 330 if (proc_fd < 0) {
349 // Cannot determine whether program is single-threaded. Hope for 331 // Cannot determine whether program is single-threaded. Hope for
350 // the best... 332 // the best...
351 return true; 333 return true;
352 } 334 }
353 335
354 struct stat sb; 336 struct stat sb;
355 int task = -1; 337 int task = -1;
356 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || 338 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
357 fstat(task, &sb) != 0 || 339 fstat(task, &sb) != 0 ||
358 sb.st_nlink != 3 || 340 sb.st_nlink != 3 ||
359 HANDLE_EINTR(close(task))) { 341 HANDLE_EINTR(close(task))) {
360 if (task >= 0) { 342 if (task >= 0) {
361 if (HANDLE_EINTR(close(task))) { } 343 if (HANDLE_EINTR(close(task))) { }
362 } 344 }
363 return false; 345 return false;
364 } 346 }
365 return true; 347 return true;
366 } 348 }
367 349
368 bool Sandbox::isDenied(const ErrorCode& code) { 350 bool Sandbox::IsDenied(const ErrorCode& code) {
369 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP || 351 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
370 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) && 352 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&
371 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO)); 353 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));
372 } 354 }
373 355
374 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, 356 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,
375 void *aux) { 357 void *aux) {
376 for (SyscallIterator iter(true); !iter.Done(); ) { 358 for (SyscallIterator iter(true); !iter.Done(); ) {
377 uint32_t sysnum = iter.Next(); 359 uint32_t sysnum = iter.Next();
378 if (!isDenied(syscallEvaluator(sysnum, aux))) { 360 if (!IsDenied(syscall_evaluator(sysnum, aux))) {
379 SANDBOX_DIE("Policies should deny system calls that are outside the " 361 SANDBOX_DIE("Policies should deny system calls that are outside the "
380 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); 362 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");
381 } 363 }
382 } 364 }
383 return; 365 return;
384 } 366 }
385 367
386 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { 368 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
387 if (BPF_CLASS(insn->code) == BPF_RET && 369 if (BPF_CLASS(insn->code) == BPF_RET &&
388 insn->k > SECCOMP_RET_TRAP && 370 insn->k > SECCOMP_RET_TRAP &&
389 insn->k - SECCOMP_RET_TRAP <= trapArraySize_) { 371 insn->k - SECCOMP_RET_TRAP <= trap_array_size_) {
390 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1]; 372 const ErrorCode& err = trap_array_[insn->k - SECCOMP_RET_TRAP - 1];
391 if (!err.safe_) { 373 if (!err.safe_) {
392 bool *is_unsafe = static_cast<bool *>(aux); 374 bool *is_unsafe = static_cast<bool *>(aux);
393 *is_unsafe = true; 375 *is_unsafe = true;
394 } 376 }
395 } 377 }
396 } 378 }
397 379
398 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) { 380 void Sandbox::RedirectToUserspace(Instruction *insn, void *) {
399 // When inside an UnsafeTrap() callback, we want to allow all system calls. 381 // When inside an UnsafeTrap() callback, we want to allow all system calls.
400 // This means, we must conditionally disable the sandbox -- and that's not 382 // This means, we must conditionally disable the sandbox -- and that's not
401 // something that kernel-side BPF filters can do, as they cannot inspect 383 // something that kernel-side BPF filters can do, as they cannot inspect
402 // any state other than the syscall arguments. 384 // any state other than the syscall arguments.
403 // But if we redirect all error handlers to user-space, then we can easily 385 // But if we redirect all error handlers to user-space, then we can easily
404 // make this decision. 386 // make this decision.
405 // The performance penalty for this extra round-trip to user-space is not 387 // The performance penalty for this extra round-trip to user-space is not
406 // actually that bad, as we only ever pay it for denied system calls; and a 388 // actually that bad, as we only ever pay it for denied system calls; and a
407 // typical program has very few of these. 389 // typical program has very few of these.
408 if (BPF_CLASS(insn->code) == BPF_RET && 390 if (BPF_CLASS(insn->code) == BPF_RET &&
409 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { 391 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
410 insn->k = Trap(ReturnErrno, 392 insn->k = Trap(ReturnErrno,
411 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); 393 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
412 } 394 }
413 } 395 }
414 396
415 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { 397 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {
416 // We need to replicate the behavior of RedirectToUserspace(), so that our 398 // We need to replicate the behavior of RedirectToUserspace(), so that our
417 // Verifier can still work correctly. 399 // Verifier can still work correctly.
418 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); 400 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);
419 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); 401 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();
420 ErrorCode err = evaluator.first(sysnum, evaluator.second); 402 ErrorCode err = evaluator.first(sysnum, evaluator.second);
421 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { 403 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
422 return Trap(ReturnErrno, 404 return Trap(ReturnErrno,
423 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); 405 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
424 } 406 }
425 return err; 407 return err;
426 } 408 }
427 409
428 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { 410 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {
429 if (status_ == STATUS_ENABLED) { 411 if (status_ == STATUS_ENABLED) {
430 SANDBOX_DIE("Cannot change policy after sandbox has started"); 412 SANDBOX_DIE("Cannot change policy after sandbox has started");
431 } 413 }
432 policySanityChecks(syscallEvaluator, aux); 414 PolicySanityChecks(syscall_evaluator, aux);
433 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); 415 evaluators_.push_back(std::make_pair(syscall_evaluator, aux));
434 } 416 }
435 417
436 void Sandbox::installFilter(bool quiet) { 418 void Sandbox::InstallFilter(bool quiet) {
437 // Verify that the user pushed a policy. 419 // Verify that the user pushed a policy.
438 if (evaluators_.empty()) { 420 if (evaluators_.empty()) {
439 filter_failed: 421 filter_failed:
440 SANDBOX_DIE("Failed to configure system call filters"); 422 SANDBOX_DIE("Failed to configure system call filters");
441 } 423 }
442 424
443 // Set new SIGSYS handler 425 // Set new SIGSYS handler
444 struct sigaction sa; 426 struct sigaction sa;
445 memset(&sa, 0, sizeof(sa)); 427 memset(&sa, 0, sizeof(sa));
446 sa.sa_sigaction = sigSys; 428 sa.sa_sigaction = SigSys;
447 sa.sa_flags = SA_SIGINFO | SA_NODEFER; 429 sa.sa_flags = SA_SIGINFO | SA_NODEFER;
448 if (sigaction(SIGSYS, &sa, NULL) < 0) { 430 if (sigaction(SIGSYS, &sa, NULL) < 0) {
449 goto filter_failed; 431 goto filter_failed;
450 } 432 }
451 433
452 // Unmask SIGSYS 434 // Unmask SIGSYS
453 sigset_t mask; 435 sigset_t mask;
454 if (sigemptyset(&mask) || 436 if (sigemptyset(&mask) ||
455 sigaddset(&mask, SIGSYS) || 437 sigaddset(&mask, SIGSYS) ||
456 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { 438 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
457 goto filter_failed; 439 goto filter_failed;
458 } 440 }
459 441
460 // We can't handle stacked evaluators, yet. We'll get there eventually 442 // We can't handle stacked evaluators, yet. We'll get there eventually
461 // though. Hang tight. 443 // though. Hang tight.
462 if (evaluators_.size() != 1) { 444 if (evaluators_.size() != 1) {
463 SANDBOX_DIE("Not implemented"); 445 SANDBOX_DIE("Not implemented");
464 } 446 }
465 447
466 // Assemble the BPF filter program. 448 // Assemble the BPF filter program.
467 CodeGen *gen = new CodeGen(); 449 CodeGen *gen = new CodeGen();
468 if (!gen) { 450 if (!gen) {
469 SANDBOX_DIE("Out of memory"); 451 SANDBOX_DIE("Out of memory");
470 } 452 }
471 453
472 // If the architecture doesn't match SECCOMP_ARCH, disallow the 454 // If the architecture doesn't match SECCOMP_ARCH, disallow the
473 // system call. 455 // system call.
474 Instruction *tail; 456 Instruction *tail;
475 Instruction *head = 457 Instruction *head =
476 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 458 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_ARCH_IDX,
477 offsetof(struct arch_seccomp_data, arch),
478 tail = 459 tail =
479 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 460 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,
480 NULL, 461 NULL,
481 gen->MakeInstruction(BPF_RET+BPF_K, 462 gen->MakeInstruction(BPF_RET+BPF_K,
482 Kill( 463 Kill("Invalid audit architecture in BPF filter"))));
483 "Invalid audit architecture in BPF filter").err_)));
484 464
485 { 465 {
486 // Evaluate all possible system calls and group their ErrorCodes into 466 // Evaluate all possible system calls and group their ErrorCodes into
487 // ranges of identical codes. 467 // ranges of identical codes.
488 Ranges ranges; 468 Ranges ranges;
489 findRanges(&ranges); 469 FindRanges(&ranges);
490 470
491 // Compile the system call ranges to an optimized BPF jumptable 471 // Compile the system call ranges to an optimized BPF jumptable
492 Instruction *jumptable = 472 Instruction *jumptable =
493 assembleJumpTable(gen, ranges.begin(), ranges.end()); 473 AssembleJumpTable(gen, ranges.begin(), ranges.end());
494 474
495 // If there is at least one UnsafeTrap() in our program, the entire sandbox 475 // If there is at least one UnsafeTrap() in our program, the entire sandbox
496 // is unsafe. We need to modify the program so that all non- 476 // is unsafe. We need to modify the program so that all non-
497 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then 477 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then
498 // allow us to temporarily disable sandboxing rules inside of callbacks to 478 // allow us to temporarily disable sandboxing rules inside of callbacks to
499 // UnsafeTrap(). 479 // UnsafeTrap().
500 has_unsafe_traps_ = false; 480 has_unsafe_traps_ = false;
501 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); 481 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);
502 482
503 // Grab the system call number, so that we can implement jump tables. 483 // Grab the system call number, so that we can implement jump tables.
504 Instruction *load_nr = 484 Instruction *load_nr =
505 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 485 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_NR_IDX);
506 offsetof(struct arch_seccomp_data, nr));
507 486
508 // If our BPF program has unsafe jumps, enable support for them. This 487 // If our BPF program has unsafe jumps, enable support for them. This
509 // test happens very early in the BPF filter program. Even before we 488 // test happens very early in the BPF filter program. Even before we
510 // consider looking at system call numbers. 489 // consider looking at system call numbers.
511 // As support for unsafe jumps essentially defeats all the security 490 // As support for unsafe jumps essentially defeats all the security
512 // measures that the sandbox provides, we print a big warning message -- 491 // measures that the sandbox provides, we print a big warning message --
513 // and of course, we make sure to only ever enable this feature if it 492 // and of course, we make sure to only ever enable this feature if it
514 // is actually requested by the sandbox policy. 493 // is actually requested by the sandbox policy.
515 if (has_unsafe_traps_) { 494 if (has_unsafe_traps_) {
516 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) { 495 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {
(...skipping 30 matching lines...) Expand all
547 uint32_t low = static_cast<uint32_t>(syscall_entry_point); 526 uint32_t low = static_cast<uint32_t>(syscall_entry_point);
548 #if __SIZEOF_POINTER__ > 4 527 #if __SIZEOF_POINTER__ > 4
549 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); 528 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);
550 #endif 529 #endif
551 530
552 // BPF cannot do native 64bit comparisons. On 64bit architectures, we 531 // BPF cannot do native 64bit comparisons. On 64bit architectures, we
553 // have to compare both 32bit halves of the instruction pointer. If they 532 // have to compare both 32bit halves of the instruction pointer. If they
554 // match what we expect, we return ERR_ALLOWED. If either or both don't 533 // match what we expect, we return ERR_ALLOWED. If either or both don't
555 // match, we continue evaluating the rest of the sandbox policy. 534 // match, we continue evaluating the rest of the sandbox policy.
556 Instruction *escape_hatch = 535 Instruction *escape_hatch =
557 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 536 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_IP_LSB_IDX,
558 offsetof(struct arch_seccomp_data,
559 instruction_pointer) +
560 (__SIZEOF_POINTER__ > 4 &&
561 __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0),
562 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low, 537 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low,
563 #if __SIZEOF_POINTER__ > 4 538 #if __SIZEOF_POINTER__ > 4
564 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 539 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_IP_MSB_IDX,
565 offsetof(struct arch_seccomp_data,
566 instruction_pointer) +
567 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4),
568 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi, 540 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi,
569 #endif 541 #endif
570 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)), 542 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)),
571 #if __SIZEOF_POINTER__ > 4 543 #if __SIZEOF_POINTER__ > 4
572 load_nr)), 544 load_nr)),
573 #endif 545 #endif
574 load_nr)); 546 load_nr));
575 gen->JoinInstructions(tail, escape_hatch); 547 gen->JoinInstructions(tail, escape_hatch);
576 } else { 548 } else {
577 gen->JoinInstructions(tail, load_nr); 549 gen->JoinInstructions(tail, load_nr);
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
639 // system memory allocator that is in effect, these operators can result 611 // system memory allocator that is in effect, these operators can result
640 // in system calls to things like munmap() or brk(). 612 // in system calls to things like munmap() or brk().
641 struct sock_filter bpf[program->size()]; 613 struct sock_filter bpf[program->size()];
642 const struct sock_fprog prog = { 614 const struct sock_fprog prog = {
643 static_cast<unsigned short>(program->size()), bpf }; 615 static_cast<unsigned short>(program->size()), bpf };
644 memcpy(bpf, &(*program)[0], sizeof(bpf)); 616 memcpy(bpf, &(*program)[0], sizeof(bpf));
645 delete program; 617 delete program;
646 618
647 // Release memory that is no longer needed 619 // Release memory that is no longer needed
648 evaluators_.clear(); 620 evaluators_.clear();
621 conds_.clear();
649 622
650 #if defined(SECCOMP_BPF_VALGRIND_HACKS) 623 #if defined(SECCOMP_BPF_VALGRIND_HACKS)
651 // Valgrind is really not happy about our sandbox. Disable it when running 624 // Valgrind is really not happy about our sandbox. Disable it when running
652 // in Valgrind. This feature is dangerous and should never be enabled by 625 // in Valgrind. This feature is dangerous and should never be enabled by
653 // default. We protect it behind a pre-processor option. 626 // default. We protect it behind a pre-processor option.
654 if (!RUNNING_ON_VALGRIND) 627 if (!RUNNING_ON_VALGRIND)
655 #endif 628 #endif
656 { 629 {
657 // Install BPF filter program 630 // Install BPF filter program
658 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 631 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
659 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs"); 632 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");
660 } else { 633 } else {
661 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { 634 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
662 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters"); 635 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");
663 } 636 }
664 } 637 }
665 } 638 }
666 639
667 return; 640 return;
668 } 641 }
669 642
670 void Sandbox::findRanges(Ranges *ranges) { 643 void Sandbox::FindRanges(Ranges *ranges) {
671 // Please note that "struct seccomp_data" defines system calls as a signed 644 // Please note that "struct seccomp_data" defines system calls as a signed
672 // int32_t, but BPF instructions always operate on unsigned quantities. We 645 // int32_t, but BPF instructions always operate on unsigned quantities. We
673 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, 646 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,
674 // and then verifying that the rest of the number range (both positive and 647 // and then verifying that the rest of the number range (both positive and
675 // negative) all return the same ErrorCode. 648 // negative) all return the same ErrorCode.
676 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; 649 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first;
677 void *aux = evaluators_.begin()->second; 650 void *aux = evaluators_.begin()->second;
678 uint32_t oldSysnum = 0; 651 uint32_t old_sysnum = 0;
679 ErrorCode oldErr = evaluateSyscall(oldSysnum, aux); 652 ErrorCode old_err = evaluate_syscall(old_sysnum, aux);
680 ErrorCode invalidErr = evaluateSyscall(MIN_SYSCALL - 1, aux); 653 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux);
681 for (SyscallIterator iter(false); !iter.Done(); ) { 654 for (SyscallIterator iter(false); !iter.Done(); ) {
682 uint32_t sysnum = iter.Next(); 655 uint32_t sysnum = iter.Next();
683 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum), aux); 656 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux);
684 if (!iter.IsValid(sysnum) && !invalidErr.Equals(err)) { 657 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {
685 // A proper sandbox policy should always treat system calls outside of 658 // A proper sandbox policy should always treat system calls outside of
686 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns 659 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns
687 // "false" for SyscallIterator::IsValid()) identically. Typically, all 660 // "false" for SyscallIterator::IsValid()) identically. Typically, all
688 // of these system calls would be denied with the same ErrorCode. 661 // of these system calls would be denied with the same ErrorCode.
689 SANDBOX_DIE("Invalid seccomp policy"); 662 SANDBOX_DIE("Invalid seccomp policy");
690 } 663 }
691 if (!err.Equals(oldErr) || iter.Done()) { 664 if (!err.Equals(old_err) || iter.Done()) {
692 ranges->push_back(Range(oldSysnum, sysnum - 1, oldErr)); 665 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));
693 oldSysnum = sysnum; 666 old_sysnum = sysnum;
694 oldErr = err; 667 old_err = err;
695 } 668 }
696 } 669 }
697 } 670 }
698 671
699 Instruction *Sandbox::assembleJumpTable(CodeGen *gen, 672 Instruction *Sandbox::AssembleJumpTable(CodeGen *gen,
700 Ranges::const_iterator start, 673 Ranges::const_iterator start,
701 Ranges::const_iterator stop) { 674 Ranges::const_iterator stop) {
702 // We convert the list of system call ranges into a jump table that performs 675 // We convert the list of system call ranges into a jump table that performs
703 // a binary search over the ranges. 676 // a binary search over the ranges.
704 // As a sanity check, we need to have at least one distinct range for us 677 // As a sanity check, we need to have at least one distinct range for us
705 // to be able to build a jump table. 678 // to be able to build a jump table.
706 if (stop - start <= 0) { 679 if (stop - start <= 0) {
707 SANDBOX_DIE("Invalid set of system call ranges"); 680 SANDBOX_DIE("Invalid set of system call ranges");
708 } else if (stop - start == 1) { 681 } else if (stop - start == 1) {
709 // If we have narrowed things down to a single range object, we can 682 // If we have narrowed things down to a single range object, we can
710 // return from the BPF filter program. 683 // return from the BPF filter program.
711 return gen->MakeInstruction(BPF_RET+BPF_K, start->err); 684 return RetExpression(gen, start->err);
712 } 685 }
713 686
714 // Pick the range object that is located at the mid point of our list. 687 // Pick the range object that is located at the mid point of our list.
715 // We compare our system call number against the lowest valid system call 688 // We compare our system call number against the lowest valid system call
716 // number in this range object. If our number is lower, it is outside of 689 // number in this range object. If our number is lower, it is outside of
717 // this range object. If it is greater or equal, it might be inside. 690 // this range object. If it is greater or equal, it might be inside.
718 Ranges::const_iterator mid = start + (stop - start)/2; 691 Ranges::const_iterator mid = start + (stop - start)/2;
719 692
720 // Sub-divide the list of ranges and continue recursively. 693 // Sub-divide the list of ranges and continue recursively.
721 Instruction *jf = assembleJumpTable(gen, start, mid); 694 Instruction *jf = AssembleJumpTable(gen, start, mid);
722 Instruction *jt = assembleJumpTable(gen, mid, stop); 695 Instruction *jt = AssembleJumpTable(gen, mid, stop);
723 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf); 696 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf);
724 } 697 }
725 698
726 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { 699 Instruction *Sandbox::RetExpression(CodeGen *gen, const ErrorCode& cond) {
700 if (cond.error_type_ == ErrorCode::ET_COND) {
701 return CondExpression(gen, cond);
702 } else {
703 return gen->MakeInstruction(BPF_RET+BPF_K, cond);
704 }
705 }
706
707 Instruction *Sandbox::CondExpression(CodeGen *gen, const ErrorCode& cond) {
708 // We can only inspect the six system call arguments that are passed in
709 // CPU registers.
710 if (cond.argno_ < 0 || cond.argno_ >= 6) {
711 SANDBOX_DIE("Internal compiler error; invalid argument number "
712 "encountered");
713 }
714
715 // BPF programs operate on 32bit entities. Load both halves of the 64bit
jln (very slow on Chromium) 2012/12/14 02:28:02 s/halfs/halves.
716 // system call argument and then generate suitable conditional statements.
717 Instruction *msb_head =
718 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
719 SECCOMP_ARG_MSB_IDX(cond.argno_));
720 Instruction *msb_tail = msb_head;
721 Instruction *lsb_head =
722 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
723 SECCOMP_ARG_LSB_IDX(cond.argno_));
724 Instruction *lsb_tail = lsb_head;
725
726 // Emit a suitable comparison statement.
727 switch (cond.op_) {
728 case ErrorCode::OP_EQUAL:
729 // Compare the least significant bits for equality
730 lsb_tail = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K,
731 static_cast<uint32_t>(cond.value_),
732 RetExpression(gen, *cond.passed_),
733 RetExpression(gen, *cond.failed_));
734 gen->JoinInstructions(lsb_head, lsb_tail);
735
736 // If we are looking at a 64bit argument, we need to also compare the
737 // most significant bits.
738 if (cond.width_ == ErrorCode::TP_64BIT) {
739 msb_tail = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K,
740 static_cast<uint32_t>(cond.value_ >> 32),
741 NULL,
742 RetExpression(gen, *cond.failed_));
743 gen->JoinInstructions(msb_head, msb_tail);
744 }
745 break;
746 default:
747 // TODO(markus): We can only check for equality so far.
748 SANDBOX_DIE("Not implemented");
749 break;
750 }
751
752 // Ensure that we never pass a 64bit value, when we only expect a 32bit
753 // value. This is somewhat complicated by the fact that on 64bit systems,
754 // callers could legitimately pass in a non-zero value in the MSB, iff the
755 // LSB has been sign-extended into the MSB.
756 if (cond.width_ == ErrorCode::TP_32BIT) {
757 if (cond.value_ >> 32) {
758 SANDBOX_DIE("Invalid comparison of a 32bit system call argument "
759 "against a 64bit constant; this test is always false.");
760 }
761
762 Instruction *invalid_64bit = RetExpression(gen, Unexpected64bitArgument());
763 #if __SIZEOF_POINTER__ > 4
jln (very slow on Chromium) 2012/12/14 02:28:02 I'll trust the test on this, my brain is not worki
764 invalid_64bit =
765 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0xFFFFFFFF,
766 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
767 SECCOMP_ARG_LSB_IDX(cond.argno_),
768 gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, 0x80000000,
769 lsb_head,
770 invalid_64bit)),
771 invalid_64bit);
772 #endif
773 gen->JoinInstructions(
774 msb_tail,
775 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0,
776 lsb_head,
777 invalid_64bit));
778 } else {
779 gen->JoinInstructions(msb_tail, lsb_head);
780 }
781
782 return msb_head;
783 }
784
785 ErrorCode Sandbox::Unexpected64bitArgument() {
786 return Kill("Unexpected 64bit argument detected");
787 }
788
789 void Sandbox::SigSys(int nr, siginfo_t *info, void *void_context) {
727 // Various sanity checks to make sure we actually received a signal 790 // Various sanity checks to make sure we actually received a signal
728 // triggered by a BPF filter. If something else triggered SIGSYS 791 // triggered by a BPF filter. If something else triggered SIGSYS
729 // (e.g. kill()), there is really nothing we can do with this signal. 792 // (e.g. kill()), there is really nothing we can do with this signal.
730 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context || 793 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context ||
731 info->si_errno <= 0 || 794 info->si_errno <= 0 ||
732 static_cast<size_t>(info->si_errno) > trapArraySize_) { 795 static_cast<size_t>(info->si_errno) > trap_array_size_) {
733 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal 796 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal
734 // safe and can lead to bugs. We should eventually implement a different 797 // safe and can lead to bugs. We should eventually implement a different
735 // logging and reporting mechanism that is safe to be called from 798 // logging and reporting mechanism that is safe to be called from
736 // the sigSys() handler. 799 // the sigSys() handler.
737 // TODO: If we feel confident that our code otherwise works correctly, we 800 // TODO: If we feel confident that our code otherwise works correctly, we
738 // could actually make an argument that spurious SIGSYS should 801 // could actually make an argument that spurious SIGSYS should
739 // just get silently ignored. TBD 802 // just get silently ignored. TBD
740 sigsys_err: 803 sigsys_err:
741 SANDBOX_DIE("Unexpected SIGSYS received"); 804 SANDBOX_DIE("Unexpected SIGSYS received");
742 } 805 }
(...skipping 23 matching lines...) Expand all
766 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { 829 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {
767 errno = old_errno; 830 errno = old_errno;
768 if (sigsys.nr == __NR_clone) { 831 if (sigsys.nr == __NR_clone) {
769 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); 832 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");
770 } 833 }
771 rc = SandboxSyscall(sigsys.nr, 834 rc = SandboxSyscall(sigsys.nr,
772 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), 835 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),
773 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), 836 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),
774 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); 837 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));
775 } else { 838 } else {
776 const ErrorCode& err = trapArray_[info->si_errno - 1]; 839 const ErrorCode& err = trap_array_[info->si_errno - 1];
777 if (!err.safe_) { 840 if (!err.safe_) {
778 SetIsInSigHandler(); 841 SetIsInSigHandler();
779 } 842 }
780 843
781 // Copy the seccomp-specific data into an arch_seccomp_data structure. This 845 // Copy the seccomp-specific data into an arch_seccomp_data structure. This
782 // is what we are showing to TrapFnc callbacks that the system call 845 // is what we are showing to TrapFnc callbacks that the system call
783 // evaluator registered with the sandbox. 846 // evaluator registered with the sandbox.
784 struct arch_seccomp_data data = { 847 struct arch_seccomp_data data = {
785 sigsys.nr, 848 sigsys.nr,
786 SECCOMP_ARCH, 849 SECCOMP_ARCH,
(...skipping 30 matching lines...) Expand all
817 } else { 880 } else {
818 return safe < o.safe; 881 return safe < o.safe;
819 } 882 }
820 } 883 }
821 884
822 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, 885 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,
823 bool safe) { 886 bool safe) {
824 // Each unique pair of TrapFnc and auxiliary data makes up a distinct instance 888 // Each unique pair of TrapFnc and auxiliary data makes up a distinct instance
825 // of a SECCOMP_RET_TRAP. 888 // of a SECCOMP_RET_TRAP.
826 TrapKey key(fnc, aux, safe); 889 TrapKey key(fnc, aux, safe);
827 TrapIds::const_iterator iter = trapIds_.find(key); 890 TrapIds::const_iterator iter = trap_ids_.find(key);
828 uint16_t id; 891 uint16_t id;
829 if (iter != trapIds_.end()) { 892 if (iter != trap_ids_.end()) {
830 // We have seen this pair before. Return the same id that we assigned 893 // We have seen this pair before. Return the same id that we assigned
831 // earlier. 894 // earlier.
832 id = iter->second; 895 id = iter->second;
833 } else { 896 } else {
834 // This is a new pair. Remember it and assign a new id. 897 // This is a new pair. Remember it and assign a new id.
835 // Please note that we have to store traps in memory that doesn't get 898 // Please note that we have to store traps in memory that doesn't get
836 // deallocated when the program is shutting down. A memory leak is 899 // deallocated when the program is shutting down. A memory leak is
837 // intentional, because we might otherwise not be able to execute 900 // intentional, because we might otherwise not be able to execute
838 // system calls part way through the program shutting down 901 // system calls part way through the program shutting down
839 if (!traps_) { 902 if (!traps_) {
840 traps_ = new Traps(); 903 traps_ = new Traps();
841 } 904 }
842 if (traps_->size() >= SECCOMP_RET_DATA) { 905 if (traps_->size() >= SECCOMP_RET_DATA) {
843 // In practice, this is pretty much impossible to trigger, as there 906 // In practice, this is pretty much impossible to trigger, as there
844 // are other kernel limitations that restrict overall BPF program sizes. 907 // are other kernel limitations that restrict overall BPF program sizes.
845 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); 908 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");
846 } 909 }
847 id = traps_->size() + 1; 910 id = traps_->size() + 1;
848 911
849 traps_->push_back(ErrorCode(fnc, aux, safe, id)); 912 traps_->push_back(ErrorCode(fnc, aux, safe, id));
850 trapIds_[key] = id; 913 trap_ids_[key] = id;
851 914
852 // We want to access the traps_ vector from our signal handler. But 915 // We want to access the traps_ vector from our signal handler. But
853 // we are not assured that doing so is async-signal safe. On the other 916 // we are not assured that doing so is async-signal safe. On the other
854 // hand, C++ guarantees that the contents of a vector are stored in a 918 // hand, C++ guarantees that the contents of a vector are stored in a
855 // contiguous C-style array. 918 // contiguous C-style array.
856 // So, we look up the address and size of this array outside of the 919 // So, we look up the address and size of this array outside of the
857 // signal handler, where we can safely do so. 920 // signal handler, where we can safely do so.
858 trapArray_ = &(*traps_)[0]; 921 trap_array_ = &(*traps_)[0];
859 trapArraySize_ = id; 922 trap_array_size_ = id;
860 return traps_->back(); 923 return traps_->back();
861 } 924 }
862 925
863 return ErrorCode(fnc, aux, safe, id); 926 return ErrorCode(fnc, aux, safe, id);
864 } 927 }
865 928
866 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { 929 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
867 return MakeTrap(fnc, aux, true /* Safe Trap */); 930 return MakeTrap(fnc, aux, true /* Safe Trap */);
868 } 931 }
869 932
(...skipping 13 matching lines...) Expand all
883 946
884 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { 947 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {
885 // TrapFnc functions report error by following the native kernel convention 948 // TrapFnc functions report error by following the native kernel convention
886 // of returning an exit code in the range of -1..-4096. They do not try to 949 // of returning an exit code in the range of -1..-4096. They do not try to
887 // set errno themselves. The glibc wrapper that triggered the SIGSYS will 950 // set errno themselves. The glibc wrapper that triggered the SIGSYS will
888 // ultimately do so for us. 951 // ultimately do so for us.
889 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; 952 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
890 return -err; 953 return -err;
891 } 954 }
892 955
893 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { 956 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,
957 ErrorCode::Operation op, uint64_t value,
958 const ErrorCode& passed, const ErrorCode& failed) {
959 return ErrorCode(argno, width, op, value,
960 &*conds_.insert(passed).first,
961 &*conds_.insert(failed).first);
962 }
963
964 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) {
894 SANDBOX_DIE(static_cast<char *>(aux)); 965 SANDBOX_DIE(static_cast<char *>(aux));
895 } 966 }
896 967
897 ErrorCode Sandbox::Kill(const char *msg) { 968 ErrorCode Sandbox::Kill(const char *msg) {
898 return Trap(bpfFailure, const_cast<char *>(msg)); 969 return Trap(BpfFailure, const_cast<char *>(msg));
899 } 970 }
900 971
901 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; 972 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
902 int Sandbox::proc_fd_ = -1; 973 int Sandbox::proc_fd_ = -1;
903 Sandbox::Evaluators Sandbox::evaluators_; 974 Sandbox::Evaluators Sandbox::evaluators_;
904 Sandbox::Traps *Sandbox::traps_ = NULL; 975 Sandbox::Traps *Sandbox::traps_ = NULL;
905 Sandbox::TrapIds Sandbox::trapIds_; 976 Sandbox::TrapIds Sandbox::trap_ids_;
906 ErrorCode *Sandbox::trapArray_ = NULL; 977 ErrorCode *Sandbox::trap_array_ = NULL;
907 size_t Sandbox::trapArraySize_ = 0; 978 size_t Sandbox::trap_array_size_ = 0;
908 bool Sandbox::has_unsafe_traps_ = false; 979 bool Sandbox::has_unsafe_traps_ = false;
980 Sandbox::Conds Sandbox::conds_;
909 981
910 } // namespace 982 } // namespace
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698