Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(217)

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 11419121: SECCOMP-BPF: Added support for greylisting of system calls. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: More unittest coverage Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <endian.h>
6 #if __BYTE_ORDER == __BIG_ENDIAN
7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit
8 // values that need to be inspected by a virtual machine that only ever
9 // operates on 32bit values. The kernel developers decided how values
10 // should be split into two 32bit words to achieve this goal. But at this
11 // time, there is no existing BPF implementation in the kernel that uses
12 // 64bit big endian values. So, all we have to go by is the consensus
13 // from a discussion on LKML. Actual implementations, if and when they 13 // from a discussion on LKML. Actual implementations, if and when they
14 // happen, might very well differ.
15 // If this code is ever going to be used with such a kernel, you should
16 // disable the "#error" and carefully test the code (e.g. run the unit
17 // tests). If things don't work, search for all occurrences of __BYTE_ORDER
18 // and verify that the proposed implementation agrees with what the kernel
19 // actually does.
20 #error Big endian operation is untested and expected to be broken
21 #endif
22
5 #include "sandbox/linux/seccomp-bpf/codegen.h" 23 #include "sandbox/linux/seccomp-bpf/codegen.h"
6 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" 24 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
25 #include "sandbox/linux/seccomp-bpf/syscall.h"
7 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" 26 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
8 #include "sandbox/linux/seccomp-bpf/verifier.h" 27 #include "sandbox/linux/seccomp-bpf/verifier.h"
9 28
10 namespace { 29 namespace {
11 30
12 void WriteFailedStderrSetupMessage(int out_fd) { 31 void WriteFailedStderrSetupMessage(int out_fd) {
13 const char* error_string = strerror(errno); 32 const char* error_string = strerror(errno);
14 static const char msg[] = "Failed to set up stderr: "; 33 static const char msg[] = "Failed to set up stderr: ";
15 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string && 34 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&
16 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 && 35 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&
17 HANDLE_EINTR(write(out_fd, "\n", 1))) { 36 HANDLE_EINTR(write(out_fd, "\n", 1))) {
18 } 37 }
19 } 38 }
20 39
// We need to tell whether we are performing a "normal" callback, or
// whether we were called recursively from within an UnsafeTrap() callback.
// This is a little tricky to do, because we need to somehow get access to
// per-thread data from within a signal context. Normal TLS storage is not
// safely accessible at this time. We could roll our own, but that involves
// a lot of complexity. Instead, we co-opt one bit in the signal mask.
// If SIGBUS is blocked, we assume that we have been called recursively.
// There is a possibility for collision with other code that needs to do
// this, but in practice the risks are low.
// If SIGBUS turns out to be a problem, we could instead co-opt one of the
// realtime signals. There are plenty of them. Unfortunately, there is no
// way to mark a signal as allocated. So, the potential for collision is
// possibly even worse.
bool GetIsInSigHandler(const ucontext_t *ctx) {
  // A blocked SIGBUS in the interrupted context is our marker that we are
  // nested inside an (unsafe) trap handler.
  return sigismember(&ctx->uc_sigmask, SIGBUS) != 0;
}
56
// Marks the current thread as being inside a signal handler by blocking
// SIGBUS in the thread's signal mask. GetIsInSigHandler() reads this
// marker back from the saved ucontext.
void SetIsInSigHandler() {
  sigset_t sigbus_mask;
  sigemptyset(&sigbus_mask);
  sigaddset(&sigbus_mask, SIGBUS);
  sigprocmask(SIG_BLOCK, &sigbus_mask, NULL);
}
63
21 } // namespace 64 } // namespace
22 65
23 // The kernel gives us a sandbox, we turn it into a playground :-) 66 // The kernel gives us a sandbox, we turn it into a playground :-)
24 // This is version 2 of the playground; version 1 was built on top of 67 // This is version 2 of the playground; version 1 was built on top of
25 // pre-BPF seccomp mode. 68 // pre-BPF seccomp mode.
26 namespace playground2 { 69 namespace playground2 {
27 70
28 const int kExpectedExitCode = 100; 71 const int kExpectedExitCode = 100;
29 72
30 // We define a really simple sandbox policy. It is just good enough for us 73 // We define a really simple sandbox policy. It is just good enough for us
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after
312 for (SyscallIterator iter(true); !iter.Done(); ) { 355 for (SyscallIterator iter(true); !iter.Done(); ) {
313 uint32_t sysnum = iter.Next(); 356 uint32_t sysnum = iter.Next();
314 if (!isDenied(syscallEvaluator(sysnum, aux))) { 357 if (!isDenied(syscallEvaluator(sysnum, aux))) {
315 SANDBOX_DIE("Policies should deny system calls that are outside the " 358 SANDBOX_DIE("Policies should deny system calls that are outside the "
316 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); 359 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");
317 } 360 }
318 } 361 }
319 return; 362 return;
320 } 363 }
321 364
365 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
366 if (BPF_CLASS(insn->code) == BPF_RET &&
367 insn->k > SECCOMP_RET_TRAP &&
368 insn->k - SECCOMP_RET_TRAP <= trapArraySize_) {
369 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1];
370 if (!err.safe_) {
371 bool *is_unsafe = static_cast<bool *>(aux);
372 *is_unsafe = true;
373 }
374 }
375 }
376
377 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) {
378 // When inside an UnsafeTrap() callback, we want to allow all system calls.
379 // This means, we must conditionally disable the sandbox -- and that's not
380 // something that kernel-side BPF filters can do, as they cannot inspect
381 // any state other than the syscall arguments.
382 // But if we redirect all error handlers to user-space, then we can easily
383 // make this decision.
384 // The performance penalty for this extra round-trip to user-space is not
385 // actually that bad, as we only ever pay it for denied system calls; and a
386 // typical program has very few of these.
387 if (BPF_CLASS(insn->code) == BPF_RET &&
388 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
389 insn->k = Trap(ReturnErrno,
390 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
391 }
392 }
393
394 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {
395 // We need to replicate the behavior of RedirectToUserspace(), so that our
396 // Verifier can still work correctly.
397 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);
398 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();
399 ErrorCode err = evaluator.first(sysnum, evaluator.second);
400 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
401 return Trap(ReturnErrno,
402 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
403 }
404 return err;
405 }
406
322 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { 407 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) {
323 if (status_ == STATUS_ENABLED) { 408 if (status_ == STATUS_ENABLED) {
324 SANDBOX_DIE("Cannot change policy after sandbox has started"); 409 SANDBOX_DIE("Cannot change policy after sandbox has started");
325 } 410 }
326 policySanityChecks(syscallEvaluator, aux); 411 policySanityChecks(syscallEvaluator, aux);
327 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); 412 evaluators_.push_back(std::make_pair(syscallEvaluator, aux));
328 } 413 }
329 414
330 void Sandbox::installFilter(bool quiet) { 415 void Sandbox::installFilter(bool quiet) {
331 // Verify that the user pushed a policy. 416 // Verify that the user pushed a policy.
332 if (evaluators_.empty()) { 417 if (evaluators_.empty()) {
333 filter_failed: 418 filter_failed:
334 SANDBOX_DIE("Failed to configure system call filters"); 419 SANDBOX_DIE("Failed to configure system call filters");
335 } 420 }
336 421
337 // Set new SIGSYS handler 422 // Set new SIGSYS handler
338 struct sigaction sa; 423 struct sigaction sa;
339 memset(&sa, 0, sizeof(sa)); 424 memset(&sa, 0, sizeof(sa));
340 sa.sa_sigaction = &sigSys; 425 sa.sa_sigaction = sigSys;
341 sa.sa_flags = SA_SIGINFO; 426 sa.sa_flags = SA_SIGINFO | SA_NODEFER;
342 if (sigaction(SIGSYS, &sa, NULL) < 0) { 427 if (sigaction(SIGSYS, &sa, NULL) < 0) {
343 goto filter_failed; 428 goto filter_failed;
344 } 429 }
345 430
346 // Unmask SIGSYS 431 // Unmask SIGSYS
347 sigset_t mask; 432 sigset_t mask;
348 if (sigemptyset(&mask) || 433 if (sigemptyset(&mask) ||
349 sigaddset(&mask, SIGSYS) || 434 sigaddset(&mask, SIGSYS) ||
350 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { 435 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
351 goto filter_failed; 436 goto filter_failed;
(...skipping 10 matching lines...) Expand all
362 if (!gen) { 447 if (!gen) {
363 SANDBOX_DIE("Out of memory"); 448 SANDBOX_DIE("Out of memory");
364 } 449 }
365 450
366 // If the architecture doesn't match SECCOMP_ARCH, disallow the 451 // If the architecture doesn't match SECCOMP_ARCH, disallow the
367 // system call. 452 // system call.
368 Instruction *tail; 453 Instruction *tail;
369 Instruction *head = 454 Instruction *head =
370 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 455 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
371 offsetof(struct arch_seccomp_data, arch), 456 offsetof(struct arch_seccomp_data, arch),
457 tail =
372 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 458 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,
373 tail = 459 NULL,
374 // Grab the system call number, so that we can implement jump tables.
375 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
376 offsetof(struct arch_seccomp_data, nr)),
377 gen->MakeInstruction(BPF_RET+BPF_K, 460 gen->MakeInstruction(BPF_RET+BPF_K,
378 Kill( 461 Kill(
379 "Invalid audit architecture in BPF filter").err_))); 462 "Invalid audit architecture in BPF filter").err_)));
380 463
381 // On Intel architectures, verify that system call numbers are in the
382 // expected number range. The older i386 and x86-64 APIs clear bit 30
383 // on all system calls. The newer x32 API always sets bit 30.
384 #if defined(__i386__) || defined(__x86_64__)
385 Instruction *invalidX32 =
386 gen->MakeInstruction(BPF_RET+BPF_K,
387 Kill("Illegal mixing of system call ABIs").err_);
388 Instruction *checkX32 =
389 #if defined(__x86_64__) && defined(__ILP32__)
390 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);
391 #else
392 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);
393 #endif
394 gen->JoinInstructions(tail, checkX32);
395 tail = checkX32;
396 #endif
397
398
399 { 464 {
400 // Evaluate all possible system calls and group their ErrorCodes into 465 // Evaluate all possible system calls and group their ErrorCodes into
401 // ranges of identical codes. 466 // ranges of identical codes.
402 Ranges ranges; 467 Ranges ranges;
403 findRanges(&ranges); 468 findRanges(&ranges);
404 469
405 // Compile the system call ranges to an optimized BPF jumptable 470 // Compile the system call ranges to an optimized BPF jumptable
406 Instruction *jumptable = 471 Instruction *jumptable =
407 assembleJumpTable(gen, ranges.begin(), ranges.end()); 472 assembleJumpTable(gen, ranges.begin(), ranges.end());
408 473
474 // If there is at least one UnsafeTrap() in our program, the entire sandbox
475 // is unsafe. We need to modify the program so that all non-
476 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then
477 // allow us to temporarily disable sandboxing rules inside of callbacks to
478 // UnsafeTrap().
479 has_unsafe_traps_ = false;
480 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);
481
482 // Grab the system call number, so that we can implement jump tables.
483 Instruction *load_nr =
484 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
485 offsetof(struct arch_seccomp_data, nr));
486
487 // If our BPF program has unsafe jumps, enable support for them. This
488 // test happens very early in the BPF filter program. Even before we
489 // consider looking at system call numbers.
490 // As support for unsafe jumps essentially defeats all the security
491 // measures that the sandbox provides, we print a big warning message --
492 // and of course, we make sure to only ever enable this feature if it
493 // is actually requested by the sandbox policy.
494 if (has_unsafe_traps_) {
495 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {
496 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "
497 "architecture");
498 }
499
500 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;
501 void *aux = evaluators_.begin()->second;
502 if (!evaluateSyscall(__NR_rt_sigprocmask, aux).
503 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) ||
504 !evaluateSyscall(__NR_rt_sigreturn, aux).
505 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
506 #if defined(__NR_sigprocmask)
507 || !evaluateSyscall(__NR_sigprocmask, aux).
508 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
509 #endif
510 #if defined(__NR_sigreturn)
511 || !evaluateSyscall(__NR_sigreturn, aux).
512 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
513 #endif
514 ) {
515 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "
516 "unconditionally allow sigreturn() and sigprocmask()");
517 }
518
519 SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes");
520 gen->Traverse(jumptable, RedirectToUserspace, NULL);
521
522 // Allow system calls, if they originate from our magic return address
523 // (which we can query by calling SandboxSyscall(-1)).
524 uintptr_t syscall_entry_point =
525 static_cast<uintptr_t>(SandboxSyscall(-1));
526 uint32_t low = static_cast<uint32_t>(syscall_entry_point);
527 #if __SIZEOF_POINTER__ > 4
528 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);
529 #endif
530
531 // BPF cannot do native 64bit comparisons. On 64bit architectures, we
532 // have to compare both 32bit halfs of the instruction pointer. If they
533 // match what we expect, we return ERR_ALLOWED. If either or both don't
534 // match, we continue evalutating the rest of the sandbox policy.
535 Instruction *escape_hatch =
536 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
537 offsetof(struct arch_seccomp_data,
538 instruction_pointer) +
539 (__SIZEOF_POINTER__ > 4 &&
540 __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0),
541 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low,
542 #if __SIZEOF_POINTER__ > 4
543 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
544 offsetof(struct arch_seccomp_data,
545 instruction_pointer) +
546 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4),
547 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi,
548 #endif
549 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)),
550 #if __SIZEOF_POINTER__ > 4
551 load_nr)),
552 #endif
553 load_nr));
554 gen->JoinInstructions(tail, escape_hatch);
555 } else {
556 gen->JoinInstructions(tail, load_nr);
557 }
558 tail = load_nr;
559
560 // On Intel architectures, verify that system call numbers are in the
561 // expected number range. The older i386 and x86-64 APIs clear bit 30
562 // on all system calls. The newer x32 API always sets bit 30.
563 #if defined(__i386__) || defined(__x86_64__)
564 Instruction *invalidX32 =
565 gen->MakeInstruction(BPF_RET+BPF_K,
566 Kill("Illegal mixing of system call ABIs").err_);
567 Instruction *checkX32 =
568 #if defined(__x86_64__) && defined(__ILP32__)
569 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);
570 #else
571 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);
572 #endif
573 gen->JoinInstructions(tail, checkX32);
574 tail = checkX32;
575 #endif
576
409 // Append jump table to our pre-amble 577 // Append jump table to our pre-amble
410 gen->JoinInstructions(tail, jumptable); 578 gen->JoinInstructions(tail, jumptable);
411 } 579 }
412 580
413 // Turn the DAG into a vector of instructions. 581 // Turn the DAG into a vector of instructions.
414 Program *program = new Program(); 582 Program *program = new Program();
415 gen->Compile(head, program); 583 gen->Compile(head, program);
416 delete gen; 584 delete gen;
417 585
418 // Make sure compilation resulted in BPF program that executes 586 // Make sure compilation resulted in BPF program that executes
419 // correctly. Otherwise, there is an internal error in our BPF compiler. 587 // correctly. Otherwise, there is an internal error in our BPF compiler.
420 // There is really nothing the caller can do until the bug is fixed. 588 // There is really nothing the caller can do until the bug is fixed.
421 #ifndef NDEBUG 589 #ifndef NDEBUG
422 const char *err = NULL; 590 {
423 if (!Verifier::VerifyBPF(*program, evaluators_, &err)) { 591 // If we previously rewrote the BPF program so that it calls user-space
424 SANDBOX_DIE(err); 592 // whenever we return an "errno" value from the filter, then we have to
593 // wrap our system call evaluator to perform the same operation. Otherwise,
594 // the verifier would also report a mismatch in return codes.
595 Evaluators redirected_evaluators;
596 redirected_evaluators.push_back(
597 std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_));
598
599 const char *err = NULL;
600 if (!Verifier::VerifyBPF(
601 *program,
602 has_unsafe_traps_ ? redirected_evaluators : evaluators_,
603 &err)) {
604 SANDBOX_DIE(err);
605 }
425 } 606 }
426 #endif 607 #endif
427 608
428 // We want to be very careful in not imposing any requirements on the 609 // We want to be very careful in not imposing any requirements on the
429 // policies that are set with setSandboxPolicy(). This means, as soon as 610 // policies that are set with setSandboxPolicy(). This means, as soon as
430 // the sandbox is active, we shouldn't be relying on libraries that could 611 // the sandbox is active, we shouldn't be relying on libraries that could
431 // be making system calls. This, for example, means we should avoid 612 // be making system calls. This, for example, means we should avoid
432 // using the heap and we should avoid using STL functions. 613 // using the heap and we should avoid using STL functions.
433 // Temporarily copy the contents of the "program" vector into a 614 // Temporarily copy the contents of the "program" vector into a
434 // stack-allocated array; and then explicitly destroy that object. 615 // stack-allocated array; and then explicitly destroy that object.
435 // This makes sure we don't ex- or implicitly call new/delete after we 616 // This makes sure we don't ex- or implicitly call new/delete after we
436 // installed the BPF filter program in the kernel. Depending on the 617 // installed the BPF filter program in the kernel. Depending on the
437 // system memory allocator that is in effect, these operators can result 618 // system memory allocator that is in effect, these operators can result
438 // in system calls to things like munmap() or brk(). 619 // in system calls to things like munmap() or brk().
439 struct sock_filter bpf[program->size()]; 620 struct sock_filter bpf[program->size()];
440 const struct sock_fprog prog = { 621 const struct sock_fprog prog = {
441 static_cast<unsigned short>(program->size()), bpf }; 622 static_cast<unsigned short>(program->size()), bpf };
442 memcpy(bpf, &(*program)[0], sizeof(bpf)); 623 memcpy(bpf, &(*program)[0], sizeof(bpf));
443 delete program; 624 delete program;
444 625
445 // Release memory that is no longer needed 626 // Release memory that is no longer needed
446 evaluators_.clear(); 627 evaluators_.clear();
447 errMap_.clear();
448 628
449 #if defined(SECCOMP_BPF_VALGRIND_HACKS) 629 #if defined(SECCOMP_BPF_VALGRIND_HACKS)
450 // Valgrind is really not happy about our sandbox. Disable it when running 630 // Valgrind is really not happy about our sandbox. Disable it when running
451 // in Valgrind. This feature is dangerous and should never be enabled by 631 // in Valgrind. This feature is dangerous and should never be enabled by
452 // default. We protect it behind a pre-processor option. 632 // default. We protect it behind a pre-processor option.
453 if (!RUNNING_ON_VALGRIND) 633 if (!RUNNING_ON_VALGRIND)
454 #endif 634 #endif
455 { 635 {
456 // Install BPF filter program 636 // Install BPF filter program
457 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 637 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
554 struct arch_sigsys sigsys; 734 struct arch_sigsys sigsys;
555 memcpy(&sigsys, &info->_sifields, sizeof(sigsys)); 735 memcpy(&sigsys, &info->_sifields, sizeof(sigsys));
556 736
557 // Some more sanity checks. 737 // Some more sanity checks.
558 if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) || 738 if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) ||
559 sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) || 739 sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) ||
560 sigsys.arch != SECCOMP_ARCH) { 740 sigsys.arch != SECCOMP_ARCH) {
561 goto sigsys_err; 741 goto sigsys_err;
562 } 742 }
563 743
564 // Copy the seccomp-specific data into a arch_seccomp_data structure. This 744 intptr_t rc;
565 // is what we are showing to TrapFnc callbacks that the system call evaluator 745 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {
566 // registered with the sandbox. 746 errno = old_errno;
567 struct arch_seccomp_data data = { 747 if (sigsys.nr == __NR_clone) {
568 sigsys.nr, 748 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");
569 SECCOMP_ARCH,
570 reinterpret_cast<uint64_t>(sigsys.ip),
571 {
572 static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
573 static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
574 static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
575 static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
576 static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
577 static_cast<uint64_t>(SECCOMP_PARM6(ctx))
578 } 749 }
579 }; 750 rc = SandboxSyscall(sigsys.nr,
751 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),
752 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),
753 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));
754 } else {
755 const ErrorCode& err = trapArray_[info->si_errno - 1];
756 if (!err.safe_) {
757 SetIsInSigHandler();
758 }
580 759
581 // Now call the TrapFnc callback associated with this particular instance 760 // Copy the seccomp-specific data into a arch_seccomp_data structure. This
582 // of SECCOMP_RET_TRAP. 761 // is what we are showing to TrapFnc callbacks that the system call
583 const ErrorCode& err = trapArray_[info->si_errno - 1]; 762 // evaluator registered with the sandbox.
584 intptr_t rc = err.fnc_(data, err.aux_); 763 struct arch_seccomp_data data = {
764 sigsys.nr,
765 SECCOMP_ARCH,
766 reinterpret_cast<uint64_t>(sigsys.ip),
767 {
768 static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
769 static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
770 static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
771 static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
772 static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
773 static_cast<uint64_t>(SECCOMP_PARM6(ctx))
774 }
775 };
776
777 // Now call the TrapFnc callback associated with this particular instance
778 // of SECCOMP_RET_TRAP.
779 rc = err.fnc_(data, err.aux_);
780 }
585 781
586 // Update the CPU register that stores the return code of the system call 782 // Update the CPU register that stores the return code of the system call
587 // that we just handled, and restore "errno" to the value that it had 783 // that we just handled, and restore "errno" to the value that it had
588 // before entering the signal handler. 784 // before entering the signal handler.
589 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc); 785 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc);
590 errno = old_errno; 786 errno = old_errno;
591 787
592 return; 788 return;
593 } 789 }
594 790
595 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { 791 bool Sandbox::TrapKey::operator<(const Sandbox::TrapKey& o) const {
792 if (fnc != o.fnc) {
793 return fnc < o.fnc;
794 } else if (aux != o.aux) {
795 return aux < o.aux;
796 } else {
797 return safe < o.safe;
798 }
799 }
800
801 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,
802 bool safe) {
596 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance 803 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance
597 // of a SECCOMP_RET_TRAP. 804 // of a SECCOMP_RET_TRAP.
598 std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux); 805 TrapKey key(fnc, aux, safe);
599 TrapIds::const_iterator iter = trapIds_.find(key); 806 TrapIds::const_iterator iter = trapIds_.find(key);
600 uint16_t id; 807 uint16_t id;
601 if (iter != trapIds_.end()) { 808 if (iter != trapIds_.end()) {
602 // We have seen this pair before. Return the same id that we assigned 809 // We have seen this pair before. Return the same id that we assigned
603 // earlier. 810 // earlier.
604 id = iter->second; 811 id = iter->second;
605 } else { 812 } else {
606 // This is a new pair. Remember it and assign a new id. 813 // This is a new pair. Remember it and assign a new id.
607 // Please note that we have to store traps in memory that doesn't get 814 // Please note that we have to store traps in memory that doesn't get
608 // deallocated when the program is shutting down. A memory leak is 815 // deallocated when the program is shutting down. A memory leak is
609 // intentional, because we might otherwise not be able to execute 816 // intentional, because we might otherwise not be able to execute
610 // system calls part way through the program shutting down 817 // system calls part way through the program shutting down
611 if (!traps_) { 818 if (!traps_) {
612 traps_ = new Traps(); 819 traps_ = new Traps();
613 } 820 }
614 if (traps_->size() >= SECCOMP_RET_DATA) { 821 if (traps_->size() >= SECCOMP_RET_DATA) {
615 // In practice, this is pretty much impossible to trigger, as there 822 // In practice, this is pretty much impossible to trigger, as there
616 // are other kernel limitations that restrict overall BPF program sizes. 823 // are other kernel limitations that restrict overall BPF program sizes.
617 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); 824 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");
618 } 825 }
619 id = traps_->size() + 1; 826 id = traps_->size() + 1;
620 827
621 traps_->push_back(ErrorCode(fnc, aux, id)); 828 traps_->push_back(ErrorCode(fnc, aux, safe, id));
622 trapIds_[key] = id; 829 trapIds_[key] = id;
623 830
624 // We want to access the traps_ vector from our signal handler. But 831 // We want to access the traps_ vector from our signal handler. But
625 // we are not assured that doing so is async-signal safe. On the other 832 // we are not assured that doing so is async-signal safe. On the other
626 // hand, C++ guarantees that the contents of a vector is stored in a 833 // hand, C++ guarantees that the contents of a vector is stored in a
627 // contiguous C-style array. 834 // contiguous C-style array.
628 // So, we look up the address and size of this array outside of the 835 // So, we look up the address and size of this array outside of the
629 // signal handler, where we can safely do so. 836 // signal handler, where we can safely do so.
630 trapArray_ = &(*traps_)[0]; 837 trapArray_ = &(*traps_)[0];
631 trapArraySize_ = id; 838 trapArraySize_ = id;
839 return traps_->back();
632 } 840 }
633 841
634 ErrorCode err = ErrorCode(fnc, aux, id); 842 return ErrorCode(fnc, aux, safe, id);
635 return errMap_[err.err()] = err; 843 }
844
845 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
846 return MakeTrap(fnc, aux, true /* Safe Trap */);
847 }
848
849 ErrorCode Sandbox::UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux) {
850 return MakeTrap(fnc, aux, false /* Unsafe Trap */);
851 }
852
853 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) {
854 return SandboxSyscall(args.nr,
855 static_cast<intptr_t>(args.args[0]),
856 static_cast<intptr_t>(args.args[1]),
857 static_cast<intptr_t>(args.args[2]),
858 static_cast<intptr_t>(args.args[3]),
859 static_cast<intptr_t>(args.args[4]),
860 static_cast<intptr_t>(args.args[5]));
861 }
862
863 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {
864 // TrapFnc functions report error by following the native kernel convention
865 // of returning an exit code in the range of -1..-4096. They do not try to
866 // set errno themselves. The glibc wrapper that triggered the SIGSYS will
867 // ultimately do so for us.
868 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
869 return -err;
636 } 870 }
637 871
638 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { 872 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {
639 SANDBOX_DIE(static_cast<char *>(aux)); 873 SANDBOX_DIE(static_cast<char *>(aux));
640 } 874 }
641 875
642 ErrorCode Sandbox::Kill(const char *msg) { 876 ErrorCode Sandbox::Kill(const char *msg) {
643 return Trap(bpfFailure, const_cast<char *>(msg)); 877 return Trap(bpfFailure, const_cast<char *>(msg));
644 } 878 }
645 879
646 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; 880 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
647 int Sandbox::proc_fd_ = -1; 881 int Sandbox::proc_fd_ = -1;
648 Sandbox::Evaluators Sandbox::evaluators_; 882 Sandbox::Evaluators Sandbox::evaluators_;
649 Sandbox::ErrMap Sandbox::errMap_;
650 Sandbox::Traps *Sandbox::traps_ = NULL; 883 Sandbox::Traps *Sandbox::traps_ = NULL;
651 Sandbox::TrapIds Sandbox::trapIds_; 884 Sandbox::TrapIds Sandbox::trapIds_;
652 ErrorCode *Sandbox::trapArray_ = NULL; 885 ErrorCode *Sandbox::trapArray_ = NULL;
653 size_t Sandbox::trapArraySize_ = 0; 886 size_t Sandbox::trapArraySize_ = 0;
887 bool Sandbox::has_unsafe_traps_ = false;
654 888
655 } // namespace 889 } // namespace
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698