| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
| 6 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" | 6 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
| 7 #include "sandbox/linux/seccomp-bpf/syscall.h" |
| 7 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" | 8 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" |
| 8 #include "sandbox/linux/seccomp-bpf/verifier.h" | 9 #include "sandbox/linux/seccomp-bpf/verifier.h" |
| 9 | 10 |
| 10 namespace { | 11 namespace { |
| 11 | 12 |
| 12 void WriteFailedStderrSetupMessage(int out_fd) { | 13 void WriteFailedStderrSetupMessage(int out_fd) { |
| 13 const char* error_string = strerror(errno); | 14 const char* error_string = strerror(errno); |
| 14 static const char msg[] = "Failed to set up stderr: "; | 15 static const char msg[] = "Failed to set up stderr: "; |
| 15 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string && | 16 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string && |
| 16 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 && | 17 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 && |
| (...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 312 for (SyscallIterator iter(true); !iter.Done(); ) { | 313 for (SyscallIterator iter(true); !iter.Done(); ) { |
| 313 uint32_t sysnum = iter.Next(); | 314 uint32_t sysnum = iter.Next(); |
| 314 if (!isDenied(syscallEvaluator(sysnum, aux))) { | 315 if (!isDenied(syscallEvaluator(sysnum, aux))) { |
| 315 SANDBOX_DIE("Policies should deny system calls that are outside the " | 316 SANDBOX_DIE("Policies should deny system calls that are outside the " |
| 316 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); | 317 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); |
| 317 } | 318 } |
| 318 } | 319 } |
| 319 return; | 320 return; |
| 320 } | 321 } |
| 321 | 322 |
| 323 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { |
| 324 if (BPF_CLASS(insn->code) == BPF_RET && |
| 325 insn->k > SECCOMP_RET_TRAP && |
| 326 insn->k <= SECCOMP_RET_TRAP+trapArraySize_) { |
| 327 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1]; |
| 328 if (!err.safe_) { |
| 329 bool *is_unsafe = static_cast<bool *>(aux); |
| 330 *is_unsafe = true; |
| 331 } |
| 332 } |
| 333 } |
| 334 |
| 335 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) { |
| 336 // When inside an UnsafeTrap() callback, we want to allow all system calls. |
| 337 // This means, we must conditionally disable the sandbox -- and that's not |
| 338 // something that kernel-side BPF filters can do, as they cannot inspect |
| 339 // any state other than the syscall arguments. |
| 340 // But if we redirect all error handlers to user-space, then we can easily |
| 341 // make this decision. |
| 342 // The performance penalty for this extra round-trip to user-space is not |
| 343 // actually that bad, as we only ever pay it for denied system calls; and a |
| 344 // typical program has very few of these. |
| 345 if (BPF_CLASS(insn->code) == BPF_RET && |
| 346 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { |
| 347 insn->k = Trap(ReturnErrno, |
| 348 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); |
| 349 } |
| 350 } |
| 351 |
| 352 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { |
| 353 // We need to replicate the behavior of RedirectToUserspace(), so that our |
| 354 // Verifier can still work correctly. |
| 355 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); |
| 356 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); |
| 357 ErrorCode err = evaluator.first(sysnum, evaluator.second); |
| 358 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { |
| 359 return Trap(ReturnErrno, |
| 360 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); |
| 361 } |
| 362 return err; |
| 363 } |
| 364 |
| 322 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { | 365 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { |
| 323 if (status_ == STATUS_ENABLED) { | 366 if (status_ == STATUS_ENABLED) { |
| 324 SANDBOX_DIE("Cannot change policy after sandbox has started"); | 367 SANDBOX_DIE("Cannot change policy after sandbox has started"); |
| 325 } | 368 } |
| 326 policySanityChecks(syscallEvaluator, aux); | 369 policySanityChecks(syscallEvaluator, aux); |
| 327 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); | 370 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); |
| 328 } | 371 } |
| 329 | 372 |
| 330 void Sandbox::installFilter(bool quiet) { | 373 void Sandbox::installFilter(bool quiet) { |
| 331 // Verify that the user pushed a policy. | 374 // Verify that the user pushed a policy. |
| 332 if (evaluators_.empty()) { | 375 if (evaluators_.empty()) { |
| 333 filter_failed: | 376 filter_failed: |
| 334 SANDBOX_DIE("Failed to configure system call filters"); | 377 SANDBOX_DIE("Failed to configure system call filters"); |
| 335 } | 378 } |
| 336 | 379 |
| 337 // Set new SIGSYS handler | 380 // Set new SIGSYS handler |
| 338 struct sigaction sa; | 381 struct sigaction sa; |
| 339 memset(&sa, 0, sizeof(sa)); | 382 memset(&sa, 0, sizeof(sa)); |
| 340 sa.sa_sigaction = &sigSys; | 383 sa.sa_sigaction = sigSys; |
| 341 sa.sa_flags = SA_SIGINFO; | 384 sa.sa_flags = SA_SIGINFO | SA_NODEFER; |
| 342 if (sigaction(SIGSYS, &sa, NULL) < 0) { | 385 if (sigaction(SIGSYS, &sa, NULL) < 0) { |
| 343 goto filter_failed; | 386 goto filter_failed; |
| 344 } | 387 } |
| 345 | 388 |
| 346 // Unmask SIGSYS | 389 // Unmask SIGSYS |
| 347 sigset_t mask; | 390 sigset_t mask; |
| 348 if (sigemptyset(&mask) || | 391 if (sigemptyset(&mask) || |
| 349 sigaddset(&mask, SIGSYS) || | 392 sigaddset(&mask, SIGSYS) || |
| 350 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | 393 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { |
| 351 goto filter_failed; | 394 goto filter_failed; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 362 if (!gen) { | 405 if (!gen) { |
| 363 SANDBOX_DIE("Out of memory"); | 406 SANDBOX_DIE("Out of memory"); |
| 364 } | 407 } |
| 365 | 408 |
| 366 // If the architecture doesn't match SECCOMP_ARCH, disallow the | 409 // If the architecture doesn't match SECCOMP_ARCH, disallow the |
| 367 // system call. | 410 // system call. |
| 368 Instruction *tail; | 411 Instruction *tail; |
| 369 Instruction *head = | 412 Instruction *head = |
| 370 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | 413 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| 371 offsetof(struct arch_seccomp_data, arch), | 414 offsetof(struct arch_seccomp_data, arch), |
| 415 tail = |
| 372 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, | 416 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, |
| 373 tail = | 417 NULL, |
| 374 // Grab the system call number, so that we can implement jump tables. | |
| 375 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | |
| 376 offsetof(struct arch_seccomp_data, nr)), | |
| 377 gen->MakeInstruction(BPF_RET+BPF_K, | 418 gen->MakeInstruction(BPF_RET+BPF_K, |
| 378 Kill( | 419 Kill( |
| 379 "Invalid audit architecture in BPF filter").err_))); | 420 "Invalid audit architecture in BPF filter").err_))); |
| 380 | 421 |
| 381 // On Intel architectures, verify that system call numbers are in the | |
| 382 // expected number range. The older i386 and x86-64 APIs clear bit 30 | |
| 383 // on all system calls. The newer x32 API always sets bit 30. | |
| 384 #if defined(__i386__) || defined(__x86_64__) | |
| 385 Instruction *invalidX32 = | |
| 386 gen->MakeInstruction(BPF_RET+BPF_K, | |
| 387 Kill("Illegal mixing of system call ABIs").err_); | |
| 388 Instruction *checkX32 = | |
| 389 #if defined(__x86_64__) && defined(__ILP32__) | |
| 390 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32); | |
| 391 #else | |
| 392 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0); | |
| 393 #endif | |
| 394 gen->JoinInstructions(tail, checkX32); | |
| 395 tail = checkX32; | |
| 396 #endif | |
| 397 | |
| 398 | |
| 399 { | 422 { |
| 400 // Evaluate all possible system calls and group their ErrorCodes into | 423 // Evaluate all possible system calls and group their ErrorCodes into |
| 401 // ranges of identical codes. | 424 // ranges of identical codes. |
| 402 Ranges ranges; | 425 Ranges ranges; |
| 403 findRanges(&ranges); | 426 findRanges(&ranges); |
| 404 | 427 |
| 405 // Compile the system call ranges to an optimized BPF jumptable | 428 // Compile the system call ranges to an optimized BPF jumptable |
| 406 Instruction *jumptable = | 429 Instruction *jumptable = |
| 407 assembleJumpTable(gen, ranges.begin(), ranges.end()); | 430 assembleJumpTable(gen, ranges.begin(), ranges.end()); |
| 408 | 431 |
| 432 // If there is at least one UnsafeTrap() in our program, the entire sandbox |
| 433 // is unsafe. We need to modify the program so that all non- |
| 434 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then |
| 435 // allow us to temporarily disable sandboxing rules inside of callbacks to |
| 436 // UnsafeTrap(). |
| 437 has_unsafe_traps_ = false; |
| 438 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); |
| 439 |
| 440 // Grab the system call number, so that we can implement jump tables. |
| 441 Instruction *load_nr = |
| 442 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| 443 offsetof(struct arch_seccomp_data, nr)); |
| 444 |
| 445 // If our BPF program has unsafe jumps, enable support for them. This |
| 446 // test happens very early in the BPF filter program. Even before we |
| 447 // consider looking at system call numbers. |
| 448 // As support for unsafe jumps essentially defeats all the security |
| 449 // measures that the sandbox provides, we print a big warning message -- |
| 450 // and of course, we make sure to only ever enable this feature if it |
| 451 // is actually requested by the sandbox policy. |
| 452 if (has_unsafe_traps_) { |
| 453 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; |
| 454 void *aux = evaluators_.begin()->second; |
| 455 if (!evaluateSyscall(__NR_rt_sigprocmask, aux). |
| 456 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) || |
| 457 !evaluateSyscall(__NR_rt_sigreturn, aux). |
| 458 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) |
| 459 #if defined(__NR_sigprocmask) |
| 460 || !evaluateSyscall(__NR_sigprocmask, aux). |
| 461 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) |
| 462 #endif |
| 463 #if defined(__NR_sigreturn) |
| 464 || !evaluateSyscall(__NR_sigreturn, aux). |
| 465 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) |
| 466 #endif |
| 467 ) { |
| 468 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must " |
| 469 "unconditionally allow sigreturn() and sigprocmask()"); |
| 470 } |
| 471 |
| 472 SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes"); |
| 473 gen->Traverse(jumptable, RedirectToUserspace, NULL); |
| 474 |
| 475 // Allow system calls, if they originate from our magic return address |
| 476 // (which we can query by calling Syscall(-1)). |
| 477 uintptr_t syscall_entry_point = static_cast<uintptr_t>(Syscall(-1)); |
| 478 uint32_t low = static_cast<uint32_t>(syscall_entry_point); |
| 479 #if __SIZEOF_POINTER__ > 4 |
| 480 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); |
| 481 #endif |
| 482 |
| 483 // BPF cannot do native 64bit comparisons. On 64bit architectures, we |
| 484 // have to compare both 32bit halves of the instruction pointer. If they |
| 485 // match what we expect, we return ERR_ALLOWED. If either or both don't |
| 486 // match, we continue evaluating the rest of the sandbox policy. |
| 487 Instruction *escape_hatch = |
| 488 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| 489 offsetof(struct arch_seccomp_data, |
| 490 instruction_pointer) + |
| 491 (__SIZEOF_POINTER__ > 4 && |
| 492 __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0), |
| 493 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low, |
| 494 #if __SIZEOF_POINTER__ > 4 |
| 495 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| 496 offsetof(struct arch_seccomp_data, |
| 497 instruction_pointer) + |
| 498 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4), |
| 499 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi, |
| 500 #endif |
| 501 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)), |
| 502 #if __SIZEOF_POINTER__ > 4 |
| 503 load_nr)), |
| 504 #endif |
| 505 load_nr)); |
| 506 gen->JoinInstructions(tail, escape_hatch); |
| 507 } else { |
| 508 gen->JoinInstructions(tail, load_nr); |
| 509 } |
| 510 tail = load_nr; |
| 511 |
| 512 // On Intel architectures, verify that system call numbers are in the |
| 513 // expected number range. The older i386 and x86-64 APIs clear bit 30 |
| 514 // on all system calls. The newer x32 API always sets bit 30. |
| 515 #if defined(__i386__) || defined(__x86_64__) |
| 516 Instruction *invalidX32 = |
| 517 gen->MakeInstruction(BPF_RET+BPF_K, |
| 518 Kill("Illegal mixing of system call ABIs").err_); |
| 519 Instruction *checkX32 = |
| 520 #if defined(__x86_64__) && defined(__ILP32__) |
| 521 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32); |
| 522 #else |
| 523 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0); |
| 524 #endif |
| 525 gen->JoinInstructions(tail, checkX32); |
| 526 tail = checkX32; |
| 527 #endif |
| 528 |
| 409 // Append jump table to our pre-amble | 529 // Append jump table to our pre-amble |
| 410 gen->JoinInstructions(tail, jumptable); | 530 gen->JoinInstructions(tail, jumptable); |
| 411 } | 531 } |
| 412 | 532 |
| 413 // Turn the DAG into a vector of instructions. | 533 // Turn the DAG into a vector of instructions. |
| 414 Program *program = new Program(); | 534 Program *program = new Program(); |
| 415 gen->Compile(head, program); | 535 gen->Compile(head, program); |
| 416 delete gen; | 536 delete gen; |
| 417 | 537 |
| 418 // Make sure compilation resulted in BPF program that executes | 538 // Make sure compilation resulted in BPF program that executes |
| 419 // correctly. Otherwise, there is an internal error in our BPF compiler. | 539 // correctly. Otherwise, there is an internal error in our BPF compiler. |
| 420 // There is really nothing the caller can do until the bug is fixed. | 540 // There is really nothing the caller can do until the bug is fixed. |
| 421 #ifndef NDEBUG | 541 #ifndef NDEBUG |
| 422 const char *err = NULL; | 542 { |
| 423 if (!Verifier::VerifyBPF(*program, evaluators_, &err)) { | 543 // If we previously rewrote the BPF program so that it calls user-space |
| 424 SANDBOX_DIE(err); | 544 // whenever we return an "errno" value from the filter, then we have to |
| 545 // wrap our system call evaluator to perform the same operation. Otherwise, |
| 546 // the verifier would also report a mismatch in return codes. |
| 547 Evaluators redirected_evaluators; |
| 548 redirected_evaluators.push_back( |
| 549 std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_)); |
| 550 |
| 551 const char *err = NULL; |
| 552 if (!Verifier::VerifyBPF(*program, |
| 553 has_unsafe_traps_ ? redirected_evaluators : evaluators_, &err)) { |
| 554 SANDBOX_DIE(err); |
| 555 } |
| 425 } | 556 } |
| 426 #endif | 557 #endif |
| 427 | 558 |
| 428 // We want to be very careful in not imposing any requirements on the | 559 // We want to be very careful in not imposing any requirements on the |
| 429 // policies that are set with setSandboxPolicy(). This means, as soon as | 560 // policies that are set with setSandboxPolicy(). This means, as soon as |
| 430 // the sandbox is active, we shouldn't be relying on libraries that could | 561 // the sandbox is active, we shouldn't be relying on libraries that could |
| 431 // be making system calls. This, for example, means we should avoid | 562 // be making system calls. This, for example, means we should avoid |
| 432 // using the heap and we should avoid using STL functions. | 563 // using the heap and we should avoid using STL functions. |
| 433 // Temporarily copy the contents of the "program" vector into a | 564 // Temporarily copy the contents of the "program" vector into a |
| 434 // stack-allocated array; and then explicitly destroy that object. | 565 // stack-allocated array; and then explicitly destroy that object. |
| 435 // This makes sure we don't ex- or implicitly call new/delete after we | 566 // This makes sure we don't ex- or implicitly call new/delete after we |
| 436 // installed the BPF filter program in the kernel. Depending on the | 567 // installed the BPF filter program in the kernel. Depending on the |
| 437 // system memory allocator that is in effect, these operators can result | 568 // system memory allocator that is in effect, these operators can result |
| 438 // in system calls to things like munmap() or brk(). | 569 // in system calls to things like munmap() or brk(). |
| 439 struct sock_filter bpf[program->size()]; | 570 struct sock_filter bpf[program->size()]; |
| 440 const struct sock_fprog prog = { | 571 const struct sock_fprog prog = { |
| 441 static_cast<unsigned short>(program->size()), bpf }; | 572 static_cast<unsigned short>(program->size()), bpf }; |
| 442 memcpy(bpf, &(*program)[0], sizeof(bpf)); | 573 memcpy(bpf, &(*program)[0], sizeof(bpf)); |
| 443 delete program; | 574 delete program; |
| 444 | 575 |
| 445 // Release memory that is no longer needed | 576 // Release memory that is no longer needed |
| 446 evaluators_.clear(); | 577 evaluators_.clear(); |
| 447 errMap_.clear(); | |
| 448 | 578 |
| 449 #if defined(SECCOMP_BPF_VALGRIND_HACKS) | 579 #if defined(SECCOMP_BPF_VALGRIND_HACKS) |
| 450 // Valgrind is really not happy about our sandbox. Disable it when running | 580 // Valgrind is really not happy about our sandbox. Disable it when running |
| 451 // in Valgrind. This feature is dangerous and should never be enabled by | 581 // in Valgrind. This feature is dangerous and should never be enabled by |
| 452 // default. We protect it behind a pre-processor option. | 582 // default. We protect it behind a pre-processor option. |
| 453 if (!RUNNING_ON_VALGRIND) | 583 if (!RUNNING_ON_VALGRIND) |
| 454 #endif | 584 #endif |
| 455 { | 585 { |
| 456 // Install BPF filter program | 586 // Install BPF filter program |
| 457 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { | 587 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 554 struct arch_sigsys sigsys; | 684 struct arch_sigsys sigsys; |
| 555 memcpy(&sigsys, &info->_sifields, sizeof(sigsys)); | 685 memcpy(&sigsys, &info->_sifields, sizeof(sigsys)); |
| 556 | 686 |
| 557 // Some more sanity checks. | 687 // Some more sanity checks. |
| 558 if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) || | 688 if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) || |
| 559 sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) || | 689 sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) || |
| 560 sigsys.arch != SECCOMP_ARCH) { | 690 sigsys.arch != SECCOMP_ARCH) { |
| 561 goto sigsys_err; | 691 goto sigsys_err; |
| 562 } | 692 } |
| 563 | 693 |
| 564 // Copy the seccomp-specific data into a arch_seccomp_data structure. This | 694 // We need to tell whether we are performing a "normal" callback, or |
| 565 // is what we are showing to TrapFnc callbacks that the system call evaluator | 695 // whether we were called recursively from within an UnsafeTrap() callback. |
| 566 // registered with the sandbox. | 696 // This is a little tricky to do, because we need to somehow get access to |
| 567 struct arch_seccomp_data data = { | 697 // per-thread data from within a signal context. Normal TLS storage is not |
| 568 sigsys.nr, | 698 // safely accessible at this time. We could roll our own, but that involves |
| 569 SECCOMP_ARCH, | 699 // a lot of complexity. Instead, we co-opt one bit in the signal mask. |
| 570 reinterpret_cast<uint64_t>(sigsys.ip), | 700 // If SIGBUS is blocked, we assume that we have been called recursively. |
| 571 { | 701 // There is a possibility for collision with other code that needs to do |
| 572 static_cast<uint64_t>(SECCOMP_PARM1(ctx)), | 702 // this, but in practice the risks are low. |
| 573 static_cast<uint64_t>(SECCOMP_PARM2(ctx)), | 703 intptr_t rc; |
| 574 static_cast<uint64_t>(SECCOMP_PARM3(ctx)), | 704 if (has_unsafe_traps_ && |
| 575 static_cast<uint64_t>(SECCOMP_PARM4(ctx)), | 705 sigismember(&ctx->uc_sigmask, SIGBUS)) { |
| 576 static_cast<uint64_t>(SECCOMP_PARM5(ctx)), | 706 errno = old_errno; |
| 577 static_cast<uint64_t>(SECCOMP_PARM6(ctx)) | 707 rc = Syscall(sigsys.nr, |
| 708 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), |
| 709 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), |
| 710 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); |
| 711 } else { |
| 712 const ErrorCode& err = trapArray_[info->si_errno - 1]; |
| 713 if (!err.safe_) { |
| 714 sigset_t mask; |
| 715 sigemptyset(&mask); |
| 716 sigaddset(&mask, SIGBUS); |
| 717 sigprocmask(SIG_BLOCK, &mask, NULL); |
| 578 } | 718 } |
| 579 }; | |
| 580 | 719 |
| 581 // Now call the TrapFnc callback associated with this particular instance | 720 // Copy the seccomp-specific data into an arch_seccomp_data structure. This |
| 582 // of SECCOMP_RET_TRAP. | 721 // is what we are showing to TrapFnc callbacks that the system call |
| 583 const ErrorCode& err = trapArray_[info->si_errno - 1]; | 722 // evaluator registered with the sandbox. |
| 584 intptr_t rc = err.fnc_(data, err.aux_); | 723 struct arch_seccomp_data data = { |
| 724 sigsys.nr, |
| 725 SECCOMP_ARCH, |
| 726 reinterpret_cast<uint64_t>(sigsys.ip), |
| 727 { |
| 728 static_cast<uint64_t>(SECCOMP_PARM1(ctx)), |
| 729 static_cast<uint64_t>(SECCOMP_PARM2(ctx)), |
| 730 static_cast<uint64_t>(SECCOMP_PARM3(ctx)), |
| 731 static_cast<uint64_t>(SECCOMP_PARM4(ctx)), |
| 732 static_cast<uint64_t>(SECCOMP_PARM5(ctx)), |
| 733 static_cast<uint64_t>(SECCOMP_PARM6(ctx)) |
| 734 } |
| 735 }; |
| 736 |
| 737 // Now call the TrapFnc callback associated with this particular instance |
| 738 // of SECCOMP_RET_TRAP. |
| 739 rc = err.fnc_(data, err.aux_); |
| 740 } |
| 585 | 741 |
| 586 // Update the CPU register that stores the return code of the system call | 742 // Update the CPU register that stores the return code of the system call |
| 587 // that we just handled, and restore "errno" to the value that it had | 743 // that we just handled, and restore "errno" to the value that it had |
| 588 // before entering the signal handler. | 744 // before entering the signal handler. |
| 589 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc); | 745 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc); |
| 590 errno = old_errno; | 746 errno = old_errno; |
| 591 | 747 |
| 592 return; | 748 return; |
| 593 } | 749 } |
| 594 | 750 |
| 595 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { | 751 bool Sandbox::TrapKey::operator<(const Sandbox::TrapKey& o) const { |
| 752 if (fnc != o.fnc) { |
| 753 return fnc < o.fnc; |
| 754 } else if (aux != o.aux) { |
| 755 return aux < o.aux; |
| 756 } else { |
| 757 return safe < o.safe; |
| 758 } |
| 759 } |
| 760 |
| 761 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, |
| 762 bool safe) { |
| 596 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance | 763 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance |
| 597 // of a SECCOMP_RET_TRAP. | 764 // of a SECCOMP_RET_TRAP. |
| 598 std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux); | 765 TrapKey key(fnc, aux, safe); |
| 599 TrapIds::const_iterator iter = trapIds_.find(key); | 766 TrapIds::const_iterator iter = trapIds_.find(key); |
| 600 uint16_t id; | 767 uint16_t id; |
| 601 if (iter != trapIds_.end()) { | 768 if (iter != trapIds_.end()) { |
| 602 // We have seen this pair before. Return the same id that we assigned | 769 // We have seen this pair before. Return the same id that we assigned |
| 603 // earlier. | 770 // earlier. |
| 604 id = iter->second; | 771 id = iter->second; |
| 605 } else { | 772 } else { |
| 606 // This is a new pair. Remember it and assign a new id. | 773 // This is a new pair. Remember it and assign a new id. |
| 607 // Please note that we have to store traps in memory that doesn't get | 774 // Please note that we have to store traps in memory that doesn't get |
| 608 // deallocated when the program is shutting down. A memory leak is | 775 // deallocated when the program is shutting down. A memory leak is |
| 609 // intentional, because we might otherwise not be able to execute | 776 // intentional, because we might otherwise not be able to execute |
| 610 // system calls part way through the program shutting down | 777 // system calls part way through the program shutting down |
| 611 if (!traps_) { | 778 if (!traps_) { |
| 612 traps_ = new Traps(); | 779 traps_ = new Traps(); |
| 613 } | 780 } |
| 614 if (traps_->size() >= SECCOMP_RET_DATA) { | 781 if (traps_->size() >= SECCOMP_RET_DATA) { |
| 615 // In practice, this is pretty much impossible to trigger, as there | 782 // In practice, this is pretty much impossible to trigger, as there |
| 616 // are other kernel limitations that restrict overall BPF program sizes. | 783 // are other kernel limitations that restrict overall BPF program sizes. |
| 617 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); | 784 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); |
| 618 } | 785 } |
| 619 id = traps_->size() + 1; | 786 id = traps_->size() + 1; |
| 620 | 787 |
| 621 traps_->push_back(ErrorCode(fnc, aux, id)); | 788 traps_->push_back(ErrorCode(fnc, aux, safe, id)); |
| 622 trapIds_[key] = id; | 789 trapIds_[key] = id; |
| 623 | 790 |
| 624 // We want to access the traps_ vector from our signal handler. But | 791 // We want to access the traps_ vector from our signal handler. But |
| 625 // we are not assured that doing so is async-signal safe. On the other | 792 // we are not assured that doing so is async-signal safe. On the other |
| 626 // hand, C++ guarantees that the contents of a vector is stored in a | 793 // hand, C++ guarantees that the contents of a vector is stored in a |
| 627 // contiguous C-style array. | 794 // contiguous C-style array. |
| 628 // So, we look up the address and size of this array outside of the | 795 // So, we look up the address and size of this array outside of the |
| 629 // signal handler, where we can safely do so. | 796 // signal handler, where we can safely do so. |
| 630 trapArray_ = &(*traps_)[0]; | 797 trapArray_ = &(*traps_)[0]; |
| 631 trapArraySize_ = id; | 798 trapArraySize_ = id; |
| 799 return traps_->back(); |
| 632 } | 800 } |
| 633 | 801 |
| 634 ErrorCode err = ErrorCode(fnc, aux, id); | 802 return ErrorCode(fnc, aux, safe, id); |
| 635 return errMap_[err.err()] = err; | 803 } |
| 804 |
| 805 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { |
| 806 return MakeTrap(fnc, aux, true); |
| 807 } |
| 808 |
| 809 ErrorCode Sandbox::UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux) { |
| 810 return MakeTrap(fnc, aux, false); |
| 811 } |
| 812 |
| 813 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { |
| 814 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; |
| 815 return -err; |
| 636 } | 816 } |
| 637 | 817 |
| 638 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { | 818 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { |
| 639 SANDBOX_DIE(static_cast<char *>(aux)); | 819 SANDBOX_DIE(static_cast<char *>(aux)); |
| 640 } | 820 } |
| 641 | 821 |
| 642 ErrorCode Sandbox::Kill(const char *msg) { | 822 ErrorCode Sandbox::Kill(const char *msg) { |
| 643 return Trap(bpfFailure, const_cast<char *>(msg)); | 823 return Trap(bpfFailure, const_cast<char *>(msg)); |
| 644 } | 824 } |
| 645 | 825 |
| 646 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 826 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
| 647 int Sandbox::proc_fd_ = -1; | 827 int Sandbox::proc_fd_ = -1; |
| 648 Sandbox::Evaluators Sandbox::evaluators_; | 828 Sandbox::Evaluators Sandbox::evaluators_; |
| 649 Sandbox::ErrMap Sandbox::errMap_; | |
| 650 Sandbox::Traps *Sandbox::traps_ = NULL; | 829 Sandbox::Traps *Sandbox::traps_ = NULL; |
| 651 Sandbox::TrapIds Sandbox::trapIds_; | 830 Sandbox::TrapIds Sandbox::trapIds_; |
| 652 ErrorCode *Sandbox::trapArray_ = NULL; | 831 ErrorCode *Sandbox::trapArray_ = NULL; |
| 653 size_t Sandbox::trapArraySize_ = 0; | 832 size_t Sandbox::trapArraySize_ = 0; |
| 833 bool Sandbox::has_unsafe_traps_ = false; |
| 654 | 834 |
| 655 } // namespace | 835 } // namespace |
| OLD | NEW |