Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
| 6 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" | 6 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
| 7 #include "sandbox/linux/seccomp-bpf/syscall.h" | |
| 7 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" | 8 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" |
| 8 #include "sandbox/linux/seccomp-bpf/verifier.h" | 9 #include "sandbox/linux/seccomp-bpf/verifier.h" |
| 9 | 10 |
| 10 namespace { | 11 namespace { |
| 11 | 12 |
| 12 void WriteFailedStderrSetupMessage(int out_fd) { | 13 void WriteFailedStderrSetupMessage(int out_fd) { |
| 13 const char* error_string = strerror(errno); | 14 const char* error_string = strerror(errno); |
| 14 static const char msg[] = "Failed to set up stderr: "; | 15 static const char msg[] = "Failed to set up stderr: "; |
| 15 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string && | 16 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string && |
| 16 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 && | 17 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 && |
| (...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 312 for (SyscallIterator iter(true); !iter.Done(); ) { | 313 for (SyscallIterator iter(true); !iter.Done(); ) { |
| 313 uint32_t sysnum = iter.Next(); | 314 uint32_t sysnum = iter.Next(); |
| 314 if (!isDenied(syscallEvaluator(sysnum, aux))) { | 315 if (!isDenied(syscallEvaluator(sysnum, aux))) { |
| 315 SANDBOX_DIE("Policies should deny system calls that are outside the " | 316 SANDBOX_DIE("Policies should deny system calls that are outside the " |
| 316 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); | 317 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); |
| 317 } | 318 } |
| 318 } | 319 } |
| 319 return; | 320 return; |
| 320 } | 321 } |
| 321 | 322 |
| 323 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { | |
| 324 if (BPF_CLASS(insn->code) == BPF_RET && | |
| 325 insn->k > SECCOMP_RET_TRAP && | |
| 326 insn->k <= SECCOMP_RET_TRAP+trapArraySize_) { | |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
Better to do insn->k - SECCOMP_RET_TRAP <= trapArr
| |
| 327 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1]; | |
| 328 if (!err.safe_) { | |
| 329 bool *is_unsafe = static_cast<bool *>(aux); | |
| 330 *is_unsafe = true; | |
| 331 } | |
| 332 } | |
| 333 } | |
| 334 | |
| 335 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) { | |
| 336 // When inside an UnsafeTrap() callback, we want to allow all system calls. | |
| 337 // This means, we must conditionally disable the sandbox -- and that's not | |
| 338 // something that kernel-side BPF filters can do, as they cannot inspect | |
| 339 // any state other than the syscall arguments. | |
| 340 // But if we redirect all error handlers to user-space, then we can easily | |
| 341 // make this decision. | |
| 342 // The performance penalty for this extra round-trip to user-space is not | |
| 343 // actually that bad, as we only ever pay it for denied system calls; and a | |
| 344 // typical program has very few of these. | |
| 345 if (BPF_CLASS(insn->code) == BPF_RET && | |
| 346 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { | |
| 347 insn->k = Trap(ReturnErrno, | |
| 348 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); | |
| 349 } | |
| 350 } | |
| 351 | |
| 352 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { | |
| 353 // We need to replicate the behavior of RedirectToUserspace(), so that our | |
| 354 // Verifier can still work correctly. | |
| 355 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); | |
| 356 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); | |
| 357 ErrorCode err = evaluator.first(sysnum, evaluator.second); | |
| 358 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { | |
| 359 return Trap(ReturnErrno, | |
| 360 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); | |
| 361 } | |
| 362 return err; | |
| 363 } | |
| 364 | |
| 322 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { | 365 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { |
| 323 if (status_ == STATUS_ENABLED) { | 366 if (status_ == STATUS_ENABLED) { |
| 324 SANDBOX_DIE("Cannot change policy after sandbox has started"); | 367 SANDBOX_DIE("Cannot change policy after sandbox has started"); |
| 325 } | 368 } |
| 326 policySanityChecks(syscallEvaluator, aux); | 369 policySanityChecks(syscallEvaluator, aux); |
| 327 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); | 370 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); |
| 328 } | 371 } |
| 329 | 372 |
| 330 void Sandbox::installFilter(bool quiet) { | 373 void Sandbox::installFilter(bool quiet) { |
| 331 // Verify that the user pushed a policy. | 374 // Verify that the user pushed a policy. |
| 332 if (evaluators_.empty()) { | 375 if (evaluators_.empty()) { |
| 333 filter_failed: | 376 filter_failed: |
| 334 SANDBOX_DIE("Failed to configure system call filters"); | 377 SANDBOX_DIE("Failed to configure system call filters"); |
| 335 } | 378 } |
| 336 | 379 |
| 337 // Set new SIGSYS handler | 380 // Set new SIGSYS handler |
| 338 struct sigaction sa; | 381 struct sigaction sa; |
| 339 memset(&sa, 0, sizeof(sa)); | 382 memset(&sa, 0, sizeof(sa)); |
| 340 sa.sa_sigaction = &sigSys; | 383 sa.sa_sigaction = sigSys; |
| 341 sa.sa_flags = SA_SIGINFO; | 384 sa.sa_flags = SA_SIGINFO | SA_NODEFER; |
| 342 if (sigaction(SIGSYS, &sa, NULL) < 0) { | 385 if (sigaction(SIGSYS, &sa, NULL) < 0) { |
| 343 goto filter_failed; | 386 goto filter_failed; |
| 344 } | 387 } |
| 345 | 388 |
| 346 // Unmask SIGSYS | 389 // Unmask SIGSYS |
| 347 sigset_t mask; | 390 sigset_t mask; |
| 348 if (sigemptyset(&mask) || | 391 if (sigemptyset(&mask) || |
| 349 sigaddset(&mask, SIGSYS) || | 392 sigaddset(&mask, SIGSYS) || |
| 350 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | 393 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { |
| 351 goto filter_failed; | 394 goto filter_failed; |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 362 if (!gen) { | 405 if (!gen) { |
| 363 SANDBOX_DIE("Out of memory"); | 406 SANDBOX_DIE("Out of memory"); |
| 364 } | 407 } |
| 365 | 408 |
| 366 // If the architecture doesn't match SECCOMP_ARCH, disallow the | 409 // If the architecture doesn't match SECCOMP_ARCH, disallow the |
| 367 // system call. | 410 // system call. |
| 368 Instruction *tail; | 411 Instruction *tail; |
| 369 Instruction *head = | 412 Instruction *head = |
| 370 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | 413 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
| 371 offsetof(struct arch_seccomp_data, arch), | 414 offsetof(struct arch_seccomp_data, arch), |
| 415 tail = | |
| 372 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, | 416 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, |
| 373 tail = | 417 NULL, |
| 374 // Grab the system call number, so that we can implement jump tables. | |
| 375 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | |
| 376 offsetof(struct arch_seccomp_data, nr)), | |
| 377 gen->MakeInstruction(BPF_RET+BPF_K, | 418 gen->MakeInstruction(BPF_RET+BPF_K, |
| 378 Kill( | 419 Kill( |
| 379 "Invalid audit architecture in BPF filter").err_))); | 420 "Invalid audit architecture in BPF filter").err_))); |
| 380 | 421 |
| 381 // On Intel architectures, verify that system call numbers are in the | |
| 382 // expected number range. The older i386 and x86-64 APIs clear bit 30 | |
| 383 // on all system calls. The newer x32 API always sets bit 30. | |
| 384 #if defined(__i386__) || defined(__x86_64__) | |
| 385 Instruction *invalidX32 = | |
| 386 gen->MakeInstruction(BPF_RET+BPF_K, | |
| 387 Kill("Illegal mixing of system call ABIs").err_); | |
| 388 Instruction *checkX32 = | |
| 389 #if defined(__x86_64__) && defined(__ILP32__) | |
| 390 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32); | |
| 391 #else | |
| 392 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0); | |
| 393 #endif | |
| 394 gen->JoinInstructions(tail, checkX32); | |
| 395 tail = checkX32; | |
| 396 #endif | |
| 397 | |
| 398 | |
| 399 { | 422 { |
| 400 // Evaluate all possible system calls and group their ErrorCodes into | 423 // Evaluate all possible system calls and group their ErrorCodes into |
| 401 // ranges of identical codes. | 424 // ranges of identical codes. |
| 402 Ranges ranges; | 425 Ranges ranges; |
| 403 findRanges(&ranges); | 426 findRanges(&ranges); |
| 404 | 427 |
| 405 // Compile the system call ranges to an optimized BPF jumptable | 428 // Compile the system call ranges to an optimized BPF jumptable |
| 406 Instruction *jumptable = | 429 Instruction *jumptable = |
| 407 assembleJumpTable(gen, ranges.begin(), ranges.end()); | 430 assembleJumpTable(gen, ranges.begin(), ranges.end()); |
| 408 | 431 |
| 432 // If there is at least one UnsafeTrap() in our program, the entire sandbox | |
| 433 // is unsafe. We need to modify the program so that all non- | |
| 434 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then | |
| 435 // allow us to temporarily disable sandboxing rules inside of callbacks to | |
| 436 // UnsafeTrap(). | |
| 437 has_unsafe_traps_ = false; | |
| 438 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); | |
| 439 | |
| 440 // Grab the system call number, so that we can implement jump tables. | |
| 441 Instruction *load_nr = | |
| 442 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | |
| 443 offsetof(struct arch_seccomp_data, nr)); | |
| 444 | |
| 445 // If our BPF program has unsafe jumps, enable support for them. This | |
| 446 // test happens very early in the BPF filter program. Even before we | |
| 447 // consider looking at system call numbers. | |
| 448 // As support for unsafe jumps essentially defeats all the security | |
| 449 // measures that the sandbox provides, we print a big warning message -- | |
| 450 // and of course, we make sure to only ever enable this feature if it | |
| 451 // is actually requested by the sandbox policy. | |
| 452 if (has_unsafe_traps_) { | |
| 453 if (Syscall(-1) == -1 && errno == ENOSYS) { | |
| 454 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this " | |
| 455 "architecture"); | |
| 456 } | |
| 457 | |
| 458 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | |
| 459 void *aux = evaluators_.begin()->second; | |
| 460 if (!evaluateSyscall(__NR_rt_sigprocmask, aux). | |
| 461 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) || | |
| 462 !evaluateSyscall(__NR_rt_sigreturn, aux). | |
| 463 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) | |
| 464 #if defined(__NR_sigprocmask) | |
| 465 || !evaluateSyscall(__NR_sigprocmask, aux). | |
| 466 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) | |
| 467 #endif | |
| 468 #if defined(__NR_sigreturn) | |
| 469 || !evaluateSyscall(__NR_sigreturn, aux). | |
| 470 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) | |
| 471 #endif | |
| 472 ) { | |
| 473 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must " | |
| 474 "unconditionally allow sigreturn() and sigprocmask()"); | |
| 475 } | |
| 476 | |
| 477 SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes"); | |
| 478 gen->Traverse(jumptable, RedirectToUserspace, NULL); | |
| 479 | |
| 480 // Allow system calls, if they originate from our magic return address | |
| 481 // (which we can query by calling Syscall(-1)). | |
| 482 uintptr_t syscall_entry_point = static_cast<uintptr_t>(Syscall(-1)); | |
| 483 uint32_t low = static_cast<uint32_t>(syscall_entry_point); | |
| 484 #if __SIZEOF_POINTER__ > 4 | |
| 485 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); | |
| 486 #endif | |
| 487 | |
| 488 // BPF cannot do native 64bit comparisons. On 64bit architectures, we | |
| 489 // have to compare both 32bit halves of the instruction pointer. If they | |
| 490 // match what we expect, we return ERR_ALLOWED. If either or both don't | |
| 491 // match, we continue evaluating the rest of the sandbox policy. | |
| 492 Instruction *escape_hatch = | |
| 493 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | |
| 494 offsetof(struct arch_seccomp_data, | |
| 495 instruction_pointer) + | |
| 496 (__SIZEOF_POINTER__ > 4 && | |
| 497 __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0), | |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
As agreed offline, please add an #error on __BIG_E
| |
| 498 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low, | |
| 499 #if __SIZEOF_POINTER__ > 4 | |
| 500 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | |
| 501 offsetof(struct arch_seccomp_data, | |
| 502 instruction_pointer) + | |
| 503 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4), | |
| 504 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi, | |
| 505 #endif | |
| 506 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)), | |
| 507 #if __SIZEOF_POINTER__ > 4 | |
| 508 load_nr)), | |
| 509 #endif | |
| 510 load_nr)); | |
| 511 gen->JoinInstructions(tail, escape_hatch); | |
| 512 } else { | |
| 513 gen->JoinInstructions(tail, load_nr); | |
| 514 } | |
| 515 tail = load_nr; | |
| 516 | |
| 517 // On Intel architectures, verify that system call numbers are in the | |
| 518 // expected number range. The older i386 and x86-64 APIs clear bit 30 | |
| 519 // on all system calls. The newer x32 API always sets bit 30. | |
| 520 #if defined(__i386__) || defined(__x86_64__) | |
| 521 Instruction *invalidX32 = | |
| 522 gen->MakeInstruction(BPF_RET+BPF_K, | |
| 523 Kill("Illegal mixing of system call ABIs").err_); | |
| 524 Instruction *checkX32 = | |
| 525 #if defined(__x86_64__) && defined(__ILP32__) | |
| 526 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32); | |
| 527 #else | |
| 528 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0); | |
| 529 #endif | |
| 530 gen->JoinInstructions(tail, checkX32); | |
| 531 tail = checkX32; | |
| 532 #endif | |
| 533 | |
| 409 // Append jump table to our pre-amble | 534 // Append jump table to our pre-amble |
| 410 gen->JoinInstructions(tail, jumptable); | 535 gen->JoinInstructions(tail, jumptable); |
| 411 } | 536 } |
| 412 | 537 |
| 413 // Turn the DAG into a vector of instructions. | 538 // Turn the DAG into a vector of instructions. |
| 414 Program *program = new Program(); | 539 Program *program = new Program(); |
| 415 gen->Compile(head, program); | 540 gen->Compile(head, program); |
| 416 delete gen; | 541 delete gen; |
| 417 | 542 |
| 418 // Make sure compilation resulted in BPF program that executes | 543 // Make sure compilation resulted in BPF program that executes |
| 419 // correctly. Otherwise, there is an internal error in our BPF compiler. | 544 // correctly. Otherwise, there is an internal error in our BPF compiler. |
| 420 // There is really nothing the caller can do until the bug is fixed. | 545 // There is really nothing the caller can do until the bug is fixed. |
| 421 #ifndef NDEBUG | 546 #ifndef NDEBUG |
| 422 const char *err = NULL; | 547 { |
| 423 if (!Verifier::VerifyBPF(*program, evaluators_, &err)) { | 548 // If we previously rewrote the BPF program so that it calls user-space |
| 424 SANDBOX_DIE(err); | 549 // whenever we return an "errno" value from the filter, then we have to |
| 550 // wrap our system call evaluator to perform the same operation. Otherwise, | |
| 551 // the verifier would also report a mismatch in return codes. | |
| 552 Evaluators redirected_evaluators; | |
| 553 redirected_evaluators.push_back( | |
| 554 std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_)); | |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
nit: two more spaces for indent.
| |
| 555 | |
| 556 const char *err = NULL; | |
| 557 if (!Verifier::VerifyBPF(*program, | |
| 558 has_unsafe_traps_ ? redirected_evaluators : evaluators_, &err)) { | |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
Nit: indent arguments together.
| |
| 559 SANDBOX_DIE(err); | |
| 560 } | |
| 425 } | 561 } |
| 426 #endif | 562 #endif |
| 427 | 563 |
| 428 // We want to be very careful in not imposing any requirements on the | 564 // We want to be very careful in not imposing any requirements on the |
| 429 // policies that are set with setSandboxPolicy(). This means, as soon as | 565 // policies that are set with setSandboxPolicy(). This means, as soon as |
| 430 // the sandbox is active, we shouldn't be relying on libraries that could | 566 // the sandbox is active, we shouldn't be relying on libraries that could |
| 431 // be making system calls. This, for example, means we should avoid | 567 // be making system calls. This, for example, means we should avoid |
| 432 // using the heap and we should avoid using STL functions. | 568 // using the heap and we should avoid using STL functions. |
| 433 // Temporarily copy the contents of the "program" vector into a | 569 // Temporarily copy the contents of the "program" vector into a |
| 434 // stack-allocated array; and then explicitly destroy that object. | 570 // stack-allocated array; and then explicitly destroy that object. |
| 435 // This makes sure we don't ex- or implicitly call new/delete after we | 571 // This makes sure we don't ex- or implicitly call new/delete after we |
| 436 // installed the BPF filter program in the kernel. Depending on the | 572 // installed the BPF filter program in the kernel. Depending on the |
| 437 // system memory allocator that is in effect, these operators can result | 573 // system memory allocator that is in effect, these operators can result |
| 438 // in system calls to things like munmap() or brk(). | 574 // in system calls to things like munmap() or brk(). |
| 439 struct sock_filter bpf[program->size()]; | 575 struct sock_filter bpf[program->size()]; |
| 440 const struct sock_fprog prog = { | 576 const struct sock_fprog prog = { |
| 441 static_cast<unsigned short>(program->size()), bpf }; | 577 static_cast<unsigned short>(program->size()), bpf }; |
| 442 memcpy(bpf, &(*program)[0], sizeof(bpf)); | 578 memcpy(bpf, &(*program)[0], sizeof(bpf)); |
| 443 delete program; | 579 delete program; |
| 444 | 580 |
| 445 // Release memory that is no longer needed | 581 // Release memory that is no longer needed |
| 446 evaluators_.clear(); | 582 evaluators_.clear(); |
| 447 errMap_.clear(); | |
| 448 | 583 |
| 449 #if defined(SECCOMP_BPF_VALGRIND_HACKS) | 584 #if defined(SECCOMP_BPF_VALGRIND_HACKS) |
| 450 // Valgrind is really not happy about our sandbox. Disable it when running | 585 // Valgrind is really not happy about our sandbox. Disable it when running |
| 451 // in Valgrind. This feature is dangerous and should never be enabled by | 586 // in Valgrind. This feature is dangerous and should never be enabled by |
| 452 // default. We protect it behind a pre-processor option. | 587 // default. We protect it behind a pre-processor option. |
| 453 if (!RUNNING_ON_VALGRIND) | 588 if (!RUNNING_ON_VALGRIND) |
| 454 #endif | 589 #endif |
| 455 { | 590 { |
| 456 // Install BPF filter program | 591 // Install BPF filter program |
| 457 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { | 592 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 554 struct arch_sigsys sigsys; | 689 struct arch_sigsys sigsys; |
| 555 memcpy(&sigsys, &info->_sifields, sizeof(sigsys)); | 690 memcpy(&sigsys, &info->_sifields, sizeof(sigsys)); |
| 556 | 691 |
| 557 // Some more sanity checks. | 692 // Some more sanity checks. |
| 558 if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) || | 693 if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) || |
| 559 sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) || | 694 sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) || |
| 560 sigsys.arch != SECCOMP_ARCH) { | 695 sigsys.arch != SECCOMP_ARCH) { |
| 561 goto sigsys_err; | 696 goto sigsys_err; |
| 562 } | 697 } |
| 563 | 698 |
| 564 // Copy the seccomp-specific data into a arch_seccomp_data structure. This | 699 // We need to tell whether we are performing a "normal" callback, or |
| 565 // is what we are showing to TrapFnc callbacks that the system call evaluator | 700 // whether we were called recursively from within an UnsafeTrap() callback. |
| 566 // registered with the sandbox. | 701 // This is a little tricky to do, because we need to somehow get access to |
| 567 struct arch_seccomp_data data = { | 702 // per-thread data from within a signal context. Normal TLS storage is not |
| 568 sigsys.nr, | 703 // safely accessible at this time. We could roll our own, but that involves |
| 569 SECCOMP_ARCH, | 704 // a lot of complexity. Instead, we co-opt one bit in the signal mask. |
| 570 reinterpret_cast<uint64_t>(sigsys.ip), | 705 // If SIGBUS is blocked, we assume that we have been called recursively. |
| 571 { | 706 // There is a possibility for collision with other code that needs to do |
| 572 static_cast<uint64_t>(SECCOMP_PARM1(ctx)), | 707 // this, but in practice the risks are low. |
| 573 static_cast<uint64_t>(SECCOMP_PARM2(ctx)), | 708 intptr_t rc; |
| 574 static_cast<uint64_t>(SECCOMP_PARM3(ctx)), | 709 if (has_unsafe_traps_ && |
| 575 static_cast<uint64_t>(SECCOMP_PARM4(ctx)), | 710 sigismember(&ctx->uc_sigmask, SIGBUS)) { |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
This is pretty hack-ish. It's ok-ish since it's a
| |
| 576 static_cast<uint64_t>(SECCOMP_PARM5(ctx)), | 711 errno = old_errno; |
| 577 static_cast<uint64_t>(SECCOMP_PARM6(ctx)) | 712 if (sigsys.nr == __NR_clone) { |
| 713 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); | |
| 578 } | 714 } |
| 579 }; | 715 rc = Syscall(sigsys.nr, |
| 716 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), | |
| 717 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), | |
| 718 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); | |
| 719 } else { | |
| 720 const ErrorCode& err = trapArray_[info->si_errno - 1]; | |
| 721 if (!err.safe_) { | |
| 722 sigset_t mask; | |
| 723 sigemptyset(&mask); | |
| 724 sigaddset(&mask, SIGBUS); | |
| 725 sigprocmask(SIG_BLOCK, &mask, NULL); | |
| 726 } | |
| 580 | 727 |
| 581 // Now call the TrapFnc callback associated with this particular instance | 728 // Copy the seccomp-specific data into a arch_seccomp_data structure. This |
| 582 // of SECCOMP_RET_TRAP. | 729 // is what we are showing to TrapFnc callbacks that the system call |
| 583 const ErrorCode& err = trapArray_[info->si_errno - 1]; | 730 // evaluator registered with the sandbox. |
| 584 intptr_t rc = err.fnc_(data, err.aux_); | 731 struct arch_seccomp_data data = { |
| 732 sigsys.nr, | |
| 733 SECCOMP_ARCH, | |
| 734 reinterpret_cast<uint64_t>(sigsys.ip), | |
| 735 { | |
| 736 static_cast<uint64_t>(SECCOMP_PARM1(ctx)), | |
| 737 static_cast<uint64_t>(SECCOMP_PARM2(ctx)), | |
| 738 static_cast<uint64_t>(SECCOMP_PARM3(ctx)), | |
| 739 static_cast<uint64_t>(SECCOMP_PARM4(ctx)), | |
| 740 static_cast<uint64_t>(SECCOMP_PARM5(ctx)), | |
| 741 static_cast<uint64_t>(SECCOMP_PARM6(ctx)) | |
| 742 } | |
| 743 }; | |
| 744 | |
| 745 // Now call the TrapFnc callback associated with this particular instance | |
| 746 // of SECCOMP_RET_TRAP. | |
| 747 rc = err.fnc_(data, err.aux_); | |
| 748 } | |
| 585 | 749 |
| 586 // Update the CPU register that stores the return code of the system call | 750 // Update the CPU register that stores the return code of the system call |
| 587 // that we just handled, and restore "errno" to the value that it had | 751 // that we just handled, and restore "errno" to the value that it had |
| 588 // before entering the signal handler. | 752 // before entering the signal handler. |
| 589 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc); | 753 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc); |
| 590 errno = old_errno; | 754 errno = old_errno; |
| 591 | 755 |
| 592 return; | 756 return; |
| 593 } | 757 } |
| 594 | 758 |
| 595 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { | 759 bool Sandbox::TrapKey::operator<(const Sandbox::TrapKey& o) const { |
| 760 if (fnc != o.fnc) { | |
| 761 return fnc < o.fnc; | |
| 762 } else if (aux != o.aux) { | |
| 763 return aux < o.aux; | |
| 764 } else { | |
| 765 return safe < o.safe; | |
| 766 } | |
| 767 } | |
| 768 | |
| 769 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, | |
| 770 bool safe) { | |
| 596 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance | 771 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance |
| 597 // of a SECCOMP_RET_TRAP. | 772 // of a SECCOMP_RET_TRAP. |
| 598 std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux); | 773 TrapKey key(fnc, aux, safe); |
| 599 TrapIds::const_iterator iter = trapIds_.find(key); | 774 TrapIds::const_iterator iter = trapIds_.find(key); |
| 600 uint16_t id; | 775 uint16_t id; |
| 601 if (iter != trapIds_.end()) { | 776 if (iter != trapIds_.end()) { |
| 602 // We have seen this pair before. Return the same id that we assigned | 777 // We have seen this pair before. Return the same id that we assigned |
| 603 // earlier. | 778 // earlier. |
| 604 id = iter->second; | 779 id = iter->second; |
| 605 } else { | 780 } else { |
| 606 // This is a new pair. Remember it and assign a new id. | 781 // This is a new pair. Remember it and assign a new id. |
| 607 // Please note that we have to store traps in memory that doesn't get | 782 // Please note that we have to store traps in memory that doesn't get |
| 608 // deallocated when the program is shutting down. A memory leak is | 783 // deallocated when the program is shutting down. A memory leak is |
| 609 // intentional, because we might otherwise not be able to execute | 784 // intentional, because we might otherwise not be able to execute |
| 610 // system calls part way through the program shutting down | 785 // system calls part way through the program shutting down |
| 611 if (!traps_) { | 786 if (!traps_) { |
| 612 traps_ = new Traps(); | 787 traps_ = new Traps(); |
| 613 } | 788 } |
| 614 if (traps_->size() >= SECCOMP_RET_DATA) { | 789 if (traps_->size() >= SECCOMP_RET_DATA) { |
| 615 // In practice, this is pretty much impossible to trigger, as there | 790 // In practice, this is pretty much impossible to trigger, as there |
| 616 // are other kernel limitations that restrict overall BPF program sizes. | 791 // are other kernel limitations that restrict overall BPF program sizes. |
| 617 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); | 792 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); |
| 618 } | 793 } |
| 619 id = traps_->size() + 1; | 794 id = traps_->size() + 1; |
| 620 | 795 |
| 621 traps_->push_back(ErrorCode(fnc, aux, id)); | 796 traps_->push_back(ErrorCode(fnc, aux, safe, id)); |
| 622 trapIds_[key] = id; | 797 trapIds_[key] = id; |
| 623 | 798 |
| 624 // We want to access the traps_ vector from our signal handler. But | 799 // We want to access the traps_ vector from our signal handler. But |
| 625 // we are not assured that doing so is async-signal safe. On the other | 800 // we are not assured that doing so is async-signal safe. On the other |
| 626 // hand, C++ guarantees that the contents of a vector is stored in a | 801 // hand, C++ guarantees that the contents of a vector is stored in a |
| 627 // contiguous C-style array. | 802 // contiguous C-style array. |
| 628 // So, we look up the address and size of this array outside of the | 803 // So, we look up the address and size of this array outside of the |
| 629 // signal handler, where we can safely do so. | 804 // signal handler, where we can safely do so. |
| 630 trapArray_ = &(*traps_)[0]; | 805 trapArray_ = &(*traps_)[0]; |
| 631 trapArraySize_ = id; | 806 trapArraySize_ = id; |
| 807 return traps_->back(); | |
| 632 } | 808 } |
| 633 | 809 |
| 634 ErrorCode err = ErrorCode(fnc, aux, id); | 810 return ErrorCode(fnc, aux, safe, id); |
| 635 return errMap_[err.err()] = err; | 811 } |
| 812 | |
| 813 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { | |
| 814 return MakeTrap(fnc, aux, true); | |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
Add a comment near true "/* Safe trap */"
| |
| 815 } | |
| 816 | |
| 817 ErrorCode Sandbox::UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux) { | |
| 818 return MakeTrap(fnc, aux, false); | |
|
jln (very slow on Chromium)
2012/11/20 01:08:31
Same here.
| |
| 819 } | |
| 820 | |
| 821 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { | |
| 822 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; | |
| 823 return -err; | |
| 636 } | 824 } |
| 637 | 825 |
| 638 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { | 826 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { |
| 639 SANDBOX_DIE(static_cast<char *>(aux)); | 827 SANDBOX_DIE(static_cast<char *>(aux)); |
| 640 } | 828 } |
| 641 | 829 |
| 642 ErrorCode Sandbox::Kill(const char *msg) { | 830 ErrorCode Sandbox::Kill(const char *msg) { |
| 643 return Trap(bpfFailure, const_cast<char *>(msg)); | 831 return Trap(bpfFailure, const_cast<char *>(msg)); |
| 644 } | 832 } |
| 645 | 833 |
| 646 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 834 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
| 647 int Sandbox::proc_fd_ = -1; | 835 int Sandbox::proc_fd_ = -1; |
| 648 Sandbox::Evaluators Sandbox::evaluators_; | 836 Sandbox::Evaluators Sandbox::evaluators_; |
| 649 Sandbox::ErrMap Sandbox::errMap_; | |
| 650 Sandbox::Traps *Sandbox::traps_ = NULL; | 837 Sandbox::Traps *Sandbox::traps_ = NULL; |
| 651 Sandbox::TrapIds Sandbox::trapIds_; | 838 Sandbox::TrapIds Sandbox::trapIds_; |
| 652 ErrorCode *Sandbox::trapArray_ = NULL; | 839 ErrorCode *Sandbox::trapArray_ = NULL; |
| 653 size_t Sandbox::trapArraySize_ = 0; | 840 size_t Sandbox::trapArraySize_ = 0; |
| 841 bool Sandbox::has_unsafe_traps_ = false; | |
| 654 | 842 |
| 655 } // namespace | 843 } // namespace |
| OLD | NEW |