sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 11363212: Added support for greylisting of system calls.

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 11363212: Added support for greylisting of system calls. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Addressed reviewer's comments Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "sandbox/linux/seccomp-bpf/codegen.h"	5 #include "sandbox/linux/seccomp-bpf/codegen.h"

6 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"	6 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

	7 #include "sandbox/linux/seccomp-bpf/syscall.h"

7 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"	8 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"

8 #include "sandbox/linux/seccomp-bpf/verifier.h"	9 #include "sandbox/linux/seccomp-bpf/verifier.h"

9	10

10 namespace {	11 namespace {

11	12

12 void WriteFailedStderrSetupMessage(int out_fd) {	13 void WriteFailedStderrSetupMessage(int out_fd) {

13 const char* error_string = strerror(errno);	14 const char* error_string = strerror(errno);

14 static const char msg[] = "Failed to set up stderr: ";	15 static const char msg[] = "Failed to set up stderr: ";

15 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&	16 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&

16 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&	17 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&

(...skipping 295 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
312 for (SyscallIterator iter(true); !iter.Done(); ) {	313 for (SyscallIterator iter(true); !iter.Done(); ) {

313 uint32_t sysnum = iter.Next();	314 uint32_t sysnum = iter.Next();

314 if (!isDenied(syscallEvaluator(sysnum, aux))) {	315 if (!isDenied(syscallEvaluator(sysnum, aux))) {

315 SANDBOX_DIE("Policies should deny system calls that are outside the "	316 SANDBOX_DIE("Policies should deny system calls that are outside the "

316 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");	317 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");

317 }	318 }

318 }	319 }

319 return;	320 return;

320 }	321 }

321	322

	323 void Sandbox::CheckForUnsafeErrorCodes(Instruction insn, void aux) {

	324 if (BPF_CLASS(insn->code) == BPF_RET &&

	325 insn->k > SECCOMP_RET_TRAP &&

	326 insn->k <= SECCOMP_RET_TRAP+trapArraySize_) {
	jln (very slow on Chromium) 2012/11/20 01:08:31 Better to do insn->k - SECCOMP_RET_TRAP <= trapArraySize_ to avoid int overflow completely.
	327 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1];

	328 if (!err.safe_) {

	329 bool is_unsafe = static_cast<bool >(aux);

	330 *is_unsafe = true;

	331 }

	332 }

	333 }

	334

	335 void Sandbox::RedirectToUserspace(Instruction insn, void aux) {

	336 // When inside an UnsafeTrap() callback, we want to allow all system calls.

	337 // This means, we must conditionally disable the sandbox -- and that's not

	338 // something that kernel-side BPF filters can do, as they cannot inspect

	339 // any state other than the syscall arguments.

	340 // But if we redirect all error handlers to user-space, then we can easily

	341 // make this decision.

	342 // The performance penalty for this extra round-trip to user-space is not

	343 // actually that bad, as we only ever pay it for denied system calls; and a

	344 // typical program has very few of these.

	345 if (BPF_CLASS(insn->code) == BPF_RET &&

	346 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

	347 insn->k = Trap(ReturnErrno,

	348 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();

	349 }

	350 }

	351

	352 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {

	353 // We need to replicate the behavior of RedirectToUserspace(), so that our

	354 // Verifier can still work correctly.

	355 Evaluators evaluators = reinterpret_cast<Evaluators >(aux);

	356 const std::pair<EvaluateSyscall, void >& evaluator = evaluators->begin();

	357 ErrorCode err = evaluator.first(sysnum, evaluator.second);

	358 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

	359 return Trap(ReturnErrno,

	360 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));

	361 }

	362 return err;

	363 }

	364

322 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) {	365 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) {

323 if (status_ == STATUS_ENABLED) {	366 if (status_ == STATUS_ENABLED) {

324 SANDBOX_DIE("Cannot change policy after sandbox has started");	367 SANDBOX_DIE("Cannot change policy after sandbox has started");

325 }	368 }

326 policySanityChecks(syscallEvaluator, aux);	369 policySanityChecks(syscallEvaluator, aux);

327 evaluators_.push_back(std::make_pair(syscallEvaluator, aux));	370 evaluators_.push_back(std::make_pair(syscallEvaluator, aux));

328 }	371 }

329	372

330 void Sandbox::installFilter(bool quiet) {	373 void Sandbox::installFilter(bool quiet) {

331 // Verify that the user pushed a policy.	374 // Verify that the user pushed a policy.

332 if (evaluators_.empty()) {	375 if (evaluators_.empty()) {

333 filter_failed:	376 filter_failed:

334 SANDBOX_DIE("Failed to configure system call filters");	377 SANDBOX_DIE("Failed to configure system call filters");

335 }	378 }

336	379

337 // Set new SIGSYS handler	380 // Set new SIGSYS handler

338 struct sigaction sa;	381 struct sigaction sa;

339 memset(&sa, 0, sizeof(sa));	382 memset(&sa, 0, sizeof(sa));

340 sa.sa_sigaction = &sigSys;	383 sa.sa_sigaction = sigSys;

341 sa.sa_flags = SA_SIGINFO;	384 sa.sa_flags = SA_SIGINFO \| SA_NODEFER;

342 if (sigaction(SIGSYS, &sa, NULL) < 0) {	385 if (sigaction(SIGSYS, &sa, NULL) < 0) {

343 goto filter_failed;	386 goto filter_failed;

344 }	387 }

345	388

346 // Unmask SIGSYS	389 // Unmask SIGSYS

347 sigset_t mask;	390 sigset_t mask;

348 if (sigemptyset(&mask) \|\|	391 if (sigemptyset(&mask) \|\|

349 sigaddset(&mask, SIGSYS) \|\|	392 sigaddset(&mask, SIGSYS) \|\|

350 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {	393 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {

351 goto filter_failed;	394 goto filter_failed;

(...skipping 10 matching lines...) Expand all Loading...
362 if (!gen) {	405 if (!gen) {

363 SANDBOX_DIE("Out of memory");	406 SANDBOX_DIE("Out of memory");

364 }	407 }

365	408

366 // If the architecture doesn't match SECCOMP_ARCH, disallow the	409 // If the architecture doesn't match SECCOMP_ARCH, disallow the

367 // system call.	410 // system call.

368 Instruction *tail;	411 Instruction *tail;

369 Instruction *head =	412 Instruction *head =

370 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,	413 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

371 offsetof(struct arch_seccomp_data, arch),	414 offsetof(struct arch_seccomp_data, arch),

	415 tail =

372 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,	416 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,

373 tail =	417 NULL,

374 // Grab the system call number, so that we can implement jump tables.

375 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

376 offsetof(struct arch_seccomp_data, nr)),

377 gen->MakeInstruction(BPF_RET+BPF_K,	418 gen->MakeInstruction(BPF_RET+BPF_K,

378 Kill(	419 Kill(

379 "Invalid audit architecture in BPF filter").err_)));	420 "Invalid audit architecture in BPF filter").err_)));

380	421

381 // On Intel architectures, verify that system call numbers are in the

382 // expected number range. The older i386 and x86-64 APIs clear bit 30

383 // on all system calls. The newer x32 API always sets bit 30.

384 #if defined(__i386__) \|\| defined(__x86_64__)

385 Instruction *invalidX32 =

386 gen->MakeInstruction(BPF_RET+BPF_K,

387 Kill("Illegal mixing of system call ABIs").err_);

388 Instruction *checkX32 =

389 #if defined(__x86_64__) && defined(__ILP32__)

390 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);

391 #else

392 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);

393 #endif

394 gen->JoinInstructions(tail, checkX32);

395 tail = checkX32;

396 #endif

397

398

399 {	422 {

400 // Evaluate all possible system calls and group their ErrorCodes into	423 // Evaluate all possible system calls and group their ErrorCodes into

401 // ranges of identical codes.	424 // ranges of identical codes.

402 Ranges ranges;	425 Ranges ranges;

403 findRanges(&ranges);	426 findRanges(&ranges);

404	427

405 // Compile the system call ranges to an optimized BPF jumptable	428 // Compile the system call ranges to an optimized BPF jumptable

406 Instruction *jumptable =	429 Instruction *jumptable =

407 assembleJumpTable(gen, ranges.begin(), ranges.end());	430 assembleJumpTable(gen, ranges.begin(), ranges.end());

408	431

	432 // If there is at least one UnsafeTrap() in our program, the entire sandbox

	433 // is unsafe. We need to modify the program so that all non-

	434 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then

	435 // allow us to temporarily disable sandboxing rules inside of callbacks to

	436 // UnsafeTrap().

	437 has_unsafe_traps_ = false;

	438 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);

	439

	440 // Grab the system call number, so that we can implement jump tables.

	441 Instruction *load_nr =

	442 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

	443 offsetof(struct arch_seccomp_data, nr));

	444

	445 // If our BPF program has unsafe jumps, enable support for them. This

	446 // test happens very early in the BPF filter program. Even before we

	447 // consider looking at system call numbers.

	448 // As support for unsafe jumps essentially defeats all the security

	449 // measures that the sandbox provides, we print a big warning message --

	450 // and of course, we make sure to only ever enable this feature if it

	451 // is actually requested by the sandbox policy.

	452 if (has_unsafe_traps_) {

	453 if (Syscall(-1) == -1 && errno == ENOSYS) {

	454 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "

	455 "architecture");

	456 }

	457

	458 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;

	459 void *aux = evaluators_.begin()->second;

	460 if (!evaluateSyscall(__NR_rt_sigprocmask, aux).

	461 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) \|\|

	462 !evaluateSyscall(__NR_rt_sigreturn, aux).

	463 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

	464 #if defined(__NR_sigprocmask)

	465 \|\| !evaluateSyscall(__NR_sigprocmask, aux).

	466 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

	467 #endif

	468 #if defined(__NR_sigreturn)

	469 \|\| !evaluateSyscall(__NR_sigreturn, aux).

	470 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

	471 #endif

	472 ) {

	473 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "

	474 "unconditionally allow sigreturn() and sigprocmask()");

	475 }

	476

	477 SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes");

	478 gen->Traverse(jumptable, RedirectToUserspace, NULL);

	479

	480 // Allow system calls, if they originate from our magic return address

	481 // (which we can query by calling Syscall(-1)).

	482 uintptr_t syscall_entry_point = static_cast<uintptr_t>(Syscall(-1));

	483 uint32_t low = static_cast<uint32_t>(syscall_entry_point);

	484 #if __SIZEOF_POINTER__ > 4

	485 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);

	486 #endif

	487

	488 // BPF cannot do native 64bit comparisons. On 64bit architectures, we

	489 // have to compare both 32bit halfs of the instruction pointer. If they

	490 // match what we expect, we return ERR_ALLOWED. If either or both don't

	491 // match, we continue evalutating the rest of the sandbox policy.

	492 Instruction *escape_hatch =

	493 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

	494 offsetof(struct arch_seccomp_data,

	495 instruction_pointer) +

	496 (__SIZEOF_POINTER__ > 4 &&

	497 __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0),
	jln (very slow on Chromium) 2012/11/20 01:08:31 As agreed offline, please add an #error on __BIG_ENDIAN since this is completely untested.
	498 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low,

	499 #if __SIZEOF_POINTER__ > 4

	500 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

	501 offsetof(struct arch_seccomp_data,

	502 instruction_pointer) +

	503 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4),

	504 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi,

	505 #endif

	506 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)),

	507 #if __SIZEOF_POINTER__ > 4

	508 load_nr)),

	509 #endif

	510 load_nr));

	511 gen->JoinInstructions(tail, escape_hatch);

	512 } else {

	513 gen->JoinInstructions(tail, load_nr);

	514 }

	515 tail = load_nr;

	516

	517 // On Intel architectures, verify that system call numbers are in the

	518 // expected number range. The older i386 and x86-64 APIs clear bit 30

	519 // on all system calls. The newer x32 API always sets bit 30.

	520 #if defined(__i386__) \|\| defined(__x86_64__)

	521 Instruction *invalidX32 =

	522 gen->MakeInstruction(BPF_RET+BPF_K,

	523 Kill("Illegal mixing of system call ABIs").err_);

	524 Instruction *checkX32 =

	525 #if defined(__x86_64__) && defined(__ILP32__)

	526 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);

	527 #else

	528 gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);

	529 #endif

	530 gen->JoinInstructions(tail, checkX32);

	531 tail = checkX32;

	532 #endif

	533

409 // Append jump table to our pre-amble	534 // Append jump table to our pre-amble

410 gen->JoinInstructions(tail, jumptable);	535 gen->JoinInstructions(tail, jumptable);

411 }	536 }

412	537

413 // Turn the DAG into a vector of instructions.	538 // Turn the DAG into a vector of instructions.

414 Program *program = new Program();	539 Program *program = new Program();

415 gen->Compile(head, program);	540 gen->Compile(head, program);

416 delete gen;	541 delete gen;

417	542

418 // Make sure compilation resulted in BPF program that executes	543 // Make sure compilation resulted in BPF program that executes

419 // correctly. Otherwise, there is an internal error in our BPF compiler.	544 // correctly. Otherwise, there is an internal error in our BPF compiler.

420 // There is really nothing the caller can do until the bug is fixed.	545 // There is really nothing the caller can do until the bug is fixed.

421 #ifndef NDEBUG	546 #ifndef NDEBUG

422 const char *err = NULL;	547 {

423 if (!Verifier::VerifyBPF(*program, evaluators_, &err)) {	548 // If we previously rewrote the BPF program so that it calls user-space

424 SANDBOX_DIE(err);	549 // whenever we return an "errno" value from the filter, then we have to

	550 // wrap our system call evaluator to perform the same operation. Otherwise,

	551 // the verifier would also report a mismatch in return codes.

	552 Evaluators redirected_evaluators;

	553 redirected_evaluators.push_back(

	554 std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_));
	jln (very slow on Chromium) 2012/11/20 01:08:31 nit: two more spaces for indent.
	555

	556 const char *err = NULL;

	557 if (!Verifier::VerifyBPF(*program,

	558 has_unsafe_traps_ ? redirected_evaluators : evaluators_, &err)) {
	jln (very slow on Chromium) 2012/11/20 01:08:31 Nit: indent arguments together.
	559 SANDBOX_DIE(err);

	560 }

425 }	561 }

426 #endif	562 #endif

427	563

428 // We want to be very careful in not imposing any requirements on the	564 // We want to be very careful in not imposing any requirements on the

429 // policies that are set with setSandboxPolicy(). This means, as soon as	565 // policies that are set with setSandboxPolicy(). This means, as soon as

430 // the sandbox is active, we shouldn't be relying on libraries that could	566 // the sandbox is active, we shouldn't be relying on libraries that could

431 // be making system calls. This, for example, means we should avoid	567 // be making system calls. This, for example, means we should avoid

432 // using the heap and we should avoid using STL functions.	568 // using the heap and we should avoid using STL functions.

433 // Temporarily copy the contents of the "program" vector into a	569 // Temporarily copy the contents of the "program" vector into a

434 // stack-allocated array; and then explicitly destroy that object.	570 // stack-allocated array; and then explicitly destroy that object.

435 // This makes sure we don't ex- or implicitly call new/delete after we	571 // This makes sure we don't ex- or implicitly call new/delete after we

436 // installed the BPF filter program in the kernel. Depending on the	572 // installed the BPF filter program in the kernel. Depending on the

437 // system memory allocator that is in effect, these operators can result	573 // system memory allocator that is in effect, these operators can result

438 // in system calls to things like munmap() or brk().	574 // in system calls to things like munmap() or brk().

439 struct sock_filter bpf[program->size()];	575 struct sock_filter bpf[program->size()];

440 const struct sock_fprog prog = {	576 const struct sock_fprog prog = {

441 static_cast<unsigned short>(program->size()), bpf };	577 static_cast<unsigned short>(program->size()), bpf };

442 memcpy(bpf, &(*program)[0], sizeof(bpf));	578 memcpy(bpf, &(*program)[0], sizeof(bpf));

443 delete program;	579 delete program;

444	580

445 // Release memory that is no longer needed	581 // Release memory that is no longer needed

446 evaluators_.clear();	582 evaluators_.clear();

447 errMap_.clear();

448	583

449 #if defined(SECCOMP_BPF_VALGRIND_HACKS)	584 #if defined(SECCOMP_BPF_VALGRIND_HACKS)

450 // Valgrind is really not happy about our sandbox. Disable it when running	585 // Valgrind is really not happy about our sandbox. Disable it when running

451 // in Valgrind. This feature is dangerous and should never be enabled by	586 // in Valgrind. This feature is dangerous and should never be enabled by

452 // default. We protect it behind a pre-processor option.	587 // default. We protect it behind a pre-processor option.

453 if (!RUNNING_ON_VALGRIND)	588 if (!RUNNING_ON_VALGRIND)

454 #endif	589 #endif

455 {	590 {

456 // Install BPF filter program	591 // Install BPF filter program

457 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {	592 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {

(...skipping 96 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
554 struct arch_sigsys sigsys;	689 struct arch_sigsys sigsys;

555 memcpy(&sigsys, &info->_sifields, sizeof(sigsys));	690 memcpy(&sigsys, &info->_sifields, sizeof(sigsys));

556	691

557 // Some more sanity checks.	692 // Some more sanity checks.

558 if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) \|\|	693 if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) \|\|

559 sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) \|\|	694 sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) \|\|

560 sigsys.arch != SECCOMP_ARCH) {	695 sigsys.arch != SECCOMP_ARCH) {

561 goto sigsys_err;	696 goto sigsys_err;

562 }	697 }

563	698

564 // Copy the seccomp-specific data into a arch_seccomp_data structure. This	699 // We need to tell whether we are performing a "normal" callback, or

565 // is what we are showing to TrapFnc callbacks that the system call evaluator	700 // whether we were called recursively from within a UnsafeTrap() callback.

566 // registered with the sandbox.	701 // This is a little tricky to do, because we need to somehow get access to

567 struct arch_seccomp_data data = {	702 // per-thread data from within a signal context. Normal TLS storage is not

568 sigsys.nr,	703 // safely accessible at this time. We could roll our own, but that involves

569 SECCOMP_ARCH,	704 // a lot of complexity. Instead, we co-opt one bit in the signal mask.

570 reinterpret_cast<uint64_t>(sigsys.ip),	705 // If BUS is blocked, we assume that we have been called recursively.

571 {	706 // There is a possibility for collision with other code that needs to do

572 static_cast<uint64_t>(SECCOMP_PARM1(ctx)),	707 // this, but in practice the risks are low.

573 static_cast<uint64_t>(SECCOMP_PARM2(ctx)),	708 intptr_t rc;

574 static_cast<uint64_t>(SECCOMP_PARM3(ctx)),	709 if (has_unsafe_traps_ &&

575 static_cast<uint64_t>(SECCOMP_PARM4(ctx)),	710 sigismember(&ctx->uc_sigmask, SIGBUS)) {
	jln (very slow on Chromium) 2012/11/20 01:08:31 This is pretty hack-ish. It's ok-ish since it's a debugging feature. Maybe it could be made a bit more clear by wrapping this into SetIsInSigSysHandler() / GetIsInSigSysHandler() functions?
576 static_cast<uint64_t>(SECCOMP_PARM5(ctx)),	711 errno = old_errno;

577 static_cast<uint64_t>(SECCOMP_PARM6(ctx))	712 if (sigsys.nr == __NR_clone) {

	713 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");

578 }	714 }

579 };	715 rc = Syscall(sigsys.nr,

	716 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),

	717 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),

	718 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));

	719 } else {

	720 const ErrorCode& err = trapArray_[info->si_errno - 1];

	721 if (!err.safe_) {

	722 sigset_t mask;

	723 sigemptyset(&mask);

	724 sigaddset(&mask, SIGBUS);

	725 sigprocmask(SIG_BLOCK, &mask, NULL);

	726 }

580	727

581 // Now call the TrapFnc callback associated with this particular instance	728 // Copy the seccomp-specific data into a arch_seccomp_data structure. This

582 // of SECCOMP_RET_TRAP.	729 // is what we are showing to TrapFnc callbacks that the system call

583 const ErrorCode& err = trapArray_[info->si_errno - 1];	730 // evaluator registered with the sandbox.

584 intptr_t rc = err.fnc_(data, err.aux_);	731 struct arch_seccomp_data data = {

	732 sigsys.nr,

	733 SECCOMP_ARCH,

	734 reinterpret_cast<uint64_t>(sigsys.ip),

	735 {

	736 static_cast<uint64_t>(SECCOMP_PARM1(ctx)),

	737 static_cast<uint64_t>(SECCOMP_PARM2(ctx)),

	738 static_cast<uint64_t>(SECCOMP_PARM3(ctx)),

	739 static_cast<uint64_t>(SECCOMP_PARM4(ctx)),

	740 static_cast<uint64_t>(SECCOMP_PARM5(ctx)),

	741 static_cast<uint64_t>(SECCOMP_PARM6(ctx))

	742 }

	743 };

	744

	745 // Now call the TrapFnc callback associated with this particular instance

	746 // of SECCOMP_RET_TRAP.

	747 rc = err.fnc_(data, err.aux_);

	748 }

585	749

586 // Update the CPU register that stores the return code of the system call	750 // Update the CPU register that stores the return code of the system call

587 // that we just handled, and restore "errno" to the value that it had	751 // that we just handled, and restore "errno" to the value that it had

588 // before entering the signal handler.	752 // before entering the signal handler.

589 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc);	753 SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc);

590 errno = old_errno;	754 errno = old_errno;

591	755

592 return;	756 return;

593 }	757 }

594	758

595 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {	759 bool Sandbox::TrapKey::operator<(const Sandbox::TrapKey& o) const {

	760 if (fnc != o.fnc) {

	761 return fnc < o.fnc;

	762 } else if (aux != o.aux) {

	763 return aux < o.aux;

	764 } else {

	765 return safe < o.safe;

	766 }

	767 }

	768

	769 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,

	770 bool safe) {

596 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance	771 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance

597 // of a SECCOMP_RET_TRAP.	772 // of a SECCOMP_RET_TRAP.

598 std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux);	773 TrapKey key(fnc, aux, safe);

599 TrapIds::const_iterator iter = trapIds_.find(key);	774 TrapIds::const_iterator iter = trapIds_.find(key);

600 uint16_t id;	775 uint16_t id;

601 if (iter != trapIds_.end()) {	776 if (iter != trapIds_.end()) {

602 // We have seen this pair before. Return the same id that we assigned	777 // We have seen this pair before. Return the same id that we assigned

603 // earlier.	778 // earlier.

604 id = iter->second;	779 id = iter->second;

605 } else {	780 } else {

606 // This is a new pair. Remember it and assign a new id.	781 // This is a new pair. Remember it and assign a new id.

607 // Please note that we have to store traps in memory that doesn't get	782 // Please note that we have to store traps in memory that doesn't get

608 // deallocated when the program is shutting down. A memory leak is	783 // deallocated when the program is shutting down. A memory leak is

609 // intentional, because we might otherwise not be able to execute	784 // intentional, because we might otherwise not be able to execute

610 // system calls part way through the program shutting down	785 // system calls part way through the program shutting down

611 if (!traps_) {	786 if (!traps_) {

612 traps_ = new Traps();	787 traps_ = new Traps();

613 }	788 }

614 if (traps_->size() >= SECCOMP_RET_DATA) {	789 if (traps_->size() >= SECCOMP_RET_DATA) {

615 // In practice, this is pretty much impossible to trigger, as there	790 // In practice, this is pretty much impossible to trigger, as there

616 // are other kernel limitations that restrict overall BPF program sizes.	791 // are other kernel limitations that restrict overall BPF program sizes.

617 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");	792 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");

618 }	793 }

619 id = traps_->size() + 1;	794 id = traps_->size() + 1;

620	795

621 traps_->push_back(ErrorCode(fnc, aux, id));	796 traps_->push_back(ErrorCode(fnc, aux, safe, id));

622 trapIds_[key] = id;	797 trapIds_[key] = id;

623	798

624 // We want to access the traps_ vector from our signal handler. But	799 // We want to access the traps_ vector from our signal handler. But

625 // we are not assured that doing so is async-signal safe. On the other	800 // we are not assured that doing so is async-signal safe. On the other

626 // hand, C++ guarantees that the contents of a vector is stored in a	801 // hand, C++ guarantees that the contents of a vector is stored in a

627 // contiguous C-style array.	802 // contiguous C-style array.

628 // So, we look up the address and size of this array outside of the	803 // So, we look up the address and size of this array outside of the

629 // signal handler, where we can safely do so.	804 // signal handler, where we can safely do so.

630 trapArray_ = &(*traps_)[0];	805 trapArray_ = &(*traps_)[0];

631 trapArraySize_ = id;	806 trapArraySize_ = id;

	807 return traps_->back();

632 }	808 }

633	809

634 ErrorCode err = ErrorCode(fnc, aux, id);	810 return ErrorCode(fnc, aux, safe, id);

635 return errMap_[err.err()] = err;	811 }

	812

	813 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {

	814 return MakeTrap(fnc, aux, true);
	jln (very slow on Chromium) 2012/11/20 01:08:31 Add a comment near true "/* Safe trap */"
	815 }

	816

	817 ErrorCode Sandbox::UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux) {

	818 return MakeTrap(fnc, aux, false);
	jln (very slow on Chromium) 2012/11/20 01:08:31 Same here.
	819 }

	820

	821 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {

	822 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;

	823 return -err;

636 }	824 }

637	825

638 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {	826 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {

639 SANDBOX_DIE(static_cast<char *>(aux));	827 SANDBOX_DIE(static_cast<char *>(aux));

640 }	828 }

641	829

642 ErrorCode Sandbox::Kill(const char *msg) {	830 ErrorCode Sandbox::Kill(const char *msg) {

643 return Trap(bpfFailure, const_cast<char *>(msg));	831 return Trap(bpfFailure, const_cast<char *>(msg));

644 }	832 }

645	833

646 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	834 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

647 int Sandbox::proc_fd_ = -1;	835 int Sandbox::proc_fd_ = -1;

648 Sandbox::Evaluators Sandbox::evaluators_;	836 Sandbox::Evaluators Sandbox::evaluators_;

649 Sandbox::ErrMap Sandbox::errMap_;

650 Sandbox::Traps *Sandbox::traps_ = NULL;	837 Sandbox::Traps *Sandbox::traps_ = NULL;

651 Sandbox::TrapIds Sandbox::trapIds_;	838 Sandbox::TrapIds Sandbox::trapIds_;

652 ErrorCode *Sandbox::trapArray_ = NULL;	839 ErrorCode *Sandbox::trapArray_ = NULL;

653 size_t Sandbox::trapArraySize_ = 0;	840 size_t Sandbox::trapArraySize_ = 0;

	841 bool Sandbox::has_unsafe_traps_ = false;

654	842

655 } // namespace	843 } // namespace

OLD	NEW