sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 12223109: SECCOMP-BPF: Refactor the BPF sandbox API to use fewer "static" fields and methods.

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 12223109: SECCOMP-BPF: Refactor the BPF sandbox API to use fewer "static" fields and methods. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Rebase (now, that the bitmask change has landed in the tree) Created 7 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Some headers on Android are missing cdefs: crbug.com/172337.	5 // Some headers on Android are missing cdefs: crbug.com/172337.

6 // (We can't use OS_ANDROID here since build_config.h is not included).	6 // (We can't use OS_ANDROID here since build_config.h is not included).

7 #if defined(ANDROID)	7 #if defined(ANDROID)

8 #include <sys/cdefs.h>	8 #include <sys/cdefs.h>

9 #endif	9 #endif

10	10

	11 #include <errno.h>

	12 #include <fcntl.h>

	13 #include <string.h>

11 #include <sys/prctl.h>	14 #include <sys/prctl.h>

	15 #include <sys/stat.h>

12 #include <sys/syscall.h>	16 #include <sys/syscall.h>

	17 #include <sys/types.h>

	18 #include <time.h>

	19 #include <unistd.h>

13	20

14 #ifndef SECCOMP_BPF_STANDALONE	21 #ifndef SECCOMP_BPF_STANDALONE

15 #include "base/logging.h"	22 #include "base/logging.h"

16 #include "base/posix/eintr_wrapper.h"	23 #include "base/posix/eintr_wrapper.h"

17 #endif	24 #endif

18	25

19 #include "sandbox/linux/seccomp-bpf/codegen.h"	26 #include "sandbox/linux/seccomp-bpf/codegen.h"

20 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"	27 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

21 #include "sandbox/linux/seccomp-bpf/syscall.h"	28 #include "sandbox/linux/seccomp-bpf/syscall.h"

22 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"	29 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"

23 #include "sandbox/linux/seccomp-bpf/verifier.h"	30 #include "sandbox/linux/seccomp-bpf/verifier.h"

24	31

	32 // The kernel gives us a sandbox, we turn it into a playground :-)

	33 // This is version 2 of the playground; version 1 was built on top of

	34 // pre-BPF seccomp mode.

	35 namespace playground2 {
	jln (very slow on Chromium) 2013/02/15 20:58:25 Most of this can remain a real anonymous namespace, can't it? Please try to save as much of it as you can, instead of embedding it when you don't need to.
	36

	37 const int kExpectedExitCode = 100;

	38

25 namespace {	39 namespace {

26	40

27 void WriteFailedStderrSetupMessage(int out_fd) {

28 const char* error_string = strerror(errno);

29 static const char msg[] = "You have reproduced a puzzling issue.\n"

30 "Please, report to crbug.com/152530!\n"

31 "Failed to set up stderr: ";

32 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&

33 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&

34 HANDLE_EINTR(write(out_fd, "\n", 1))) {

35 }

36 }

37

38 template<class T> int popcount(T x);	41 template<class T> int popcount(T x);

39 template<> int popcount<unsigned int>(unsigned int x) {	42 template<> int popcount<unsigned int>(unsigned int x) {

40 return __builtin_popcount(x);	43 return __builtin_popcount(x);

41 }	44 }

42 template<> int popcount<unsigned long>(unsigned long x) {	45 template<> int popcount<unsigned long>(unsigned long x) {

43 return __builtin_popcountl(x);	46 return __builtin_popcountl(x);

44 }	47 }

45 template<> int popcount<unsigned long long>(unsigned long long x) {	48 template<> int popcount<unsigned long long>(unsigned long long x) {

46 return __builtin_popcountll(x);	49 return __builtin_popcountll(x);

47 }	50 }

48	51

49 } // namespace	52 void WriteFailedStderrSetupMessage(int out_fd) {

50	53 const char* error_string = strerror(errno);

51 // The kernel gives us a sandbox, we turn it into a playground :-)	54 static const char msg[] = "You have reproduced a puzzling issue.\n"

52 // This is version 2 of the playground; version 1 was built on top of	55 "Please, report to crbug.com/152530!\n"

53 // pre-BPF seccomp mode.	56 "Failed to set up stderr: ";

54 namespace playground2 {	57 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&

55	58 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&

56 const int kExpectedExitCode = 100;	59 HANDLE_EINTR(write(out_fd, "\n", 1))) {

	60 }

	61 }

57	62

58 // We define a really simple sandbox policy. It is just good enough for us	63 // We define a really simple sandbox policy. It is just good enough for us

59 // to tell that the sandbox has actually been activated.	64 // to tell that the sandbox has actually been activated.

60 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) {	65 ErrorCode ProbeEvaluator(Sandbox *, int sysnum, void *) __attribute__((const));

	66 ErrorCode ProbeEvaluator(Sandbox *, int sysnum, void *) {

61 switch (sysnum) {	67 switch (sysnum) {

62 case __NR_getpid:	68 case __NR_getpid:

63 // Return EPERM so that we can check that the filter actually ran.	69 // Return EPERM so that we can check that the filter actually ran.

64 return ErrorCode(EPERM);	70 return ErrorCode(EPERM);

65 case __NR_exit_group:	71 case __NR_exit_group:

66 // Allow exit() with a non-default return code.	72 // Allow exit() with a non-default return code.

67 return ErrorCode(ErrorCode::ERR_ALLOWED);	73 return ErrorCode(ErrorCode::ERR_ALLOWED);

68 default:	74 default:

69 // Make everything else fail in an easily recognizable way.	75 // Make everything else fail in an easily recognizable way.

70 return ErrorCode(EINVAL);	76 return ErrorCode(EINVAL);

71 }	77 }

72 }	78 }

73	79

74 void Sandbox::ProbeProcess(void) {	80 void ProbeProcess(void) {

75 if (syscall(__NR_getpid) < 0 && errno == EPERM) {	81 if (syscall(__NR_getpid) < 0 && errno == EPERM) {

76 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));	82 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));

77 }	83 }

78 }	84 }

79	85

80 bool Sandbox::IsValidSyscallNumber(int sysnum) {	86 ErrorCode AllowAllEvaluator(Sandbox *, int sysnum, void *) {

81 return SyscallIterator::IsValid(sysnum);	87 if (!Sandbox::IsValidSyscallNumber(sysnum)) {

82 }

83

84 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) {

85 if (!IsValidSyscallNumber(sysnum)) {

86 return ErrorCode(ENOSYS);	88 return ErrorCode(ENOSYS);

87 }	89 }

88 return ErrorCode(ErrorCode::ERR_ALLOWED);	90 return ErrorCode(ErrorCode::ERR_ALLOWED);

89 }	91 }

90	92

91 void Sandbox::TryVsyscallProcess(void) {	93 void TryVsyscallProcess(void) {

92 time_t current_time;	94 time_t current_time;

93 // time() is implemented as a vsyscall. With an older glibc, with	95 // time() is implemented as a vsyscall. With an older glibc, with

94 // vsyscall=emulate and some versions of the seccomp BPF patch	96 // vsyscall=emulate and some versions of the seccomp BPF patch

95 // we may get SIGKILL-ed. Detect this!	97 // we may get SIGKILL-ed. Detect this!

96 if (time(&current_time) != static_cast<time_t>(-1)) {	98 if (time(&current_time) != static_cast<time_t>(-1)) {

97 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));	99 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));

98 }	100 }

99 }	101 }

100	102

	103 bool IsSingleThreaded(int proc_fd) {

	104 if (proc_fd < 0) {

	105 // Cannot determine whether program is single-threaded. Hope for

	106 // the best...

	107 return true;

	108 }

	109

	110 struct stat sb;

	111 int task = -1;

	112 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||

	113 fstat(task, &sb) != 0 ||

	114 sb.st_nlink != 3 ||

	115 HANDLE_EINTR(close(task))) {

	116 if (task >= 0) {

	117 if (HANDLE_EINTR(close(task))) { }

	118 }

	119 return false;

	120 }

	121 return true;

	122 }

	123

	124 bool IsDenied(const ErrorCode& code) {

	125 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||

	126 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&

	127 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));

	128 }

	129

	130 // Function that can be passed as a callback function to CodeGen::Traverse().

	131 // Checks whether the "insn" returns an UnsafeTrap() ErrorCode. If so, it

	132 // sets the "bool" variable pointed to by "aux".

	133 void CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {

	134 bool *is_unsafe = static_cast<bool *>(aux);

	135 if (!*is_unsafe) {

	136 if (BPF_CLASS(insn->code) == BPF_RET &&

	137 insn->k > SECCOMP_RET_TRAP &&

	138 insn->k - SECCOMP_RET_TRAP <= SECCOMP_RET_DATA) {

	139 const ErrorCode& err =

	140 Trap::ErrorCodeFromTrapId(insn->k & SECCOMP_RET_DATA);

	141 if (err.error_type() != ErrorCode::ET_INVALID && !err.safe()) {

	142 *is_unsafe = true;

	143 }

	144 }

	145 }

	146 }

	147

	148 // A Trap() handler that returns an "errno" value. The value is encoded

	149 // in the "aux" parameter.

	150 intptr_t ReturnErrno(const struct arch_seccomp_data&, void *aux) {

	151 // TrapFnc functions report error by following the native kernel convention

	152 // of returning an exit code in the range of -1..-4096. They do not try to

	153 // set errno themselves. The glibc wrapper that triggered the SIGSYS will

	154 // ultimately do so for us.

	155 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;

	156 return -err;

	157 }

	158

	159 // Function that can be passed as a callback function to CodeGen::Traverse().

	160 // Checks whether the "insn" returns an errno value from a BPF filter. If so,

	161 // it rewrites the instruction to instead call a Trap() handler that does

	162 // the same thing. "aux" is ignored.

	163 void RedirectToUserspace(Instruction *insn, void *aux) {

	164 // When inside an UnsafeTrap() callback, we want to allow all system calls.

	165 // This means, we must conditionally disable the sandbox -- and that's not

	166 // something that kernel-side BPF filters can do, as they cannot inspect

	167 // any state other than the syscall arguments.

	168 // But if we redirect all error handlers to user-space, then we can easily

	169 // make this decision.

	170 // The performance penalty for this extra round-trip to user-space is not

	171 // actually that bad, as we only ever pay it for denied system calls; and a

	172 // typical program has very few of these.

	173 Sandbox *sandbox = static_cast<Sandbox *>(aux);

	174 if (BPF_CLASS(insn->code) == BPF_RET &&

	175 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

	176 insn->k = sandbox->Trap(ReturnErrno,

	177 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();

	178 }

	179 }

	180

	181 // Stackable wrapper around an Evaluators handler. Changes ErrorCodes

	182 // returned by a system call evaluator to match the changes made by

	183 // RedirectToUserspace(). "aux" should be pointer to wrapped system call

	184 // evaluator.

	185 ErrorCode RedirectToUserspaceEvalWrapper(Sandbox *sandbox, int sysnum,

	186 void *aux) {

	187 // We need to replicate the behavior of RedirectToUserspace(), so that our

	188 // Verifier can still work correctly.

	189 Sandbox::Evaluators *evaluators =

	190 reinterpret_cast<Sandbox::Evaluators *>(aux);

	191 const std::pair<Sandbox::EvaluateSyscall, void *>& evaluator =

	192 *evaluators->begin();

	193

	194 ErrorCode err = evaluator.first(sandbox, sysnum, evaluator.second);

	195 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

	196 return sandbox->Trap(ReturnErrno,

	197 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));

	198 }

	199 return err;

	200 }

	201

	202 intptr_t BpfFailure(const struct arch_seccomp_data&, void *aux) {

	203 SANDBOX_DIE(static_cast<char *>(aux));

	204 }

	205

	206 } // namespace

	207

	208 Sandbox::Sandbox()

	209 : quiet_(false),

	210 proc_fd_(-1),

	211 evaluators_(new Evaluators),

	212 conds_(new Conds) {

	213 }

	214

	215 Sandbox::~Sandbox() {

	216 // It is generally unsafe to call any memory allocator operations or to even

	217 // call arbitrary destructors after having installed a new policy. We just

	218 // have no way to tell whether this policy would allow the system calls that

	219 // the constructors can trigger.

	220 // So, we normally destroy all of our complex state prior to starting the

	221 // sandbox. But this won't happen, if the Sandbox object was created and

	222 // never actually used to set up a sandbox. So, just in case, we are

	223 // destroying any remaining state.

	224 // The "if ()" statements are technically superfluous. But let's be explicit

	225 // that we really don't want to run any code, when we already destroyed

	226 // objects before setting up the sandbox.

	227 if (evaluators_) {

	228 delete evaluators_;

	229 }

	230 if (conds_) {

	231 delete conds_;

	232 }

	233 }

	234

	235 bool Sandbox::IsValidSyscallNumber(int sysnum) {

	236 return SyscallIterator::IsValid(sysnum);

	237 }

	238

	239

101 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),	240 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),

102 EvaluateSyscall syscall_evaluator,	241 Sandbox::EvaluateSyscall syscall_evaluator,

103 void *aux,	242 void *aux) {

104 int proc_fd) {

105 // Block all signals before forking a child process. This prevents an	243 // Block all signals before forking a child process. This prevents an

106 // attacker from manipulating our test by sending us an unexpected signal.	244 // attacker from manipulating our test by sending us an unexpected signal.

107 sigset_t old_mask, new_mask;	245 sigset_t old_mask, new_mask;

108 if (sigfillset(&new_mask) ||	246 if (sigfillset(&new_mask) ||

109 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {	247 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {

110 SANDBOX_DIE("sigprocmask() failed");	248 SANDBOX_DIE("sigprocmask() failed");

111 }	249 }

112 int fds[2];	250 int fds[2];

113 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {	251 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {

114 SANDBOX_DIE("pipe() failed");	252 SANDBOX_DIE("pipe() failed");

(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
161 }	299 }

162 if (HANDLE_EINTR(close(fds[1]))) {	300 if (HANDLE_EINTR(close(fds[1]))) {

163 // This call to close() has been failing in strange ways. See	301 // This call to close() has been failing in strange ways. See

164 // crbug.com/152530. So we only fail in debug mode now.	302 // crbug.com/152530. So we only fail in debug mode now.

165 #if !defined(NDEBUG)	303 #if !defined(NDEBUG)

166 WriteFailedStderrSetupMessage(fds[1]);	304 WriteFailedStderrSetupMessage(fds[1]);

167 SANDBOX_DIE(NULL);	305 SANDBOX_DIE(NULL);

168 #endif	306 #endif

169 }	307 }

170	308

171 evaluators_.clear();

172 SetSandboxPolicy(syscall_evaluator, aux);	309 SetSandboxPolicy(syscall_evaluator, aux);

173 set_proc_fd(proc_fd);	310 StartSandbox();

174

175 // By passing "quiet=true" to "startSandboxInternal()" we suppress

176 // messages for expected and benign failures (e.g. if the current

177 // kernel lacks support for BPF filters).

178 StartSandboxInternal(true);

179	311

180 // Run our code in the sandbox.	312 // Run our code in the sandbox.

181 code_in_sandbox();	313 code_in_sandbox();

182	314

183 // code_in_sandbox() is not supposed to return here.	315 // code_in_sandbox() is not supposed to return here.

184 SANDBOX_DIE(NULL);	316 SANDBOX_DIE(NULL);

185 }	317 }

186	318

187 // In the parent process.	319 // In the parent process.

188 if (HANDLE_EINTR(close(fds[1]))) {	320 if (HANDLE_EINTR(close(fds[1]))) {

(...skipping 24 matching lines...) Expand all Loading...
213 SANDBOX_DIE(buf);	345 SANDBOX_DIE(buf);

214 }	346 }

215 }	347 }

216 if (HANDLE_EINTR(close(fds[0]))) {	348 if (HANDLE_EINTR(close(fds[0]))) {

217 SANDBOX_DIE("close() failed");	349 SANDBOX_DIE("close() failed");

218 }	350 }

219	351

220 return rc;	352 return rc;

221 }	353 }

222	354

223 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) {	355 bool Sandbox::KernelSupportSeccompBPF() {

224 return	356 return

225 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) &&	357 RunFunctionInPolicy(ProbeProcess, ProbeEvaluator, 0) &&

226 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0,	358 RunFunctionInPolicy(TryVsyscallProcess, AllowAllEvaluator, 0);

227 proc_fd);

228 }	359 }

229	360

230 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {	361 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {

231 // If the sandbox is currently active, we clearly must have support for	362 // If the sandbox is currently active, we clearly must have support for

232 // sandboxing.	363 // sandboxing.

233 if (status_ == STATUS_ENABLED) {	364 if (status_ == STATUS_ENABLED) {

234 return status_;	365 return status_;

235 }	366 }

236	367

237 // Even if the sandbox was previously available, something might have	368 // Even if the sandbox was previously available, something might have

(...skipping 14 matching lines...) Expand all Loading...
252 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is	383 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is

253 // actually available.	384 // actually available.

254 status_ = STATUS_AVAILABLE;	385 status_ = STATUS_AVAILABLE;

255 return status_;	386 return status_;

256 }	387 }

257	388

258 // If we have not previously checked for availability of the sandbox or if	389 // If we have not previously checked for availability of the sandbox or if

259 // we otherwise don't believe to have a good cached value, we have to	390 // we otherwise don't believe to have a good cached value, we have to

260 // perform a thorough check now.	391 // perform a thorough check now.

261 if (status_ == STATUS_UNKNOWN) {	392 if (status_ == STATUS_UNKNOWN) {

262 status_ = KernelSupportSeccompBPF(proc_fd)	393 // We create our own private copy of a "Sandbox" object. This ensures that

	394 // the object does not have any policies configured, that might interfere

	395 // with the tests done by "KernelSupportSeccompBPF()".

	396 Sandbox sandbox;

	397

	398 // By setting "quiet_ = true" we suppress messages for expected and benign

	399 // failures (e.g. if the current kernel lacks support for BPF filters).

	400 sandbox.quiet_ = true;

	401 sandbox.set_proc_fd(proc_fd);

	402 status_ = sandbox.KernelSupportSeccompBPF()

263 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;	403 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;

264	404

265 // As we are performing our tests from a child process, the run-time	405 // As we are performing our tests from a child process, the run-time

266 // environment that is visible to the sandbox is always guaranteed to be	406 // environment that is visible to the sandbox is always guaranteed to be

267 // single-threaded. Let's check here whether the caller is single-	407 // single-threaded. Let's check here whether the caller is single-

268 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.	408 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.

269 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {	409 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {

270 status_ = STATUS_UNAVAILABLE;	410 status_ = STATUS_UNAVAILABLE;

271 }	411 }

272 }	412 }

273 return status_;	413 return status_;

274 }	414 }

275	415

276 void Sandbox::set_proc_fd(int proc_fd) {	416 void Sandbox::set_proc_fd(int proc_fd) {

277 proc_fd_ = proc_fd;	417 proc_fd_ = proc_fd;

278 }	418 }

279	419

280 void Sandbox::StartSandboxInternal(bool quiet) {	420 void Sandbox::StartSandbox() {

281 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {	421 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {

282 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "	422 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "

283 "unavailable");	423 "unavailable");

284 } else if (status_ == STATUS_ENABLED) {	424 } else if (!evaluators_ || !conds_) {

285 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "	425 SANDBOX_DIE("Cannot repeatedly start sandbox. Create a separate Sandbox "

286 "setSandboxPolicy() to stack policies instead");	426 "object instead.");

287 }	427 }

288 if (proc_fd_ < 0) {	428 if (proc_fd_ < 0) {

289 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);	429 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);

290 }	430 }

291 if (proc_fd_ < 0) {	431 if (proc_fd_ < 0) {

292 // For now, continue in degraded mode, if we can't access /proc.	432 // For now, continue in degraded mode, if we can't access /proc.

293 // In the future, we might want to tighten this requirement.	433 // In the future, we might want to tighten this requirement.

294 }	434 }

295 if (!IsSingleThreaded(proc_fd_)) {	435 if (!IsSingleThreaded(proc_fd_)) {

296 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");	436 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");

297 }	437 }

298	438

299 // We no longer need access to any files in /proc. We want to do this	439 // We no longer need access to any files in /proc. We want to do this

300 // before installing the filters, just in case that our policy denies	440 // before installing the filters, just in case that our policy denies

301 // close().	441 // close().

302 if (proc_fd_ >= 0) {	442 if (proc_fd_ >= 0) {

303 if (HANDLE_EINTR(close(proc_fd_))) {	443 if (HANDLE_EINTR(close(proc_fd_))) {

304 SANDBOX_DIE("Failed to close file descriptor for /proc");	444 SANDBOX_DIE("Failed to close file descriptor for /proc");

305 }	445 }

306 proc_fd_ = -1;	446 proc_fd_ = -1;

307 }	447 }

308	448

309 // Install the filters.	449 // Install the filters.

310 InstallFilter(quiet);	450 InstallFilter();

311	451

312 // We are now inside the sandbox.	452 // We are now inside the sandbox.

313 status_ = STATUS_ENABLED;	453 status_ = STATUS_ENABLED;

314 }	454 }

315	455

316 bool Sandbox::IsSingleThreaded(int proc_fd) {

317 if (proc_fd < 0) {

318 // Cannot determine whether program is single-threaded. Hope for

319 // the best...

320 return true;

321 }

322

323 struct stat sb;

324 int task = -1;

325 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||

326 fstat(task, &sb) != 0 ||

327 sb.st_nlink != 3 ||

328 HANDLE_EINTR(close(task))) {

329 if (task >= 0) {

330 if (HANDLE_EINTR(close(task))) { }

331 }

332 return false;

333 }

334 return true;

335 }

336

337 bool Sandbox::IsDenied(const ErrorCode& code) {

338 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||

339 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&

340 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));

341 }

342

343 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,	456 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,

344 void *aux) {	457 void *aux) {

345 for (SyscallIterator iter(true); !iter.Done(); ) {	458 for (SyscallIterator iter(true); !iter.Done(); ) {

346 uint32_t sysnum = iter.Next();	459 uint32_t sysnum = iter.Next();

347 if (!IsDenied(syscall_evaluator(sysnum, aux))) {	460 if (!IsDenied(syscall_evaluator(this, sysnum, aux))) {

348 SANDBOX_DIE("Policies should deny system calls that are outside the "	461 SANDBOX_DIE("Policies should deny system calls that are outside the "

349 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");	462 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");

350 }	463 }

351 }	464 }

352 return;	465 return;

353 }	466 }

354	467

355 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {

356 bool *is_unsafe = static_cast<bool *>(aux);

357 if (!*is_unsafe) {

358 if (BPF_CLASS(insn->code) == BPF_RET &&

359 insn->k > SECCOMP_RET_TRAP &&

360 insn->k - SECCOMP_RET_TRAP <= SECCOMP_RET_DATA) {

361 const ErrorCode& err =

362 Trap::ErrorCodeFromTrapId(insn->k & SECCOMP_RET_DATA);

363 if (err.error_type_ != ErrorCode::ET_INVALID && !err.safe_) {

364 *is_unsafe = true;

365 }

366 }

367 }

368 }

369

370 void Sandbox::RedirectToUserspace(Instruction *insn, void *) {

371 // When inside an UnsafeTrap() callback, we want to allow all system calls.

372 // This means, we must conditionally disable the sandbox -- and that's not

373 // something that kernel-side BPF filters can do, as they cannot inspect

374 // any state other than the syscall arguments.

375 // But if we redirect all error handlers to user-space, then we can easily

376 // make this decision.

377 // The performance penalty for this extra round-trip to user-space is not

378 // actually that bad, as we only ever pay it for denied system calls; and a

379 // typical program has very few of these.

380 if (BPF_CLASS(insn->code) == BPF_RET &&

381 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

382 insn->k = Trap(ReturnErrno,

383 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();

384 }

385 }

386

387 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {

388 // We need to replicate the behavior of RedirectToUserspace(), so that our

389 // Verifier can still work correctly.

390 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);

391 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();

392 ErrorCode err = evaluator.first(sysnum, evaluator.second);

393 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

394 return Trap(ReturnErrno,

395 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));

396 }

397 return err;

398 }

399

400 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {	468 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {

401 if (status_ == STATUS_ENABLED) {	469 if (!evaluators_ || !conds_) {

402 SANDBOX_DIE("Cannot change policy after sandbox has started");	470 SANDBOX_DIE("Cannot change policy after sandbox has started");

403 }	471 }

404 PolicySanityChecks(syscall_evaluator, aux);	472 PolicySanityChecks(syscall_evaluator, aux);

405 evaluators_.push_back(std::make_pair(syscall_evaluator, aux));	473 evaluators_->push_back(std::make_pair(syscall_evaluator, aux));

406 }	474 }

407	475

408 void Sandbox::InstallFilter(bool quiet) {	476 void Sandbox::InstallFilter() {

409 // We want to be very careful in not imposing any requirements on the	477 // We want to be very careful in not imposing any requirements on the

410 // policies that are set with SetSandboxPolicy(). This means, as soon as	478 // policies that are set with SetSandboxPolicy(). This means, as soon as

411 // the sandbox is active, we shouldn't be relying on libraries that could	479 // the sandbox is active, we shouldn't be relying on libraries that could

412 // be making system calls. This, for example, means we should avoid	480 // be making system calls. This, for example, means we should avoid

413 // using the heap and we should avoid using STL functions.	481 // using the heap and we should avoid using STL functions.

414 // Temporarily copy the contents of the "program" vector into a	482 // Temporarily copy the contents of the "program" vector into a

415 // stack-allocated array; and then explicitly destroy that object.	483 // stack-allocated array; and then explicitly destroy that object.

416 // This makes sure we don't ex- or implicitly call new/delete after we	484 // This makes sure we don't ex- or implicitly call new/delete after we

417 // installed the BPF filter program in the kernel. Depending on the	485 // installed the BPF filter program in the kernel. Depending on the

418 // system memory allocator that is in effect, these operators can result	486 // system memory allocator that is in effect, these operators can result

419 // in system calls to things like munmap() or brk().	487 // in system calls to things like munmap() or brk().

420 Program *program = AssembleFilter(false /* force_verification */);	488 Program *program = AssembleFilter(false /* force_verification */);

421	489

422 struct sock_filter bpf[program->size()];	490 struct sock_filter bpf[program->size()];

423 const struct sock_fprog prog = {	491 const struct sock_fprog prog = {

424 static_cast<unsigned short>(program->size()), bpf };	492 static_cast<unsigned short>(program->size()), bpf };

425 memcpy(bpf, &(*program)[0], sizeof(bpf));	493 memcpy(bpf, &(*program)[0], sizeof(bpf));

426 delete program;	494 delete program;

427	495

428 // Release memory that is no longer needed	496 // Release memory that is no longer needed

429 evaluators_.clear();	497 delete evaluators_;

430 conds_.clear();	498 delete conds_;

	499 evaluators_ = NULL;

	500 conds_ = NULL;

431	501

432 // Install BPF filter program	502 // Install BPF filter program

433 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {	503 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {

434 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");	504 SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");

435 } else {	505 } else {

436 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {	506 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

437 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");	507 SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");

438 }	508 }

439 }	509 }

440	510

441 return;	511 return;

442 }	512 }

443	513

444 Sandbox::Program *Sandbox::AssembleFilter(bool force_verification) {	514 Sandbox::Program *Sandbox::AssembleFilter(bool force_verification) {

445 #if !defined(NDEBUG)	515 #if !defined(NDEBUG)

446 force_verification = true;	516 force_verification = true;

447 #endif	517 #endif

448	518

449 // Verify that the user pushed a policy.	519 // Verify that the user pushed a policy.

450 if (evaluators_.empty()) {	520 if (evaluators_->empty()) {

451 SANDBOX_DIE("Failed to configure system call filters");	521 SANDBOX_DIE("Failed to configure system call filters");

452 }	522 }

453	523

454 // We can't handle stacked evaluators, yet. We'll get there eventually	524 // We can't handle stacked evaluators, yet. We'll get there eventually

455 // though. Hang tight.	525 // though. Hang tight.

456 if (evaluators_.size() != 1) {	526 if (evaluators_->size() != 1) {

457 SANDBOX_DIE("Not implemented");	527 SANDBOX_DIE("Not implemented");

458 }	528 }

459	529

460 // Assemble the BPF filter program.	530 // Assemble the BPF filter program.

461 CodeGen *gen = new CodeGen();	531 CodeGen *gen = new CodeGen();

462 if (!gen) {	532 if (!gen) {

463 SANDBOX_DIE("Out of memory");	533 SANDBOX_DIE("Out of memory");

464 }	534 }

465	535

466 // If the architecture doesn't match SECCOMP_ARCH, disallow the	536 // If the architecture doesn't match SECCOMP_ARCH, disallow the

(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
502 // As support for unsafe jumps essentially defeats all the security	572 // As support for unsafe jumps essentially defeats all the security

503 // measures that the sandbox provides, we print a big warning message --	573 // measures that the sandbox provides, we print a big warning message --

504 // and of course, we make sure to only ever enable this feature if it	574 // and of course, we make sure to only ever enable this feature if it

505 // is actually requested by the sandbox policy.	575 // is actually requested by the sandbox policy.

506 if (has_unsafe_traps) {	576 if (has_unsafe_traps) {

507 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {	577 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {

508 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "	578 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "

509 "architecture");	579 "architecture");

510 }	580 }

511	581

512 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;	582 EvaluateSyscall evaluateSyscall = evaluators_->begin()->first;

513 void *aux = evaluators_.begin()->second;	583 void *aux = evaluators_->begin()->second;

514 if (!evaluateSyscall(__NR_rt_sigprocmask, aux).	584 if (!evaluateSyscall(this, __NR_rt_sigprocmask, aux).

515 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) \|\|	585 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) \|\|

516 !evaluateSyscall(__NR_rt_sigreturn, aux).	586 !evaluateSyscall(this, __NR_rt_sigreturn, aux).

517 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))	587 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

518 #if defined(__NR_sigprocmask)	588 #if defined(__NR_sigprocmask)

519 \|\| !evaluateSyscall(__NR_sigprocmask, aux).	589 \|\| !evaluateSyscall(this, __NR_sigprocmask, aux).

520 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))	590 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

521 #endif	591 #endif

522 #if defined(__NR_sigreturn)	592 #if defined(__NR_sigreturn)

523 \|\| !evaluateSyscall(__NR_sigreturn, aux).	593 \|\| !evaluateSyscall(this, __NR_sigreturn, aux).

524 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))	594 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

525 #endif	595 #endif

526 ) {	596 ) {

527 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "	597 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "

528 "unconditionally allow sigreturn() and sigprocmask()");	598 "unconditionally allow sigreturn() and sigprocmask()");

529 }	599 }

530	600

531 if (!Trap::EnableUnsafeTrapsInSigSysHandler()) {	601 if (!Trap::EnableUnsafeTrapsInSigSysHandler()) {

532 // We should never be able to get here, as UnsafeTrap() should never	602 // We should never be able to get here, as UnsafeTrap() should never

533 // actually return a valid ErrorCode object unless the user set the	603 // actually return a valid ErrorCode object unless the user set the

534 // CHROME_SANDBOX_DEBUGGING environment variable; and therefore,	604 // CHROME_SANDBOX_DEBUGGING environment variable; and therefore,

535 // "has_unsafe_traps" would always be false. But better double-check	605 // "has_unsafe_traps" would always be false. But better double-check

536 // than enabling dangerous code.	606 // than enabling dangerous code.

537 SANDBOX_DIE("We'd rather die than enable unsafe traps");	607 SANDBOX_DIE("We'd rather die than enable unsafe traps");

538 }	608 }

539 gen->Traverse(jumptable, RedirectToUserspace, NULL);	609 gen->Traverse(jumptable, RedirectToUserspace, this);

540	610

541 // Allow system calls, if they originate from our magic return address	611 // Allow system calls, if they originate from our magic return address

542 // (which we can query by calling SandboxSyscall(-1)).	612 // (which we can query by calling SandboxSyscall(-1)).

543 uintptr_t syscall_entry_point =	613 uintptr_t syscall_entry_point =

544 static_cast<uintptr_t>(SandboxSyscall(-1));	614 static_cast<uintptr_t>(SandboxSyscall(-1));

545 uint32_t low = static_cast<uint32_t>(syscall_entry_point);	615 uint32_t low = static_cast<uint32_t>(syscall_entry_point);

546 #if __SIZEOF_POINTER__ > 4	616 #if __SIZEOF_POINTER__ > 4

547 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);	617 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);

548 #endif	618 #endif

549	619

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
608 return program;	678 return program;

609 }	679 }

610	680

611 void Sandbox::VerifyProgram(const Program& program, bool has_unsafe_traps) {	681 void Sandbox::VerifyProgram(const Program& program, bool has_unsafe_traps) {

612 // If we previously rewrote the BPF program so that it calls user-space	682 // If we previously rewrote the BPF program so that it calls user-space

613 // whenever we return an "errno" value from the filter, then we have to	683 // whenever we return an "errno" value from the filter, then we have to

614 // wrap our system call evaluator to perform the same operation. Otherwise,	684 // wrap our system call evaluator to perform the same operation. Otherwise,

615 // the verifier would also report a mismatch in return codes.	685 // the verifier would also report a mismatch in return codes.

616 Evaluators redirected_evaluators;	686 Evaluators redirected_evaluators;

617 redirected_evaluators.push_back(	687 redirected_evaluators.push_back(

618 std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_));	688 std::make_pair(RedirectToUserspaceEvalWrapper, evaluators_));

619	689

620 const char *err = NULL;	690 const char *err = NULL;

621 if (!Verifier::VerifyBPF(	691 if (!Verifier::VerifyBPF(

	692 this,

622 program,	693 program,

623 has_unsafe_traps ? redirected_evaluators : evaluators_,	694 has_unsafe_traps ? redirected_evaluators : *evaluators_,

624 &err)) {	695 &err)) {

625 CodeGen::PrintProgram(program);	696 CodeGen::PrintProgram(program);

626 SANDBOX_DIE(err);	697 SANDBOX_DIE(err);

627 }	698 }

628 }	699 }

629	700

630 void Sandbox::FindRanges(Ranges *ranges) {	701 void Sandbox::FindRanges(Ranges *ranges) {

631 // Please note that "struct seccomp_data" defines system calls as a signed	702 // Please note that "struct seccomp_data" defines system calls as a signed

632 // int32_t, but BPF instructions always operate on unsigned quantities. We	703 // int32_t, but BPF instructions always operate on unsigned quantities. We

633 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,	704 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,

634 // and then verifying that the rest of the number range (both positive and	705 // and then verifying that the rest of the number range (both positive and

635 // negative) all return the same ErrorCode.	706 // negative) all return the same ErrorCode.

636 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first;	707 EvaluateSyscall evaluate_syscall = evaluators_->begin()->first;

637 void *aux = evaluators_.begin()->second;	708 void *aux = evaluators_->begin()->second;

638 uint32_t old_sysnum = 0;	709 uint32_t old_sysnum = 0;

639 ErrorCode old_err = evaluate_syscall(old_sysnum, aux);	710 ErrorCode old_err = evaluate_syscall(this, old_sysnum, aux);

640 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux);	711 ErrorCode invalid_err = evaluate_syscall(this, MIN_SYSCALL - 1,

	712 aux);

641 for (SyscallIterator iter(false); !iter.Done(); ) {	713 for (SyscallIterator iter(false); !iter.Done(); ) {

642 uint32_t sysnum = iter.Next();	714 uint32_t sysnum = iter.Next();

643 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux);	715 ErrorCode err = evaluate_syscall(this, static_cast<int>(sysnum), aux);

644 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {	716 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {

645 // A proper sandbox policy should always treat system calls outside of	717 // A proper sandbox policy should always treat system calls outside of

646 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns	718 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns

647 // "false" for SyscallIterator::IsValid()) identically. Typically, all	719 // "false" for SyscallIterator::IsValid()) identically. Typically, all

648 // of these system calls would be denied with the same ErrorCode.	720 // of these system calls would be denied with the same ErrorCode.

649 SANDBOX_DIE("Invalid seccomp policy");	721 SANDBOX_DIE("Invalid seccomp policy");

650 }	722 }

651 if (!err.Equals(old_err) \|\| iter.Done()) {	723 if (!err.Equals(old_err) \|\| iter.Done()) {

652 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));	724 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));

653 old_sysnum = sysnum;	725 old_sysnum = sysnum;

(...skipping 229 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
883 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) {	955 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) {

884 return SandboxSyscall(args.nr,	956 return SandboxSyscall(args.nr,

885 static_cast<intptr_t>(args.args[0]),	957 static_cast<intptr_t>(args.args[0]),

886 static_cast<intptr_t>(args.args[1]),	958 static_cast<intptr_t>(args.args[1]),

887 static_cast<intptr_t>(args.args[2]),	959 static_cast<intptr_t>(args.args[2]),

888 static_cast<intptr_t>(args.args[3]),	960 static_cast<intptr_t>(args.args[3]),

889 static_cast<intptr_t>(args.args[4]),	961 static_cast<intptr_t>(args.args[4]),

890 static_cast<intptr_t>(args.args[5]));	962 static_cast<intptr_t>(args.args[5]));

891 }	963 }

892	964

893 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {

894 // TrapFnc functions report error by following the native kernel convention

895 // of returning an exit code in the range of -1..-4096. They do not try to

896 // set errno themselves. The glibc wrapper that triggered the SIGSYS will

897 // ultimately do so for us.

898 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;

899 return -err;

900 }

901

902 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,	965 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,

903 ErrorCode::Operation op, uint64_t value,	966 ErrorCode::Operation op, uint64_t value,

904 const ErrorCode& passed, const ErrorCode& failed) {	967 const ErrorCode& passed, const ErrorCode& failed) {

905 return ErrorCode(argno, width, op, value,	968 return ErrorCode(argno, width, op, value,

906 &*conds_.insert(passed).first,	969 &*conds_->insert(passed).first,

907 &*conds_.insert(failed).first);	970 &*conds_->insert(failed).first);

908 }

909

910 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) {

911 SANDBOX_DIE(static_cast<char *>(aux));

912 }	971 }

913	972

914 ErrorCode Sandbox::Kill(const char *msg) {	973 ErrorCode Sandbox::Kill(const char *msg) {

915 return Trap(BpfFailure, const_cast<char *>(msg));	974 return Trap(BpfFailure, const_cast<char *>(msg));

916 }	975 }

917	976

918 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	977 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

919 int Sandbox::proc_fd_ = -1;

920 Sandbox::Evaluators Sandbox::evaluators_;

921 Sandbox::Conds Sandbox::conds_;

922	978

923 } // namespace	979 } // namespace
	jln (very slow on Chromium) 2013/02/15 20:58:25: // namespace playground2
OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc » ('j') | sandbox/linux/seccomp-bpf/verifier.cc » ('J')