sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 12223109: SECCOMP-BPF: Refactor the BPF sandbox API to use fewer "static" fields and methods.

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 12223109: SECCOMP-BPF: Refactor the BPF sandbox API to use fewer "static" fields and methods. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Rebase Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Some headers on Android are missing cdefs: crbug.com/172337.	5 // Some headers on Android are missing cdefs: crbug.com/172337.

6 // (We can't use OS_ANDROID here since build_config.h is not included).	6 // (We can't use OS_ANDROID here since build_config.h is not included).

7 #if defined(ANDROID)	7 #if defined(ANDROID)

8 #include <sys/cdefs.h>	8 #include <sys/cdefs.h>

9 #endif	9 #endif

10	10

	11 #include <errno.h>

	12 #include <fcntl.h>

	13 #include <string.h>

11 #include <sys/prctl.h>	14 #include <sys/prctl.h>

	15 #include <sys/stat.h>

12 #include <sys/syscall.h>	16 #include <sys/syscall.h>

	17 #include <sys/types.h>

	18 #include <time.h>

	19 #include <unistd.h>

13	20

14 #ifndef SECCOMP_BPF_STANDALONE	21 #ifndef SECCOMP_BPF_STANDALONE

15 #include "base/logging.h"	22 #include "base/logging.h"

16 #include "base/posix/eintr_wrapper.h"	23 #include "base/posix/eintr_wrapper.h"

17 #endif	24 #endif

18	25

19 #include "sandbox/linux/seccomp-bpf/codegen.h"	26 #include "sandbox/linux/seccomp-bpf/codegen.h"

20 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"	27 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

21 #include "sandbox/linux/seccomp-bpf/syscall.h"	28 #include "sandbox/linux/seccomp-bpf/syscall.h"

22 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"	29 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"

23 #include "sandbox/linux/seccomp-bpf/verifier.h"	30 #include "sandbox/linux/seccomp-bpf/verifier.h"

24	31

25 namespace {	32 namespace {

26	33

27 void WriteFailedStderrSetupMessage(int out_fd) {	34 using playground2::ErrorCode;

28 const char* error_string = strerror(errno);	35 using playground2::Instruction;

29 static const char msg[] = "You have reproduced a puzzling issue.\n"	36 using playground2::Sandbox;

30 "Please, report to crbug.com/152530!\n"	37 using playground2::Trap;

31 "Failed to set up stderr: ";	38 using playground2::arch_seccomp_data;

32 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&	39

33 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&	40 const int kExpectedExitCode = 100;

34 HANDLE_EINTR(write(out_fd, "\n", 1))) {

35 }

36 }

37	41

38 template<class T> int popcount(T x);	42 template<class T> int popcount(T x);

39 template<> int popcount<unsigned int>(unsigned int x) {	43 template<> int popcount<unsigned int>(unsigned int x) {

40 return __builtin_popcount(x);	44 return __builtin_popcount(x);

41 }	45 }

42 template<> int popcount<unsigned long>(unsigned long x) {	46 template<> int popcount<unsigned long>(unsigned long x) {

43 return __builtin_popcountl(x);	47 return __builtin_popcountl(x);

44 }	48 }

45 template<> int popcount<unsigned long long>(unsigned long long x) {	49 template<> int popcount<unsigned long long>(unsigned long long x) {

46 return __builtin_popcountll(x);	50 return __builtin_popcountll(x);

47 }	51 }

48	52

49 } // namespace	53 void WriteFailedStderrSetupMessage(int out_fd) {

50	54 const char* error_string = strerror(errno);

51 // The kernel gives us a sandbox, we turn it into a playground :-)	55 static const char msg[] = "You have reproduced a puzzling issue.\n"

52 // This is version 2 of the playground; version 1 was built on top of	56 "Please, report to crbug.com/152530!\n"

53 // pre-BPF seccomp mode.	57 "Failed to set up stderr: ";

54 namespace playground2 {	58 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&

55	59 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&

56 const int kExpectedExitCode = 100;	60 HANDLE_EINTR(write(out_fd, "\n", 1))) {

	61 }

	62 }

57	63

58 // We define a really simple sandbox policy. It is just good enough for us	64 // We define a really simple sandbox policy. It is just good enough for us

59 // to tell that the sandbox has actually been activated.	65 // to tell that the sandbox has actually been activated.

60 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) {	66 ErrorCode ProbeEvaluator(Sandbox *, int sysnum, void *) __attribute__((const));

	67 ErrorCode ProbeEvaluator(Sandbox *, int sysnum, void *) {

61 switch (sysnum) {	68 switch (sysnum) {

62 case __NR_getpid:	69 case __NR_getpid:

63 // Return EPERM so that we can check that the filter actually ran.	70 // Return EPERM so that we can check that the filter actually ran.

64 return ErrorCode(EPERM);	71 return ErrorCode(EPERM);

65 case __NR_exit_group:	72 case __NR_exit_group:

66 // Allow exit() with a non-default return code.	73 // Allow exit() with a non-default return code.

67 return ErrorCode(ErrorCode::ERR_ALLOWED);	74 return ErrorCode(ErrorCode::ERR_ALLOWED);

68 default:	75 default:

69 // Make everything else fail in an easily recognizable way.	76 // Make everything else fail in an easily recognizable way.

70 return ErrorCode(EINVAL);	77 return ErrorCode(EINVAL);

71 }	78 }

72 }	79 }

73	80

74 void Sandbox::ProbeProcess(void) {	81 void ProbeProcess(void) {

75 if (syscall(__NR_getpid) < 0 && errno == EPERM) {	82 if (syscall(__NR_getpid) < 0 && errno == EPERM) {

76 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));	83 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));

77 }	84 }

78 }	85 }

79	86

80 bool Sandbox::IsValidSyscallNumber(int sysnum) {	87 ErrorCode AllowAllEvaluator(Sandbox *, int sysnum, void *) {

81 return SyscallIterator::IsValid(sysnum);	88 if (!Sandbox::IsValidSyscallNumber(sysnum)) {

82 }

83

84 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) {

85 if (!IsValidSyscallNumber(sysnum)) {

86 return ErrorCode(ENOSYS);	89 return ErrorCode(ENOSYS);

87 }	90 }

88 return ErrorCode(ErrorCode::ERR_ALLOWED);	91 return ErrorCode(ErrorCode::ERR_ALLOWED);

89 }	92 }

90	93

91 void Sandbox::TryVsyscallProcess(void) {	94 void TryVsyscallProcess(void) {

92 time_t current_time;	95 time_t current_time;

93 // time() is implemented as a vsyscall. With an older glibc, with	96 // time() is implemented as a vsyscall. With an older glibc, with

94 // vsyscall=emulate and some versions of the seccomp BPF patch	97 // vsyscall=emulate and some versions of the seccomp BPF patch

95 // we may get SIGKILL-ed. Detect this!	98 // we may get SIGKILL-ed. Detect this!

96 if (time(&current_time) != static_cast<time_t>(-1)) {	99 if (time(&current_time) != static_cast<time_t>(-1)) {

97 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));	100 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));

98 }	101 }

99 }	102 }

100	103

	104 bool IsSingleThreaded(int proc_fd) {

	105 if (proc_fd < 0) {

	106 // Cannot determine whether program is single-threaded. Hope for

	107 // the best...

	108 return true;

	109 }

	110

	111 struct stat sb;

	112 int task = -1;

	113 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||

	114 fstat(task, &sb) != 0 ||

	115 sb.st_nlink != 3 ||

	116 HANDLE_EINTR(close(task))) {

	117 if (task >= 0) {

	118 if (HANDLE_EINTR(close(task))) { }

	119 }

	120 return false;

	121 }

	122 return true;

	123 }

	124

	125 bool IsDenied(const ErrorCode& code) {

	126 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||

	127 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&

	128 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));

	129 }

	130

	131 // Function that can be passed as a callback function to CodeGen::Traverse().

	132 // Checks whether the "insn" returns an UnsafeTrap() ErrorCode. If so, it

	133 // sets the "bool" variable pointed to by "aux".

	134 void CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {

	135 bool *is_unsafe = static_cast<bool *>(aux);

	136 if (!*is_unsafe) {

	137 if (BPF_CLASS(insn->code) == BPF_RET &&

	138 insn->k > SECCOMP_RET_TRAP &&

	139 insn->k - SECCOMP_RET_TRAP <= SECCOMP_RET_DATA) {

	140 const ErrorCode& err =

	141 Trap::ErrorCodeFromTrapId(insn->k & SECCOMP_RET_DATA);

	142 if (err.error_type() != ErrorCode::ET_INVALID && !err.safe()) {

	143 *is_unsafe = true;

	144 }

	145 }

	146 }

	147 }

	148

	149 // A Trap() handler that returns an "errno" value. The value is encoded

	150 // in the "aux" parameter.

	151 intptr_t ReturnErrno(const struct arch_seccomp_data&, void *aux) {

	152 // TrapFnc functions report error by following the native kernel convention

	153 // of returning an exit code in the range of -1..-4096. They do not try to

	154 // set errno themselves. The glibc wrapper that triggered the SIGSYS will

	155 // ultimately do so for us.

	156 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;

	157 return -err;

	158 }

	159

	160 // Function that can be passed as a callback function to CodeGen::Traverse().

	161 // Checks whether the "insn" returns an errno value from a BPF filter. If so,

	162 // it rewrites the instruction to instead call a Trap() handler that does

	163 // the same thing. "aux" is ignored.

	164 void RedirectToUserspace(Instruction *insn, void *aux) {

	165 // When inside an UnsafeTrap() callback, we want to allow all system calls.

	166 // This means, we must conditionally disable the sandbox -- and that's not

	167 // something that kernel-side BPF filters can do, as they cannot inspect

	168 // any state other than the syscall arguments.

	169 // But if we redirect all error handlers to user-space, then we can easily

	170 // make this decision.

	171 // The performance penalty for this extra round-trip to user-space is not

	172 // actually that bad, as we only ever pay it for denied system calls; and a

	173 // typical program has very few of these.

	174 Sandbox *sandbox = static_cast<Sandbox *>(aux);

	175 if (BPF_CLASS(insn->code) == BPF_RET &&

	176 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

	177 insn->k = sandbox->Trap(ReturnErrno,

	178 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();

	179 }

	180 }

	181

	182 // Stackable wrapper around an Evaluators handler. Changes ErrorCodes

	183 // returned by a system call evaluator to match the changes made by

	184 // RedirectToUserspace(). "aux" should be pointer to wrapped system call

	185 // evaluator.

	186 ErrorCode RedirectToUserspaceEvalWrapper(Sandbox *sandbox, int sysnum,

	187 void *aux) {

	188 // We need to replicate the behavior of RedirectToUserspace(), so that our

	189 // Verifier can still work correctly.

	190 Sandbox::Evaluators *evaluators =

	191 reinterpret_cast<Sandbox::Evaluators *>(aux);

	192 const std::pair<Sandbox::EvaluateSyscall, void *>& evaluator =

	193 *evaluators->begin();

	194

	195 ErrorCode err = evaluator.first(sandbox, sysnum, evaluator.second);

	196 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

	197 return sandbox->Trap(ReturnErrno,

	198 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));

	199 }

	200 return err;

	201 }

	202

	203 intptr_t BpfFailure(const struct arch_seccomp_data&, void *aux) {

	204 SANDBOX_DIE(static_cast<char *>(aux));

	205 }

	206

	207 } // namespace

	208

	209 // The kernel gives us a sandbox, we turn it into a playground :-)

	210 // This is version 2 of the playground; version 1 was built on top of

	211 // pre-BPF seccomp mode.

	212 namespace playground2 {

	213

	214 Sandbox::Sandbox()

	215 : quiet_(false),

	216 proc_fd_(-1),

	217 evaluators_(new Evaluators),

	218 conds_(new Conds) {

	219 }

	220

	221 Sandbox::~Sandbox() {

	222 // It is generally unsafe to call any memory allocator operations or to even

	223 // call arbitrary destructors after having installed a new policy. We just

	224 // have no way to tell whether this policy would allow the system calls that

	225 // the constructors can trigger.

	226 // So, we normally destroy all of our complex state prior to starting the

	227 // sandbox. But this won't happen, if the Sandbox object was created and

	228 // never actually used to set up a sandbox. So, just in case, we are

	229 // destroying any remaining state.

	230 // The "if ()" statements are technically superfluous. But let's be explicit

	231 // that we really don't want to run any code, when we already destroyed

	232 // objects before setting up the sandbox.

	233 if (evaluators_) {

	234 delete evaluators_;

	235 }

	236 if (conds_) {

	237 delete conds_;

	238 }

	239 }

	240

	241 bool Sandbox::IsValidSyscallNumber(int sysnum) {

	242 return SyscallIterator::IsValid(sysnum);

	243 }

	244

	245

101 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),	246 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),

102 EvaluateSyscall syscall_evaluator,	247 Sandbox::EvaluateSyscall syscall_evaluator,

103 void *aux,	248 void *aux) {

104 int proc_fd) {

105 // Block all signals before forking a child process. This prevents an	249 // Block all signals before forking a child process. This prevents an

106 // attacker from manipulating our test by sending us an unexpected signal.	250 // attacker from manipulating our test by sending us an unexpected signal.

107 sigset_t old_mask, new_mask;	251 sigset_t old_mask, new_mask;

108 if (sigfillset(&new_mask) ||	252 if (sigfillset(&new_mask) ||

109 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {	253 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {

110 SANDBOX_DIE("sigprocmask() failed");	254 SANDBOX_DIE("sigprocmask() failed");

111 }	255 }

112 int fds[2];	256 int fds[2];

113 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {	257 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {

114 SANDBOX_DIE("pipe() failed");	258 SANDBOX_DIE("pipe() failed");

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
161 }	305 }

162 if (HANDLE_EINTR(close(fds[1]))) {	306 if (HANDLE_EINTR(close(fds[1]))) {

163 // This call to close() has been failing in strange ways. See	307 // This call to close() has been failing in strange ways. See

164 // crbug.com/152530. So we only fail in debug mode now.	308 // crbug.com/152530. So we only fail in debug mode now.

165 #if !defined(NDEBUG)	309 #if !defined(NDEBUG)

166 WriteFailedStderrSetupMessage(fds[1]);	310 WriteFailedStderrSetupMessage(fds[1]);

167 SANDBOX_DIE(NULL);	311 SANDBOX_DIE(NULL);

168 #endif	312 #endif

169 }	313 }

170	314

171 evaluators_.clear();

172 SetSandboxPolicy(syscall_evaluator, aux);	315 SetSandboxPolicy(syscall_evaluator, aux);

173 set_proc_fd(proc_fd);	316 StartSandbox();

174

175 // By passing "quiet=true" to "startSandboxInternal()" we suppress

176 // messages for expected and benign failures (e.g. if the current

177 // kernel lacks support for BPF filters).

178 StartSandboxInternal(true);

179	317

180 // Run our code in the sandbox.	318 // Run our code in the sandbox.

181 code_in_sandbox();	319 code_in_sandbox();

182	320

183 // code_in_sandbox() is not supposed to return here.	321 // code_in_sandbox() is not supposed to return here.

184 SANDBOX_DIE(NULL);	322 SANDBOX_DIE(NULL);

185 }	323 }

186	324

187 // In the parent process.	325 // In the parent process.

188 if (HANDLE_EINTR(close(fds[1]))) {	326 if (HANDLE_EINTR(close(fds[1]))) {

(...skipping 24 matching lines...) Expand all Loading...
213 SANDBOX_DIE(buf);	351 SANDBOX_DIE(buf);

214 }	352 }

215 }	353 }

216 if (HANDLE_EINTR(close(fds[0]))) {	354 if (HANDLE_EINTR(close(fds[0]))) {

217 SANDBOX_DIE("close() failed");	355 SANDBOX_DIE("close() failed");

218 }	356 }

219	357

220 return rc;	358 return rc;

221 }	359 }

222	360

223 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) {	361 bool Sandbox::KernelSupportSeccompBPF() {

224 return	362 return

225 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) &&	363 RunFunctionInPolicy(ProbeProcess, ProbeEvaluator, 0) &&

226 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0,	364 RunFunctionInPolicy(TryVsyscallProcess, AllowAllEvaluator, 0);

227 proc_fd);

228 }	365 }

229	366

230 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {	367 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {

231 // It the sandbox is currently active, we clearly must have support for	368 // It the sandbox is currently active, we clearly must have support for

232 // sandboxing.	369 // sandboxing.

233 if (status_ == STATUS_ENABLED) {	370 if (status_ == STATUS_ENABLED) {

234 return status_;	371 return status_;

235 }	372 }

236	373

237 // Even if the sandbox was previously available, something might have	374 // Even if the sandbox was previously available, something might have

(...skipping 14 matching lines...) Expand all Loading...
252 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is	389 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is

253 // actually available.	390 // actually available.

254 status_ = STATUS_AVAILABLE;	391 status_ = STATUS_AVAILABLE;

255 return status_;	392 return status_;

256 }	393 }

257	394

258 // If we have not previously checked for availability of the sandbox or if	395 // If we have not previously checked for availability of the sandbox or if

259 // we otherwise don't believe to have a good cached value, we have to	396 // we otherwise don't believe to have a good cached value, we have to

260 // perform a thorough check now.	397 // perform a thorough check now.

261 if (status_ == STATUS_UNKNOWN) {	398 if (status_ == STATUS_UNKNOWN) {

262 status_ = KernelSupportSeccompBPF(proc_fd)	399 // We create our own private copy of a "Sandbox" object. This ensures that

	400 // the object does not have any policies configured, that might interfere

	401 // with the tests done by "KernelSupportSeccompBPF()".

	402 Sandbox sandbox;

	403

	404 // By setting "quiet_ = true" we suppress messages for expected and benign

	405 // failures (e.g. if the current kernel lacks support for BPF filters).

	406 sandbox.quiet_ = true;

	407 sandbox.set_proc_fd(proc_fd);

	408 status_ = sandbox.KernelSupportSeccompBPF()

263 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;	409 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;

264	410

265 // As we are performing our tests from a child process, the run-time	411 // As we are performing our tests from a child process, the run-time

266 // environment that is visible to the sandbox is always guaranteed to be	412 // environment that is visible to the sandbox is always guaranteed to be

267 // single-threaded. Let's check here whether the caller is single-	413 // single-threaded. Let's check here whether the caller is single-

268 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.	414 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.

269 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {	415 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {

270 status_ = STATUS_UNAVAILABLE;	416 status_ = STATUS_UNAVAILABLE;

271 }	417 }

272 }	418 }

273 return status_;	419 return status_;

274 }	420 }

275	421

276 void Sandbox::set_proc_fd(int proc_fd) {	422 void Sandbox::set_proc_fd(int proc_fd) {

277 proc_fd_ = proc_fd;	423 proc_fd_ = proc_fd;

278 }	424 }

279	425

280 void Sandbox::StartSandboxInternal(bool quiet) {	426 void Sandbox::StartSandbox() {

281 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {	427 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {

282 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "	428 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "

283 "unavailable");	429 "unavailable");

284 } else if (status_ == STATUS_ENABLED) {	430 } else if (!evaluators_ || !conds_) {

285 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "	431 SANDBOX_DIE("Cannot repeatedly start sandbox. Create a separate Sandbox "

286 "setSandboxPolicy() to stack policies instead");	432 "object instead.");

287 }	433 }

288 if (proc_fd_ < 0) {	434 if (proc_fd_ < 0) {

289 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);	435 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);

290 }	436 }

291 if (proc_fd_ < 0) {	437 if (proc_fd_ < 0) {

292 // For now, continue in degraded mode, if we can't access /proc.	438 // For now, continue in degraded mode, if we can't access /proc.

293 // In the future, we might want to tighten this requirement.	439 // In the future, we might want to tighten this requirement.

294 }	440 }

295 if (!IsSingleThreaded(proc_fd_)) {	441 if (!IsSingleThreaded(proc_fd_)) {

296 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");	442 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");

297 }	443 }

298	444

299 // We no longer need access to any files in /proc. We want to do this	445 // We no longer need access to any files in /proc. We want to do this

300 // before installing the filters, just in case that our policy denies	446 // before installing the filters, just in case that our policy denies

301 // close().	447 // close().

302 if (proc_fd_ >= 0) {	448 if (proc_fd_ >= 0) {

303 if (HANDLE_EINTR(close(proc_fd_))) {	449 if (HANDLE_EINTR(close(proc_fd_))) {

304 SANDBOX_DIE("Failed to close file descriptor for /proc");	450 SANDBOX_DIE("Failed to close file descriptor for /proc");

305 }	451 }

306 proc_fd_ = -1;	452 proc_fd_ = -1;

307 }	453 }

308	454

309 // Install the filters.	455 // Install the filters.

310 InstallFilter(quiet);	456 InstallFilter();

311	457

312 // We are now inside the sandbox.	458 // We are now inside the sandbox.

313 status_ = STATUS_ENABLED;	459 status_ = STATUS_ENABLED;

314 }	460 }

315	461

316 bool Sandbox::IsSingleThreaded(int proc_fd) {

317 if (proc_fd < 0) {

318 // Cannot determine whether program is single-threaded. Hope for

319 // the best...

320 return true;

321 }

322

323 struct stat sb;

324 int task = -1;

325 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||

326 fstat(task, &sb) != 0 ||

327 sb.st_nlink != 3 ||

328 HANDLE_EINTR(close(task))) {

329 if (task >= 0) {

330 if (HANDLE_EINTR(close(task))) { }

331 }

332 return false;

333 }

334 return true;

335 }

336

337 bool Sandbox::IsDenied(const ErrorCode& code) {

338 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||

339 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&

340 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));

341 }

342

343 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,	462 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,

344 void *aux) {	463 void *aux) {

345 for (SyscallIterator iter(true); !iter.Done(); ) {	464 for (SyscallIterator iter(true); !iter.Done(); ) {

346 uint32_t sysnum = iter.Next();	465 uint32_t sysnum = iter.Next();

347 if (!IsDenied(syscall_evaluator(sysnum, aux))) {	466 if (!IsDenied(syscall_evaluator(this, sysnum, aux))) {

348 SANDBOX_DIE("Policies should deny system calls that are outside the "	467 SANDBOX_DIE("Policies should deny system calls that are outside the "

349 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");	468 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");

350 }	469 }

351 }	470 }

352 return;	471 return;

353 }	472 }

354	473

355 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {

356 bool *is_unsafe = static_cast<bool *>(aux);

357 if (!*is_unsafe) {

358 if (BPF_CLASS(insn->code) == BPF_RET &&

359 insn->k > SECCOMP_RET_TRAP &&

360 insn->k - SECCOMP_RET_TRAP <= SECCOMP_RET_DATA) {

361 const ErrorCode& err =

362 Trap::ErrorCodeFromTrapId(insn->k & SECCOMP_RET_DATA);

363 if (err.error_type_ != ErrorCode::ET_INVALID && !err.safe_) {

364 *is_unsafe = true;

365 }

366 }

367 }

368 }

369

370 void Sandbox::RedirectToUserspace(Instruction *insn, void *) {

371 // When inside an UnsafeTrap() callback, we want to allow all system calls.

372 // This means, we must conditionally disable the sandbox -- and that's not

373 // something that kernel-side BPF filters can do, as they cannot inspect

374 // any state other than the syscall arguments.

375 // But if we redirect all error handlers to user-space, then we can easily

376 // make this decision.

377 // The performance penalty for this extra round-trip to user-space is not

378 // actually that bad, as we only ever pay it for denied system calls; and a

379 // typical program has very few of these.

380 if (BPF_CLASS(insn->code) == BPF_RET &&

381 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

382 insn->k = Trap(ReturnErrno,

383 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();

384 }

385 }

386

387 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {

388 // We need to replicate the behavior of RedirectToUserspace(), so that our

389 // Verifier can still work correctly.

390 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);

391 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();

392 ErrorCode err = evaluator.first(sysnum, evaluator.second);

393 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

394 return Trap(ReturnErrno,

395 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));

396 }

397 return err;

398 }

399

400 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {	474 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {

401 if (status_ == STATUS_ENABLED) {	475 if (!evaluators_ || !conds_) {

402 SANDBOX_DIE("Cannot change policy after sandbox has started");	476 SANDBOX_DIE("Cannot change policy after sandbox has started");

403 }	477 }

404 PolicySanityChecks(syscall_evaluator, aux);	478 PolicySanityChecks(syscall_evaluator, aux);

405 evaluators_.push_back(std::make_pair(syscall_evaluator, aux));	479 evaluators_->push_back(std::make_pair(syscall_evaluator, aux));

406 }	480 }

407	481

408 void Sandbox::InstallFilter(bool quiet) {	482 void Sandbox::InstallFilter() {

409 // We want to be very careful in not imposing any requirements on the	483 // We want to be very careful in not imposing any requirements on the

410 // policies that are set with SetSandboxPolicy(). This means, as soon as	484 // policies that are set with SetSandboxPolicy(). This means, as soon as

411 // the sandbox is active, we shouldn't be relying on libraries that could	485 // the sandbox is active, we shouldn't be relying on libraries that could

412 // be making system calls. This, for example, means we should avoid	486 // be making system calls. This, for example, means we should avoid

413 // using the heap and we should avoid using STL functions.	487 // using the heap and we should avoid using STL functions.

414 // Temporarily copy the contents of the "program" vector into a	488 // Temporarily copy the contents of the "program" vector into a

415 // stack-allocated array; and then explicitly destroy that object.	489 // stack-allocated array; and then explicitly destroy that object.

416 // This makes sure we don't ex- or implicitly call new/delete after we	490 // This makes sure we don't ex- or implicitly call new/delete after we

417 // installed the BPF filter program in the kernel. Depending on the	491 // installed the BPF filter program in the kernel. Depending on the

418 // system memory allocator that is in effect, these operators can result	492 // system memory allocator that is in effect, these operators can result

419 // in system calls to things like munmap() or brk().	493 // in system calls to things like munmap() or brk().

420 Program *program = AssembleFilter(false /* force_verification */);	494 Program *program = AssembleFilter(false /* force_verification */);

421	495

422 struct sock_filter bpf[program->size()];	496 struct sock_filter bpf[program->size()];

423 const struct sock_fprog prog = {	497 const struct sock_fprog prog = {

424 static_cast<unsigned short>(program->size()), bpf };	498 static_cast<unsigned short>(program->size()), bpf };

425 memcpy(bpf, &(*program)[0], sizeof(bpf));	499 memcpy(bpf, &(*program)[0], sizeof(bpf));

426 delete program;	500 delete program;

427	501

428 // Release memory that is no longer needed	502 // Release memory that is no longer needed

429 evaluators_.clear();	503 delete evaluators_;

430 conds_.clear();	504 delete conds_;

	505 evaluators_ = NULL;

	506 conds_ = NULL;

431	507

432 // Install BPF filter program	508 // Install BPF filter program

433 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {	509 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {

434 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");	510 SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");

435 } else {	511 } else {

436 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {	512 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

437 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");	513 SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");

438 }	514 }

439 }	515 }

440	516

441 return;	517 return;

442 }	518 }

443	519

444 Sandbox::Program *Sandbox::AssembleFilter(bool force_verification) {	520 Sandbox::Program *Sandbox::AssembleFilter(bool force_verification) {

445 #if !defined(NDEBUG)	521 #if !defined(NDEBUG)

446 force_verification = true;	522 force_verification = true;

447 #endif	523 #endif

448	524

449 // Verify that the user pushed a policy.	525 // Verify that the user pushed a policy.

450 if (evaluators_.empty()) {	526 if (evaluators_->empty()) {

451 SANDBOX_DIE("Failed to configure system call filters");	527 SANDBOX_DIE("Failed to configure system call filters");

452 }	528 }

453	529

454 // We can't handle stacked evaluators, yet. We'll get there eventually	530 // We can't handle stacked evaluators, yet. We'll get there eventually

455 // though. Hang tight.	531 // though. Hang tight.

456 if (evaluators_.size() != 1) {	532 if (evaluators_->size() != 1) {

457 SANDBOX_DIE("Not implemented");	533 SANDBOX_DIE("Not implemented");

458 }	534 }

459	535

460 // Assemble the BPF filter program.	536 // Assemble the BPF filter program.

461 CodeGen *gen = new CodeGen();	537 CodeGen *gen = new CodeGen();

462 if (!gen) {	538 if (!gen) {

463 SANDBOX_DIE("Out of memory");	539 SANDBOX_DIE("Out of memory");

464 }	540 }

465	541

466 // If the architecture doesn't match SECCOMP_ARCH, disallow the	542 // If the architecture doesn't match SECCOMP_ARCH, disallow the

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
502 // As support for unsafe jumps essentially defeats all the security	578 // As support for unsafe jumps essentially defeats all the security

503 // measures that the sandbox provides, we print a big warning message --	579 // measures that the sandbox provides, we print a big warning message --

504 // and of course, we make sure to only ever enable this feature if it	580 // and of course, we make sure to only ever enable this feature if it

505 // is actually requested by the sandbox policy.	581 // is actually requested by the sandbox policy.

506 if (has_unsafe_traps) {	582 if (has_unsafe_traps) {

507 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {	583 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {

508 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "	584 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "

509 "architecture");	585 "architecture");

510 }	586 }

511	587

512 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;	588 EvaluateSyscall evaluateSyscall = evaluators_->begin()->first;

513 void *aux = evaluators_.begin()->second;	589 void *aux = evaluators_->begin()->second;

514 if (!evaluateSyscall(__NR_rt_sigprocmask, aux).	590 if (!evaluateSyscall(this, __NR_rt_sigprocmask, aux).

515 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) \|\|	591 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) \|\|

516 !evaluateSyscall(__NR_rt_sigreturn, aux).	592 !evaluateSyscall(this, __NR_rt_sigreturn, aux).

517 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))	593 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

518 #if defined(__NR_sigprocmask)	594 #if defined(__NR_sigprocmask)

519 \|\| !evaluateSyscall(__NR_sigprocmask, aux).	595 \|\| !evaluateSyscall(this, __NR_sigprocmask, aux).

520 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))	596 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

521 #endif	597 #endif

522 #if defined(__NR_sigreturn)	598 #if defined(__NR_sigreturn)

523 \|\| !evaluateSyscall(__NR_sigreturn, aux).	599 \|\| !evaluateSyscall(this, __NR_sigreturn, aux).

524 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))	600 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))

525 #endif	601 #endif

526 ) {	602 ) {

527 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "	603 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "

528 "unconditionally allow sigreturn() and sigprocmask()");	604 "unconditionally allow sigreturn() and sigprocmask()");

529 }	605 }

530	606

531 if (!Trap::EnableUnsafeTrapsInSigSysHandler()) {	607 if (!Trap::EnableUnsafeTrapsInSigSysHandler()) {

532 // We should never be able to get here, as UnsafeTrap() should never	608 // We should never be able to get here, as UnsafeTrap() should never

533 // actually return a valid ErrorCode object unless the user set the	609 // actually return a valid ErrorCode object unless the user set the

534 // CHROME_SANDBOX_DEBUGGING environment variable; and therefore,	610 // CHROME_SANDBOX_DEBUGGING environment variable; and therefore,

535 // "has_unsafe_traps" would always be false. But better double-check	611 // "has_unsafe_traps" would always be false. But better double-check

536 // than enabling dangerous code.	612 // than enabling dangerous code.

537 SANDBOX_DIE("We'd rather die than enable unsafe traps");	613 SANDBOX_DIE("We'd rather die than enable unsafe traps");

538 }	614 }

539 gen->Traverse(jumptable, RedirectToUserspace, NULL);	615 gen->Traverse(jumptable, RedirectToUserspace, this);

540	616

541 // Allow system calls, if they originate from our magic return address	617 // Allow system calls, if they originate from our magic return address

542 // (which we can query by calling SandboxSyscall(-1)).	618 // (which we can query by calling SandboxSyscall(-1)).

543 uintptr_t syscall_entry_point =	619 uintptr_t syscall_entry_point =

544 static_cast<uintptr_t>(SandboxSyscall(-1));	620 static_cast<uintptr_t>(SandboxSyscall(-1));

545 uint32_t low = static_cast<uint32_t>(syscall_entry_point);	621 uint32_t low = static_cast<uint32_t>(syscall_entry_point);

546 #if __SIZEOF_POINTER__ > 4	622 #if __SIZEOF_POINTER__ > 4

547 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);	623 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);

548 #endif	624 #endif

549	625

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
608 return program;	684 return program;

609 }	685 }

610	686

611 void Sandbox::VerifyProgram(const Program& program, bool has_unsafe_traps) {	687 void Sandbox::VerifyProgram(const Program& program, bool has_unsafe_traps) {

612 // If we previously rewrote the BPF program so that it calls user-space	688 // If we previously rewrote the BPF program so that it calls user-space

613 // whenever we return an "errno" value from the filter, then we have to	689 // whenever we return an "errno" value from the filter, then we have to

614 // wrap our system call evaluator to perform the same operation. Otherwise,	690 // wrap our system call evaluator to perform the same operation. Otherwise,

615 // the verifier would also report a mismatch in return codes.	691 // the verifier would also report a mismatch in return codes.

616 Evaluators redirected_evaluators;	692 Evaluators redirected_evaluators;

617 redirected_evaluators.push_back(	693 redirected_evaluators.push_back(

618 std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_));	694 std::make_pair(RedirectToUserspaceEvalWrapper, evaluators_));

619	695

620 const char *err = NULL;	696 const char *err = NULL;

621 if (!Verifier::VerifyBPF(	697 if (!Verifier::VerifyBPF(

	698 this,

622 program,	699 program,

623 has_unsafe_traps ? redirected_evaluators : evaluators_,	700 has_unsafe_traps ? redirected_evaluators : *evaluators_,

624 &err)) {	701 &err)) {

625 CodeGen::PrintProgram(program);	702 CodeGen::PrintProgram(program);

626 SANDBOX_DIE(err);	703 SANDBOX_DIE(err);

627 }	704 }

628 }	705 }

629	706

630 void Sandbox::FindRanges(Ranges *ranges) {	707 void Sandbox::FindRanges(Ranges *ranges) {

631 // Please note that "struct seccomp_data" defines system calls as a signed	708 // Please note that "struct seccomp_data" defines system calls as a signed

632 // int32_t, but BPF instructions always operate on unsigned quantities. We	709 // int32_t, but BPF instructions always operate on unsigned quantities. We

633 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,	710 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,

634 // and then verifying that the rest of the number range (both positive and	711 // and then verifying that the rest of the number range (both positive and

635 // negative) all return the same ErrorCode.	712 // negative) all return the same ErrorCode.

636 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first;	713 EvaluateSyscall evaluate_syscall = evaluators_->begin()->first;

637 void *aux = evaluators_.begin()->second;	714 void *aux = evaluators_->begin()->second;

638 uint32_t old_sysnum = 0;	715 uint32_t old_sysnum = 0;

639 ErrorCode old_err = evaluate_syscall(old_sysnum, aux);	716 ErrorCode old_err = evaluate_syscall(this, old_sysnum, aux);

640 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux);	717 ErrorCode invalid_err = evaluate_syscall(this, MIN_SYSCALL - 1,

	718 aux);

641 for (SyscallIterator iter(false); !iter.Done(); ) {	719 for (SyscallIterator iter(false); !iter.Done(); ) {

642 uint32_t sysnum = iter.Next();	720 uint32_t sysnum = iter.Next();

643 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux);	721 ErrorCode err = evaluate_syscall(this, static_cast<int>(sysnum), aux);

644 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {	722 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {

645 // A proper sandbox policy should always treat system calls outside of	723 // A proper sandbox policy should always treat system calls outside of

646 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns	724 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns

647 // "false" for SyscallIterator::IsValid()) identically. Typically, all	725 // "false" for SyscallIterator::IsValid()) identically. Typically, all

648 // of these system calls would be denied with the same ErrorCode.	726 // of these system calls would be denied with the same ErrorCode.

649 SANDBOX_DIE("Invalid seccomp policy");	727 SANDBOX_DIE("Invalid seccomp policy");

650 }	728 }

651 if (!err.Equals(old_err) \|\| iter.Done()) {	729 if (!err.Equals(old_err) \|\| iter.Done()) {

652 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));	730 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));

653 old_sysnum = sysnum;	731 old_sysnum = sysnum;

(...skipping 229 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
883 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) {	961 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) {

884 return SandboxSyscall(args.nr,	962 return SandboxSyscall(args.nr,

885 static_cast<intptr_t>(args.args[0]),	963 static_cast<intptr_t>(args.args[0]),

886 static_cast<intptr_t>(args.args[1]),	964 static_cast<intptr_t>(args.args[1]),

887 static_cast<intptr_t>(args.args[2]),	965 static_cast<intptr_t>(args.args[2]),

888 static_cast<intptr_t>(args.args[3]),	966 static_cast<intptr_t>(args.args[3]),

889 static_cast<intptr_t>(args.args[4]),	967 static_cast<intptr_t>(args.args[4]),

890 static_cast<intptr_t>(args.args[5]));	968 static_cast<intptr_t>(args.args[5]));

891 }	969 }

892	970

893 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {

894 // TrapFnc functions report error by following the native kernel convention

895 // of returning an exit code in the range of -1..-4096. They do not try to

896 // set errno themselves. The glibc wrapper that triggered the SIGSYS will

897 // ultimately do so for us.

898 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;

899 return -err;

900 }

901

902 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,	971 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,

903 ErrorCode::Operation op, uint64_t value,	972 ErrorCode::Operation op, uint64_t value,

904 const ErrorCode& passed, const ErrorCode& failed) {	973 const ErrorCode& passed, const ErrorCode& failed) {

905 return ErrorCode(argno, width, op, value,	974 return ErrorCode(argno, width, op, value,

906 &*conds_.insert(passed).first,	975 &*conds_->insert(passed).first,

907 &*conds_.insert(failed).first);	976 &*conds_->insert(failed).first);

908 }

909

910 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) {

911 SANDBOX_DIE(static_cast<char *>(aux));

912 }	977 }

913	978

914 ErrorCode Sandbox::Kill(const char *msg) {	979 ErrorCode Sandbox::Kill(const char *msg) {

915 return Trap(BpfFailure, const_cast<char *>(msg));	980 return Trap(BpfFailure, const_cast<char *>(msg));

916 }	981 }

917	982

918 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	983 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

919 int Sandbox::proc_fd_ = -1;

920 Sandbox::Evaluators Sandbox::evaluators_;

921 Sandbox::Conds Sandbox::conds_;

922	984

923 } // namespace	985 } // namespace

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc » ('j') | no next file with comments »