Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(296)

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 12223109: SECCOMP-BPF: Refactor the BPF sandbox API to use fewer "static" fields and methods. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebase Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Some headers on Android are missing cdefs: crbug.com/172337. 5 // Some headers on Android are missing cdefs: crbug.com/172337.
6 // (We can't use OS_ANDROID here since build_config.h is not included). 6 // (We can't use OS_ANDROID here since build_config.h is not included).
7 #if defined(ANDROID) 7 #if defined(ANDROID)
8 #include <sys/cdefs.h> 8 #include <sys/cdefs.h>
9 #endif 9 #endif
10 10
11 #include <errno.h>
12 #include <fcntl.h>
13 #include <string.h>
11 #include <sys/prctl.h> 14 #include <sys/prctl.h>
15 #include <sys/stat.h>
12 #include <sys/syscall.h> 16 #include <sys/syscall.h>
17 #include <sys/types.h>
18 #include <time.h>
19 #include <unistd.h>
13 20
14 #ifndef SECCOMP_BPF_STANDALONE 21 #ifndef SECCOMP_BPF_STANDALONE
15 #include "base/logging.h" 22 #include "base/logging.h"
16 #include "base/posix/eintr_wrapper.h" 23 #include "base/posix/eintr_wrapper.h"
17 #endif 24 #endif
18 25
19 #include "sandbox/linux/seccomp-bpf/codegen.h" 26 #include "sandbox/linux/seccomp-bpf/codegen.h"
20 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" 27 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
21 #include "sandbox/linux/seccomp-bpf/syscall.h" 28 #include "sandbox/linux/seccomp-bpf/syscall.h"
22 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" 29 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
23 #include "sandbox/linux/seccomp-bpf/verifier.h" 30 #include "sandbox/linux/seccomp-bpf/verifier.h"
24 31
25 namespace { 32 namespace {
26 33
27 void WriteFailedStderrSetupMessage(int out_fd) { 34 using playground2::ErrorCode;
28 const char* error_string = strerror(errno); 35 using playground2::Instruction;
29 static const char msg[] = "You have reproduced a puzzling issue.\n" 36 using playground2::Sandbox;
30 "Please, report to crbug.com/152530!\n" 37 using playground2::Trap;
31 "Failed to set up stderr: "; 38 using playground2::arch_seccomp_data;
32 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string && 39
33 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 && 40 const int kExpectedExitCode = 100;
34 HANDLE_EINTR(write(out_fd, "\n", 1))) {
35 }
36 }
37 41
38 template<class T> int popcount(T x); 42 template<class T> int popcount(T x);
39 template<> int popcount<unsigned int>(unsigned int x) { 43 template<> int popcount<unsigned int>(unsigned int x) {
40 return __builtin_popcount(x); 44 return __builtin_popcount(x);
41 } 45 }
42 template<> int popcount<unsigned long>(unsigned long x) { 46 template<> int popcount<unsigned long>(unsigned long x) {
43 return __builtin_popcountl(x); 47 return __builtin_popcountl(x);
44 } 48 }
45 template<> int popcount<unsigned long long>(unsigned long long x) { 49 template<> int popcount<unsigned long long>(unsigned long long x) {
46 return __builtin_popcountll(x); 50 return __builtin_popcountll(x);
47 } 51 }
48 52
49 } // namespace 53 void WriteFailedStderrSetupMessage(int out_fd) {
50 54 const char* error_string = strerror(errno);
51 // The kernel gives us a sandbox, we turn it into a playground :-) 55 static const char msg[] = "You have reproduced a puzzling issue.\n"
52 // This is version 2 of the playground; version 1 was built on top of 56 "Please, report to crbug.com/152530!\n"
53 // pre-BPF seccomp mode. 57 "Failed to set up stderr: ";
54 namespace playground2 { 58 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&
55 59 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&
56 const int kExpectedExitCode = 100; 60 HANDLE_EINTR(write(out_fd, "\n", 1))) {
61 }
62 }
57 63
58 // We define a really simple sandbox policy. It is just good enough for us 64 // We define a really simple sandbox policy. It is just good enough for us
59 // to tell that the sandbox has actually been activated. 65 // to tell that the sandbox has actually been activated.
60 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) { 66 ErrorCode ProbeEvaluator(Sandbox *, int sysnum, void *) __attribute__((const));
67 ErrorCode ProbeEvaluator(Sandbox *, int sysnum, void *) {
61 switch (sysnum) { 68 switch (sysnum) {
62 case __NR_getpid: 69 case __NR_getpid:
63 // Return EPERM so that we can check that the filter actually ran. 70 // Return EPERM so that we can check that the filter actually ran.
64 return ErrorCode(EPERM); 71 return ErrorCode(EPERM);
65 case __NR_exit_group: 72 case __NR_exit_group:
66 // Allow exit() with a non-default return code. 73 // Allow exit() with a non-default return code.
67 return ErrorCode(ErrorCode::ERR_ALLOWED); 74 return ErrorCode(ErrorCode::ERR_ALLOWED);
68 default: 75 default:
69 // Make everything else fail in an easily recognizable way. 76 // Make everything else fail in an easily recognizable way.
70 return ErrorCode(EINVAL); 77 return ErrorCode(EINVAL);
71 } 78 }
72 } 79 }
73 80
74 void Sandbox::ProbeProcess(void) { 81 void ProbeProcess(void) {
75 if (syscall(__NR_getpid) < 0 && errno == EPERM) { 82 if (syscall(__NR_getpid) < 0 && errno == EPERM) {
76 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); 83 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
77 } 84 }
78 } 85 }
79 86
80 bool Sandbox::IsValidSyscallNumber(int sysnum) { 87 ErrorCode AllowAllEvaluator(Sandbox *, int sysnum, void *) {
81 return SyscallIterator::IsValid(sysnum); 88 if (!Sandbox::IsValidSyscallNumber(sysnum)) {
82 }
83
84 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) {
85 if (!IsValidSyscallNumber(sysnum)) {
86 return ErrorCode(ENOSYS); 89 return ErrorCode(ENOSYS);
87 } 90 }
88 return ErrorCode(ErrorCode::ERR_ALLOWED); 91 return ErrorCode(ErrorCode::ERR_ALLOWED);
89 } 92 }
90 93
91 void Sandbox::TryVsyscallProcess(void) { 94 void TryVsyscallProcess(void) {
92 time_t current_time; 95 time_t current_time;
93 // time() is implemented as a vsyscall. With an older glibc, with 96 // time() is implemented as a vsyscall. With an older glibc, with
94 // vsyscall=emulate and some versions of the seccomp BPF patch 97 // vsyscall=emulate and some versions of the seccomp BPF patch
95 // we may get SIGKILL-ed. Detect this! 98 // we may get SIGKILL-ed. Detect this!
96 if (time(&current_time) != static_cast<time_t>(-1)) { 99 if (time(&current_time) != static_cast<time_t>(-1)) {
97 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); 100 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
98 } 101 }
99 } 102 }
100 103
104 bool IsSingleThreaded(int proc_fd) {
105 if (proc_fd < 0) {
106 // Cannot determine whether program is single-threaded. Hope for
107 // the best...
108 return true;
109 }
110
111 struct stat sb;
112 int task = -1;
113 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
114 fstat(task, &sb) != 0 ||
115 sb.st_nlink != 3 ||
116 HANDLE_EINTR(close(task))) {
117 if (task >= 0) {
118 if (HANDLE_EINTR(close(task))) { }
119 }
120 return false;
121 }
122 return true;
123 }
124
125 bool IsDenied(const ErrorCode& code) {
126 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
127 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&
128 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));
129 }
130
131 // Function that can be passed as a callback function to CodeGen::Traverse().
132 // Checks whether the "insn" returns an UnsafeTrap() ErrorCode. If so, it
133 // sets the "bool" variable pointed to by "aux".
134 void CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
135 bool *is_unsafe = static_cast<bool *>(aux);
136 if (!*is_unsafe) {
137 if (BPF_CLASS(insn->code) == BPF_RET &&
138 insn->k > SECCOMP_RET_TRAP &&
139 insn->k - SECCOMP_RET_TRAP <= SECCOMP_RET_DATA) {
140 const ErrorCode& err =
141 Trap::ErrorCodeFromTrapId(insn->k & SECCOMP_RET_DATA);
142 if (err.error_type() != ErrorCode::ET_INVALID && !err.safe()) {
143 *is_unsafe = true;
144 }
145 }
146 }
147 }
148
149 // A Trap() handler that returns an "errno" value. The value is encoded
150 // in the "aux" parameter.
151 intptr_t ReturnErrno(const struct arch_seccomp_data&, void *aux) {
152 // TrapFnc functions report error by following the native kernel convention
153 // of returning an exit code in the range of -1..-4096. They do not try to
154 // set errno themselves. The glibc wrapper that triggered the SIGSYS will
155 // ultimately do so for us.
156 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
157 return -err;
158 }
159
160 // Function that can be passed as a callback function to CodeGen::Traverse().
161 // Checks whether the "insn" returns an errno value from a BPF filter. If so,
162 // it rewrites the instruction to instead call a Trap() handler that does
163 // the same thing. "aux" is ignored.
164 void RedirectToUserspace(Instruction *insn, void *aux) {
165 // When inside an UnsafeTrap() callback, we want to allow all system calls.
166 // This means, we must conditionally disable the sandbox -- and that's not
167 // something that kernel-side BPF filters can do, as they cannot inspect
168 // any state other than the syscall arguments.
169 // But if we redirect all error handlers to user-space, then we can easily
170 // make this decision.
171 // The performance penalty for this extra round-trip to user-space is not
172 // actually that bad, as we only ever pay it for denied system calls; and a
173 // typical program has very few of these.
174 Sandbox *sandbox = static_cast<Sandbox *>(aux);
175 if (BPF_CLASS(insn->code) == BPF_RET &&
176 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
177 insn->k = sandbox->Trap(ReturnErrno,
178 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
179 }
180 }
181
182 // Stackable wrapper around an Evaluators handler. Changes ErrorCodes
183 // returned by a system call evaluator to match the changes made by
184 // RedirectToUserspace(). "aux" should be pointer to wrapped system call
185 // evaluator.
186 ErrorCode RedirectToUserspaceEvalWrapper(Sandbox *sandbox, int sysnum,
187 void *aux) {
188 // We need to replicate the behavior of RedirectToUserspace(), so that our
189 // Verifier can still work correctly.
190 Sandbox::Evaluators *evaluators =
191 reinterpret_cast<Sandbox::Evaluators *>(aux);
192 const std::pair<Sandbox::EvaluateSyscall, void *>& evaluator =
193 *evaluators->begin();
194
195 ErrorCode err = evaluator.first(sandbox, sysnum, evaluator.second);
196 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
197 return sandbox->Trap(ReturnErrno,
198 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
199 }
200 return err;
201 }
202
203 intptr_t BpfFailure(const struct arch_seccomp_data&, void *aux) {
204 SANDBOX_DIE(static_cast<char *>(aux));
205 }
206
207 } // namespace
208
209 // The kernel gives us a sandbox, we turn it into a playground :-)
210 // This is version 2 of the playground; version 1 was built on top of
211 // pre-BPF seccomp mode.
212 namespace playground2 {
213
214 Sandbox::Sandbox()
215 : quiet_(false),
216 proc_fd_(-1),
217 evaluators_(new Evaluators),
218 conds_(new Conds) {
219 }
220
221 Sandbox::~Sandbox() {
222 // It is generally unsafe to call any memory allocator operations or to even
223 // call arbitrary destructors after having installed a new policy. We just
224 // have no way to tell whether this policy would allow the system calls that
225 // the constructors can trigger.
226 // So, we normally destroy all of our complex state prior to starting the
227 // sandbox. But this won't happen, if the Sandbox object was created and
228 // never actually used to set up a sandbox. So, just in case, we are
229 // destroying any remaining state.
230 // The "if ()" statements are technically superfluous. But let's be explicit
231 // that we really don't want to run any code, when we already destroyed
232 // objects before setting up the sandbox.
233 if (evaluators_) {
234 delete evaluators_;
235 }
236 if (conds_) {
237 delete conds_;
238 }
239 }
240
241 bool Sandbox::IsValidSyscallNumber(int sysnum) {
242 return SyscallIterator::IsValid(sysnum);
243 }
244
245
101 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(), 246 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),
102 EvaluateSyscall syscall_evaluator, 247 Sandbox::EvaluateSyscall syscall_evaluator,
103 void *aux, 248 void *aux) {
104 int proc_fd) {
105 // Block all signals before forking a child process. This prevents an 249 // Block all signals before forking a child process. This prevents an
106 // attacker from manipulating our test by sending us an unexpected signal. 250 // attacker from manipulating our test by sending us an unexpected signal.
107 sigset_t old_mask, new_mask; 251 sigset_t old_mask, new_mask;
108 if (sigfillset(&new_mask) || 252 if (sigfillset(&new_mask) ||
109 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) { 253 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
110 SANDBOX_DIE("sigprocmask() failed"); 254 SANDBOX_DIE("sigprocmask() failed");
111 } 255 }
112 int fds[2]; 256 int fds[2];
113 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { 257 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {
114 SANDBOX_DIE("pipe() failed"); 258 SANDBOX_DIE("pipe() failed");
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
161 } 305 }
162 if (HANDLE_EINTR(close(fds[1]))) { 306 if (HANDLE_EINTR(close(fds[1]))) {
163 // This call to close() has been failing in strange ways. See 307 // This call to close() has been failing in strange ways. See
164 // crbug.com/152530. So we only fail in debug mode now. 308 // crbug.com/152530. So we only fail in debug mode now.
165 #if !defined(NDEBUG) 309 #if !defined(NDEBUG)
166 WriteFailedStderrSetupMessage(fds[1]); 310 WriteFailedStderrSetupMessage(fds[1]);
167 SANDBOX_DIE(NULL); 311 SANDBOX_DIE(NULL);
168 #endif 312 #endif
169 } 313 }
170 314
171 evaluators_.clear();
172 SetSandboxPolicy(syscall_evaluator, aux); 315 SetSandboxPolicy(syscall_evaluator, aux);
173 set_proc_fd(proc_fd); 316 StartSandbox();
174
175 // By passing "quiet=true" to "startSandboxInternal()" we suppress
176 // messages for expected and benign failures (e.g. if the current
177 // kernel lacks support for BPF filters).
178 StartSandboxInternal(true);
179 317
180 // Run our code in the sandbox. 318 // Run our code in the sandbox.
181 code_in_sandbox(); 319 code_in_sandbox();
182 320
183 // code_in_sandbox() is not supposed to return here. 321 // code_in_sandbox() is not supposed to return here.
184 SANDBOX_DIE(NULL); 322 SANDBOX_DIE(NULL);
185 } 323 }
186 324
187 // In the parent process. 325 // In the parent process.
188 if (HANDLE_EINTR(close(fds[1]))) { 326 if (HANDLE_EINTR(close(fds[1]))) {
(...skipping 24 matching lines...) Expand all
213 SANDBOX_DIE(buf); 351 SANDBOX_DIE(buf);
214 } 352 }
215 } 353 }
216 if (HANDLE_EINTR(close(fds[0]))) { 354 if (HANDLE_EINTR(close(fds[0]))) {
217 SANDBOX_DIE("close() failed"); 355 SANDBOX_DIE("close() failed");
218 } 356 }
219 357
220 return rc; 358 return rc;
221 } 359 }
222 360
223 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) { 361 bool Sandbox::KernelSupportSeccompBPF() {
224 return 362 return
225 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) && 363 RunFunctionInPolicy(ProbeProcess, ProbeEvaluator, 0) &&
226 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0, 364 RunFunctionInPolicy(TryVsyscallProcess, AllowAllEvaluator, 0);
227 proc_fd);
228 } 365 }
229 366
230 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) { 367 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {
231 // If the sandbox is currently active, we clearly must have support for 368 // If the sandbox is currently active, we clearly must have support for
232 // sandboxing. 369 // sandboxing.
233 if (status_ == STATUS_ENABLED) { 370 if (status_ == STATUS_ENABLED) {
234 return status_; 371 return status_;
235 } 372 }
236 373
237 // Even if the sandbox was previously available, something might have 374 // Even if the sandbox was previously available, something might have
(...skipping 14 matching lines...) Expand all
252 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is 389 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is
253 // actually available. 390 // actually available.
254 status_ = STATUS_AVAILABLE; 391 status_ = STATUS_AVAILABLE;
255 return status_; 392 return status_;
256 } 393 }
257 394
258 // If we have not previously checked for availability of the sandbox or if 395 // If we have not previously checked for availability of the sandbox or if
259 // we otherwise don't believe to have a good cached value, we have to 396 // we otherwise don't believe to have a good cached value, we have to
260 // perform a thorough check now. 397 // perform a thorough check now.
261 if (status_ == STATUS_UNKNOWN) { 398 if (status_ == STATUS_UNKNOWN) {
262 status_ = KernelSupportSeccompBPF(proc_fd) 399 // We create our own private copy of a "Sandbox" object. This ensures that
400 // the object does not have any policies configured, that might interfere
401 // with the tests done by "KernelSupportSeccompBPF()".
402 Sandbox sandbox;
403
404 // By setting "quiet_ = true" we suppress messages for expected and benign
405 // failures (e.g. if the current kernel lacks support for BPF filters).
406 sandbox.quiet_ = true;
407 sandbox.set_proc_fd(proc_fd);
408 status_ = sandbox.KernelSupportSeccompBPF()
263 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; 409 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;
264 410
265 // As we are performing our tests from a child process, the run-time 411 // As we are performing our tests from a child process, the run-time
266 // environment that is visible to the sandbox is always guaranteed to be 412 // environment that is visible to the sandbox is always guaranteed to be
267 // single-threaded. Let's check here whether the caller is single- 413 // single-threaded. Let's check here whether the caller is single-
268 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. 414 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.
269 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) { 415 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {
270 status_ = STATUS_UNAVAILABLE; 416 status_ = STATUS_UNAVAILABLE;
271 } 417 }
272 } 418 }
273 return status_; 419 return status_;
274 } 420 }
275 421
276 void Sandbox::set_proc_fd(int proc_fd) { 422 void Sandbox::set_proc_fd(int proc_fd) {
277 proc_fd_ = proc_fd; 423 proc_fd_ = proc_fd;
278 } 424 }
279 425
280 void Sandbox::StartSandboxInternal(bool quiet) { 426 void Sandbox::StartSandbox() {
281 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { 427 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
282 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " 428 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "
283 "unavailable"); 429 "unavailable");
284 } else if (status_ == STATUS_ENABLED) { 430 } else if (!evaluators_ || !conds_) {
285 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " 431 SANDBOX_DIE("Cannot repeatedly start sandbox. Create a separate Sandbox "
286 "setSandboxPolicy() to stack policies instead"); 432 "object instead.");
287 } 433 }
288 if (proc_fd_ < 0) { 434 if (proc_fd_ < 0) {
289 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); 435 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
290 } 436 }
291 if (proc_fd_ < 0) { 437 if (proc_fd_ < 0) {
292 // For now, continue in degraded mode, if we can't access /proc. 438 // For now, continue in degraded mode, if we can't access /proc.
293 // In the future, we might want to tighten this requirement. 439 // In the future, we might want to tighten this requirement.
294 } 440 }
295 if (!IsSingleThreaded(proc_fd_)) { 441 if (!IsSingleThreaded(proc_fd_)) {
296 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); 442 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");
297 } 443 }
298 444
299 // We no longer need access to any files in /proc. We want to do this 445 // We no longer need access to any files in /proc. We want to do this
300 // before installing the filters, just in case that our policy denies 446 // before installing the filters, just in case that our policy denies
301 // close(). 447 // close().
302 if (proc_fd_ >= 0) { 448 if (proc_fd_ >= 0) {
303 if (HANDLE_EINTR(close(proc_fd_))) { 449 if (HANDLE_EINTR(close(proc_fd_))) {
304 SANDBOX_DIE("Failed to close file descriptor for /proc"); 450 SANDBOX_DIE("Failed to close file descriptor for /proc");
305 } 451 }
306 proc_fd_ = -1; 452 proc_fd_ = -1;
307 } 453 }
308 454
309 // Install the filters. 455 // Install the filters.
310 InstallFilter(quiet); 456 InstallFilter();
311 457
312 // We are now inside the sandbox. 458 // We are now inside the sandbox.
313 status_ = STATUS_ENABLED; 459 status_ = STATUS_ENABLED;
314 } 460 }
315 461
316 bool Sandbox::IsSingleThreaded(int proc_fd) {
317 if (proc_fd < 0) {
318 // Cannot determine whether program is single-threaded. Hope for
319 // the best...
320 return true;
321 }
322
323 struct stat sb;
324 int task = -1;
325 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
326 fstat(task, &sb) != 0 ||
327 sb.st_nlink != 3 ||
328 HANDLE_EINTR(close(task))) {
329 if (task >= 0) {
330 if (HANDLE_EINTR(close(task))) { }
331 }
332 return false;
333 }
334 return true;
335 }
336
337 bool Sandbox::IsDenied(const ErrorCode& code) {
338 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
339 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&
340 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));
341 }
342
343 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator, 462 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,
344 void *aux) { 463 void *aux) {
345 for (SyscallIterator iter(true); !iter.Done(); ) { 464 for (SyscallIterator iter(true); !iter.Done(); ) {
346 uint32_t sysnum = iter.Next(); 465 uint32_t sysnum = iter.Next();
347 if (!IsDenied(syscall_evaluator(sysnum, aux))) { 466 if (!IsDenied(syscall_evaluator(this, sysnum, aux))) {
348 SANDBOX_DIE("Policies should deny system calls that are outside the " 467 SANDBOX_DIE("Policies should deny system calls that are outside the "
349 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); 468 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");
350 } 469 }
351 } 470 }
352 return; 471 return;
353 } 472 }
354 473
355 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
356 bool *is_unsafe = static_cast<bool *>(aux);
357 if (!*is_unsafe) {
358 if (BPF_CLASS(insn->code) == BPF_RET &&
359 insn->k > SECCOMP_RET_TRAP &&
360 insn->k - SECCOMP_RET_TRAP <= SECCOMP_RET_DATA) {
361 const ErrorCode& err =
362 Trap::ErrorCodeFromTrapId(insn->k & SECCOMP_RET_DATA);
363 if (err.error_type_ != ErrorCode::ET_INVALID && !err.safe_) {
364 *is_unsafe = true;
365 }
366 }
367 }
368 }
369
370 void Sandbox::RedirectToUserspace(Instruction *insn, void *) {
371 // When inside an UnsafeTrap() callback, we want to allow all system calls.
372 // This means, we must conditionally disable the sandbox -- and that's not
373 // something that kernel-side BPF filters can do, as they cannot inspect
374 // any state other than the syscall arguments.
375 // But if we redirect all error handlers to user-space, then we can easily
376 // make this decision.
377 // The performance penalty for this extra round-trip to user-space is not
378 // actually that bad, as we only ever pay it for denied system calls; and a
379 // typical program has very few of these.
380 if (BPF_CLASS(insn->code) == BPF_RET &&
381 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
382 insn->k = Trap(ReturnErrno,
383 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
384 }
385 }
386
387 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {
388 // We need to replicate the behavior of RedirectToUserspace(), so that our
389 // Verifier can still work correctly.
390 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);
391 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();
392 ErrorCode err = evaluator.first(sysnum, evaluator.second);
393 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
394 return Trap(ReturnErrno,
395 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
396 }
397 return err;
398 }
399
400 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) { 474 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {
401 if (status_ == STATUS_ENABLED) { 475 if (!evaluators_ || !conds_) {
402 SANDBOX_DIE("Cannot change policy after sandbox has started"); 476 SANDBOX_DIE("Cannot change policy after sandbox has started");
403 } 477 }
404 PolicySanityChecks(syscall_evaluator, aux); 478 PolicySanityChecks(syscall_evaluator, aux);
405 evaluators_.push_back(std::make_pair(syscall_evaluator, aux)); 479 evaluators_->push_back(std::make_pair(syscall_evaluator, aux));
406 } 480 }
407 481
408 void Sandbox::InstallFilter(bool quiet) { 482 void Sandbox::InstallFilter() {
409 // We want to be very careful in not imposing any requirements on the 483 // We want to be very careful in not imposing any requirements on the
410 // policies that are set with SetSandboxPolicy(). This means, as soon as 484 // policies that are set with SetSandboxPolicy(). This means, as soon as
411 // the sandbox is active, we shouldn't be relying on libraries that could 485 // the sandbox is active, we shouldn't be relying on libraries that could
412 // be making system calls. This, for example, means we should avoid 486 // be making system calls. This, for example, means we should avoid
413 // using the heap and we should avoid using STL functions. 487 // using the heap and we should avoid using STL functions.
414 // Temporarily copy the contents of the "program" vector into a 488 // Temporarily copy the contents of the "program" vector into a
415 // stack-allocated array; and then explicitly destroy that object. 489 // stack-allocated array; and then explicitly destroy that object.
416 // This makes sure we don't ex- or implicitly call new/delete after we 490 // This makes sure we don't ex- or implicitly call new/delete after we
417 // installed the BPF filter program in the kernel. Depending on the 491 // installed the BPF filter program in the kernel. Depending on the
418 // system memory allocator that is in effect, these operators can result 492 // system memory allocator that is in effect, these operators can result
419 // in system calls to things like munmap() or brk(). 493 // in system calls to things like munmap() or brk().
420 Program *program = AssembleFilter(false /* force_verification */); 494 Program *program = AssembleFilter(false /* force_verification */);
421 495
422 struct sock_filter bpf[program->size()]; 496 struct sock_filter bpf[program->size()];
423 const struct sock_fprog prog = { 497 const struct sock_fprog prog = {
424 static_cast<unsigned short>(program->size()), bpf }; 498 static_cast<unsigned short>(program->size()), bpf };
425 memcpy(bpf, &(*program)[0], sizeof(bpf)); 499 memcpy(bpf, &(*program)[0], sizeof(bpf));
426 delete program; 500 delete program;
427 501
428 // Release memory that is no longer needed 502 // Release memory that is no longer needed
429 evaluators_.clear(); 503 delete evaluators_;
430 conds_.clear(); 504 delete conds_;
505 evaluators_ = NULL;
506 conds_ = NULL;
431 507
432 // Install BPF filter program 508 // Install BPF filter program
433 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 509 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
434 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs"); 510 SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");
435 } else { 511 } else {
436 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { 512 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
437 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters"); 513 SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");
438 } 514 }
439 } 515 }
440 516
441 return; 517 return;
442 } 518 }
443 519
444 Sandbox::Program *Sandbox::AssembleFilter(bool force_verification) { 520 Sandbox::Program *Sandbox::AssembleFilter(bool force_verification) {
445 #if !defined(NDEBUG) 521 #if !defined(NDEBUG)
446 force_verification = true; 522 force_verification = true;
447 #endif 523 #endif
448 524
449 // Verify that the user pushed a policy. 525 // Verify that the user pushed a policy.
450 if (evaluators_.empty()) { 526 if (evaluators_->empty()) {
451 SANDBOX_DIE("Failed to configure system call filters"); 527 SANDBOX_DIE("Failed to configure system call filters");
452 } 528 }
453 529
454 // We can't handle stacked evaluators, yet. We'll get there eventually 530 // We can't handle stacked evaluators, yet. We'll get there eventually
455 // though. Hang tight. 531 // though. Hang tight.
456 if (evaluators_.size() != 1) { 532 if (evaluators_->size() != 1) {
457 SANDBOX_DIE("Not implemented"); 533 SANDBOX_DIE("Not implemented");
458 } 534 }
459 535
460 // Assemble the BPF filter program. 536 // Assemble the BPF filter program.
461 CodeGen *gen = new CodeGen(); 537 CodeGen *gen = new CodeGen();
462 if (!gen) { 538 if (!gen) {
463 SANDBOX_DIE("Out of memory"); 539 SANDBOX_DIE("Out of memory");
464 } 540 }
465 541
466 // If the architecture doesn't match SECCOMP_ARCH, disallow the 542 // If the architecture doesn't match SECCOMP_ARCH, disallow the
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
502 // As support for unsafe jumps essentially defeats all the security 578 // As support for unsafe jumps essentially defeats all the security
503 // measures that the sandbox provides, we print a big warning message -- 579 // measures that the sandbox provides, we print a big warning message --
504 // and of course, we make sure to only ever enable this feature if it 580 // and of course, we make sure to only ever enable this feature if it
505 // is actually requested by the sandbox policy. 581 // is actually requested by the sandbox policy.
506 if (has_unsafe_traps) { 582 if (has_unsafe_traps) {
507 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) { 583 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {
508 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this " 584 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "
509 "architecture"); 585 "architecture");
510 } 586 }
511 587
512 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; 588 EvaluateSyscall evaluateSyscall = evaluators_->begin()->first;
513 void *aux = evaluators_.begin()->second; 589 void *aux = evaluators_->begin()->second;
514 if (!evaluateSyscall(__NR_rt_sigprocmask, aux). 590 if (!evaluateSyscall(this, __NR_rt_sigprocmask, aux).
515 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) || 591 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) ||
516 !evaluateSyscall(__NR_rt_sigreturn, aux). 592 !evaluateSyscall(this, __NR_rt_sigreturn, aux).
517 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) 593 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
518 #if defined(__NR_sigprocmask) 594 #if defined(__NR_sigprocmask)
519 || !evaluateSyscall(__NR_sigprocmask, aux). 595 || !evaluateSyscall(this, __NR_sigprocmask, aux).
520 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) 596 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
521 #endif 597 #endif
522 #if defined(__NR_sigreturn) 598 #if defined(__NR_sigreturn)
523 || !evaluateSyscall(__NR_sigreturn, aux). 599 || !evaluateSyscall(this, __NR_sigreturn, aux).
524 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) 600 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
525 #endif 601 #endif
526 ) { 602 ) {
527 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must " 603 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "
528 "unconditionally allow sigreturn() and sigprocmask()"); 604 "unconditionally allow sigreturn() and sigprocmask()");
529 } 605 }
530 606
531 if (!Trap::EnableUnsafeTrapsInSigSysHandler()) { 607 if (!Trap::EnableUnsafeTrapsInSigSysHandler()) {
532 // We should never be able to get here, as UnsafeTrap() should never 608 // We should never be able to get here, as UnsafeTrap() should never
533 // actually return a valid ErrorCode object unless the user set the 609 // actually return a valid ErrorCode object unless the user set the
534 // CHROME_SANDBOX_DEBUGGING environment variable; and therefore, 610 // CHROME_SANDBOX_DEBUGGING environment variable; and therefore,
535 // "has_unsafe_traps" would always be false. But better double-check 611 // "has_unsafe_traps" would always be false. But better double-check
536 // than enabling dangerous code. 612 // than enabling dangerous code.
537 SANDBOX_DIE("We'd rather die than enable unsafe traps"); 613 SANDBOX_DIE("We'd rather die than enable unsafe traps");
538 } 614 }
539 gen->Traverse(jumptable, RedirectToUserspace, NULL); 615 gen->Traverse(jumptable, RedirectToUserspace, this);
540 616
541 // Allow system calls, if they originate from our magic return address 617 // Allow system calls, if they originate from our magic return address
542 // (which we can query by calling SandboxSyscall(-1)). 618 // (which we can query by calling SandboxSyscall(-1)).
543 uintptr_t syscall_entry_point = 619 uintptr_t syscall_entry_point =
544 static_cast<uintptr_t>(SandboxSyscall(-1)); 620 static_cast<uintptr_t>(SandboxSyscall(-1));
545 uint32_t low = static_cast<uint32_t>(syscall_entry_point); 621 uint32_t low = static_cast<uint32_t>(syscall_entry_point);
546 #if __SIZEOF_POINTER__ > 4 622 #if __SIZEOF_POINTER__ > 4
547 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); 623 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);
548 #endif 624 #endif
549 625
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
608 return program; 684 return program;
609 } 685 }
610 686
611 void Sandbox::VerifyProgram(const Program& program, bool has_unsafe_traps) { 687 void Sandbox::VerifyProgram(const Program& program, bool has_unsafe_traps) {
612 // If we previously rewrote the BPF program so that it calls user-space 688 // If we previously rewrote the BPF program so that it calls user-space
613 // whenever we return an "errno" value from the filter, then we have to 689 // whenever we return an "errno" value from the filter, then we have to
614 // wrap our system call evaluator to perform the same operation. Otherwise, 690 // wrap our system call evaluator to perform the same operation. Otherwise,
615 // the verifier would also report a mismatch in return codes. 691 // the verifier would also report a mismatch in return codes.
616 Evaluators redirected_evaluators; 692 Evaluators redirected_evaluators;
617 redirected_evaluators.push_back( 693 redirected_evaluators.push_back(
618 std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_)); 694 std::make_pair(RedirectToUserspaceEvalWrapper, evaluators_));
619 695
620 const char *err = NULL; 696 const char *err = NULL;
621 if (!Verifier::VerifyBPF( 697 if (!Verifier::VerifyBPF(
698 this,
622 program, 699 program,
623 has_unsafe_traps ? redirected_evaluators : evaluators_, 700 has_unsafe_traps ? redirected_evaluators : *evaluators_,
624 &err)) { 701 &err)) {
625 CodeGen::PrintProgram(program); 702 CodeGen::PrintProgram(program);
626 SANDBOX_DIE(err); 703 SANDBOX_DIE(err);
627 } 704 }
628 } 705 }
629 706
630 void Sandbox::FindRanges(Ranges *ranges) { 707 void Sandbox::FindRanges(Ranges *ranges) {
631 // Please note that "struct seccomp_data" defines system calls as a signed 708 // Please note that "struct seccomp_data" defines system calls as a signed
632 // int32_t, but BPF instructions always operate on unsigned quantities. We 709 // int32_t, but BPF instructions always operate on unsigned quantities. We
633 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, 710 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,
634 // and then verifying that the rest of the number range (both positive and 711 // and then verifying that the rest of the number range (both positive and
635 // negative) all return the same ErrorCode. 712 // negative) all return the same ErrorCode.
636 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first; 713 EvaluateSyscall evaluate_syscall = evaluators_->begin()->first;
637 void *aux = evaluators_.begin()->second; 714 void *aux = evaluators_->begin()->second;
638 uint32_t old_sysnum = 0; 715 uint32_t old_sysnum = 0;
639 ErrorCode old_err = evaluate_syscall(old_sysnum, aux); 716 ErrorCode old_err = evaluate_syscall(this, old_sysnum, aux);
640 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux); 717 ErrorCode invalid_err = evaluate_syscall(this, MIN_SYSCALL - 1,
718 aux);
641 for (SyscallIterator iter(false); !iter.Done(); ) { 719 for (SyscallIterator iter(false); !iter.Done(); ) {
642 uint32_t sysnum = iter.Next(); 720 uint32_t sysnum = iter.Next();
643 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux); 721 ErrorCode err = evaluate_syscall(this, static_cast<int>(sysnum), aux);
644 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) { 722 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {
645 // A proper sandbox policy should always treat system calls outside of 723 // A proper sandbox policy should always treat system calls outside of
646 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns 724 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns
647 // "false" for SyscallIterator::IsValid()) identically. Typically, all 725 // "false" for SyscallIterator::IsValid()) identically. Typically, all
648 // of these system calls would be denied with the same ErrorCode. 726 // of these system calls would be denied with the same ErrorCode.
649 SANDBOX_DIE("Invalid seccomp policy"); 727 SANDBOX_DIE("Invalid seccomp policy");
650 } 728 }
651 if (!err.Equals(old_err) || iter.Done()) { 729 if (!err.Equals(old_err) || iter.Done()) {
652 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err)); 730 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));
653 old_sysnum = sysnum; 731 old_sysnum = sysnum;
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after
883 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) { 961 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) {
884 return SandboxSyscall(args.nr, 962 return SandboxSyscall(args.nr,
885 static_cast<intptr_t>(args.args[0]), 963 static_cast<intptr_t>(args.args[0]),
886 static_cast<intptr_t>(args.args[1]), 964 static_cast<intptr_t>(args.args[1]),
887 static_cast<intptr_t>(args.args[2]), 965 static_cast<intptr_t>(args.args[2]),
888 static_cast<intptr_t>(args.args[3]), 966 static_cast<intptr_t>(args.args[3]),
889 static_cast<intptr_t>(args.args[4]), 967 static_cast<intptr_t>(args.args[4]),
890 static_cast<intptr_t>(args.args[5])); 968 static_cast<intptr_t>(args.args[5]));
891 } 969 }
892 970
893 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {
894 // TrapFnc functions report error by following the native kernel convention
895 // of returning an exit code in the range of -1..-4096. They do not try to
896 // set errno themselves. The glibc wrapper that triggered the SIGSYS will
897 // ultimately do so for us.
898 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
899 return -err;
900 }
901
902 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width, 971 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,
903 ErrorCode::Operation op, uint64_t value, 972 ErrorCode::Operation op, uint64_t value,
904 const ErrorCode& passed, const ErrorCode& failed) { 973 const ErrorCode& passed, const ErrorCode& failed) {
905 return ErrorCode(argno, width, op, value, 974 return ErrorCode(argno, width, op, value,
906 &*conds_.insert(passed).first, 975 &*conds_->insert(passed).first,
907 &*conds_.insert(failed).first); 976 &*conds_->insert(failed).first);
908 }
909
910 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) {
911 SANDBOX_DIE(static_cast<char *>(aux));
912 } 977 }
913 978
914 ErrorCode Sandbox::Kill(const char *msg) { 979 ErrorCode Sandbox::Kill(const char *msg) {
915 return Trap(BpfFailure, const_cast<char *>(msg)); 980 return Trap(BpfFailure, const_cast<char *>(msg));
916 } 981 }
917 982
918 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; 983 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
919 int Sandbox::proc_fd_ = -1;
920 Sandbox::Evaluators Sandbox::evaluators_;
921 Sandbox::Conds Sandbox::conds_;
922 984
923 } // namespace 985 } // namespace
OLDNEW
« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698