Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(712)

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 12223109: SECCOMP-BPF: Refactor the BPF sandbox API to use fewer "static" fields and methods. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebase (now, that the bitmask change has landed in the tree) Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Some headers on Android are missing cdefs: crbug.com/172337. 5 // Some headers on Android are missing cdefs: crbug.com/172337.
6 // (We can't use OS_ANDROID here since build_config.h is not included). 6 // (We can't use OS_ANDROID here since build_config.h is not included).
7 #if defined(ANDROID) 7 #if defined(ANDROID)
8 #include <sys/cdefs.h> 8 #include <sys/cdefs.h>
9 #endif 9 #endif
10 10
11 #include <errno.h>
12 #include <fcntl.h>
13 #include <string.h>
11 #include <sys/prctl.h> 14 #include <sys/prctl.h>
15 #include <sys/stat.h>
12 #include <sys/syscall.h> 16 #include <sys/syscall.h>
17 #include <sys/types.h>
18 #include <time.h>
19 #include <unistd.h>
13 20
14 #ifndef SECCOMP_BPF_STANDALONE 21 #ifndef SECCOMP_BPF_STANDALONE
15 #include "base/logging.h" 22 #include "base/logging.h"
16 #include "base/posix/eintr_wrapper.h" 23 #include "base/posix/eintr_wrapper.h"
17 #endif 24 #endif
18 25
19 #include "sandbox/linux/seccomp-bpf/codegen.h" 26 #include "sandbox/linux/seccomp-bpf/codegen.h"
20 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" 27 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
21 #include "sandbox/linux/seccomp-bpf/syscall.h" 28 #include "sandbox/linux/seccomp-bpf/syscall.h"
22 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" 29 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
23 #include "sandbox/linux/seccomp-bpf/verifier.h" 30 #include "sandbox/linux/seccomp-bpf/verifier.h"
24 31
32 // The kernel gives us a sandbox, we turn it into a playground :-)
33 // This is version 2 of the playground; version 1 was built on top of
34 // pre-BPF seccomp mode.
35 namespace playground2 {
jln (very slow on Chromium) 2013/02/15 20:58:25 Most of this can remain a real anonymous namespace
36
37 const int kExpectedExitCode = 100;
38
25 namespace { 39 namespace {
26 40
27 void WriteFailedStderrSetupMessage(int out_fd) {
28 const char* error_string = strerror(errno);
29 static const char msg[] = "You have reproduced a puzzling issue.\n"
30 "Please, report to crbug.com/152530!\n"
31 "Failed to set up stderr: ";
32 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&
33 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&
34 HANDLE_EINTR(write(out_fd, "\n", 1))) {
35 }
36 }
37
38 template<class T> int popcount(T x); 41 template<class T> int popcount(T x);
39 template<> int popcount<unsigned int>(unsigned int x) { 42 template<> int popcount<unsigned int>(unsigned int x) {
40 return __builtin_popcount(x); 43 return __builtin_popcount(x);
41 } 44 }
42 template<> int popcount<unsigned long>(unsigned long x) { 45 template<> int popcount<unsigned long>(unsigned long x) {
43 return __builtin_popcountl(x); 46 return __builtin_popcountl(x);
44 } 47 }
45 template<> int popcount<unsigned long long>(unsigned long long x) { 48 template<> int popcount<unsigned long long>(unsigned long long x) {
46 return __builtin_popcountll(x); 49 return __builtin_popcountll(x);
47 } 50 }
48 51
49 } // namespace 52 void WriteFailedStderrSetupMessage(int out_fd) {
50 53 const char* error_string = strerror(errno);
51 // The kernel gives us a sandbox, we turn it into a playground :-) 54 static const char msg[] = "You have reproduced a puzzling issue.\n"
52 // This is version 2 of the playground; version 1 was built on top of 55 "Please, report to crbug.com/152530!\n"
53 // pre-BPF seccomp mode. 56 "Failed to set up stderr: ";
54 namespace playground2 { 57 if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&
55 58 HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&
56 const int kExpectedExitCode = 100; 59 HANDLE_EINTR(write(out_fd, "\n", 1))) {
60 }
61 }
57 62
58 // We define a really simple sandbox policy. It is just good enough for us 63 // We define a really simple sandbox policy. It is just good enough for us
59 // to tell that the sandbox has actually been activated. 64 // to tell that the sandbox has actually been activated.
60 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) { 65 ErrorCode ProbeEvaluator(Sandbox *, int sysnum, void *) __attribute__((const));
66 ErrorCode ProbeEvaluator(Sandbox *, int sysnum, void *) {
61 switch (sysnum) { 67 switch (sysnum) {
62 case __NR_getpid: 68 case __NR_getpid:
63 // Return EPERM so that we can check that the filter actually ran. 69 // Return EPERM so that we can check that the filter actually ran.
64 return ErrorCode(EPERM); 70 return ErrorCode(EPERM);
65 case __NR_exit_group: 71 case __NR_exit_group:
66 // Allow exit() with a non-default return code. 72 // Allow exit() with a non-default return code.
67 return ErrorCode(ErrorCode::ERR_ALLOWED); 73 return ErrorCode(ErrorCode::ERR_ALLOWED);
68 default: 74 default:
69 // Make everything else fail in an easily recognizable way. 75 // Make everything else fail in an easily recognizable way.
70 return ErrorCode(EINVAL); 76 return ErrorCode(EINVAL);
71 } 77 }
72 } 78 }
73 79
74 void Sandbox::ProbeProcess(void) { 80 void ProbeProcess(void) {
75 if (syscall(__NR_getpid) < 0 && errno == EPERM) { 81 if (syscall(__NR_getpid) < 0 && errno == EPERM) {
76 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); 82 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
77 } 83 }
78 } 84 }
79 85
80 bool Sandbox::IsValidSyscallNumber(int sysnum) { 86 ErrorCode AllowAllEvaluator(Sandbox *, int sysnum, void *) {
81 return SyscallIterator::IsValid(sysnum); 87 if (!Sandbox::IsValidSyscallNumber(sysnum)) {
82 }
83
84 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) {
85 if (!IsValidSyscallNumber(sysnum)) {
86 return ErrorCode(ENOSYS); 88 return ErrorCode(ENOSYS);
87 } 89 }
88 return ErrorCode(ErrorCode::ERR_ALLOWED); 90 return ErrorCode(ErrorCode::ERR_ALLOWED);
89 } 91 }
90 92
91 void Sandbox::TryVsyscallProcess(void) { 93 void TryVsyscallProcess(void) {
92 time_t current_time; 94 time_t current_time;
93 // time() is implemented as a vsyscall. With an older glibc, with 95 // time() is implemented as a vsyscall. With an older glibc, with
94 // vsyscall=emulate and some versions of the seccomp BPF patch 96 // vsyscall=emulate and some versions of the seccomp BPF patch
95 // we may get SIGKILL-ed. Detect this! 97 // we may get SIGKILL-ed. Detect this!
96 if (time(&current_time) != static_cast<time_t>(-1)) { 98 if (time(&current_time) != static_cast<time_t>(-1)) {
97 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); 99 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
98 } 100 }
99 } 101 }
100 102
103 bool IsSingleThreaded(int proc_fd) {
104 if (proc_fd < 0) {
105 // Cannot determine whether program is single-threaded. Hope for
106 // the best...
107 return true;
108 }
109
110 struct stat sb;
111 int task = -1;
112 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
113 fstat(task, &sb) != 0 ||
114 sb.st_nlink != 3 ||
115 HANDLE_EINTR(close(task))) {
116 if (task >= 0) {
117 if (HANDLE_EINTR(close(task))) { }
118 }
119 return false;
120 }
121 return true;
122 }
123
124 bool IsDenied(const ErrorCode& code) {
125 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
126 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&
127 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));
128 }
129
130 // Function that can be passed as a callback function to CodeGen::Traverse().
131 // Checks whether the "insn" returns an UnsafeTrap() ErrorCode. If so, it
132 // sets the "bool" variable pointed to by "aux".
133 void CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
134 bool *is_unsafe = static_cast<bool *>(aux);
135 if (!*is_unsafe) {
136 if (BPF_CLASS(insn->code) == BPF_RET &&
137 insn->k > SECCOMP_RET_TRAP &&
138 insn->k - SECCOMP_RET_TRAP <= SECCOMP_RET_DATA) {
139 const ErrorCode& err =
140 Trap::ErrorCodeFromTrapId(insn->k & SECCOMP_RET_DATA);
141 if (err.error_type() != ErrorCode::ET_INVALID && !err.safe()) {
142 *is_unsafe = true;
143 }
144 }
145 }
146 }
147
148 // A Trap() handler that returns an "errno" value. The value is encoded
149 // in the "aux" parameter.
150 intptr_t ReturnErrno(const struct arch_seccomp_data&, void *aux) {
151 // TrapFnc functions report error by following the native kernel convention
152 // of returning an exit code in the range of -1..-4096. They do not try to
153 // set errno themselves. The glibc wrapper that triggered the SIGSYS will
154 // ultimately do so for us.
155 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
156 return -err;
157 }
158
159 // Function that can be passed as a callback function to CodeGen::Traverse().
160 // Checks whether the "insn" returns an errno value from a BPF filter. If so,
161 // it rewrites the instruction to instead call a Trap() handler that does
162 // the same thing. "aux" is ignored.
163 void RedirectToUserspace(Instruction *insn, void *aux) {
164 // When inside an UnsafeTrap() callback, we want to allow all system calls.
165 // This means, we must conditionally disable the sandbox -- and that's not
166 // something that kernel-side BPF filters can do, as they cannot inspect
167 // any state other than the syscall arguments.
168 // But if we redirect all error handlers to user-space, then we can easily
169 // make this decision.
170 // The performance penalty for this extra round-trip to user-space is not
171 // actually that bad, as we only ever pay it for denied system calls; and a
172 // typical program has very few of these.
173 Sandbox *sandbox = static_cast<Sandbox *>(aux);
174 if (BPF_CLASS(insn->code) == BPF_RET &&
175 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
176 insn->k = sandbox->Trap(ReturnErrno,
177 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
178 }
179 }
180
181 // Stackable wrapper around an Evaluators handler. Changes ErrorCodes
182 // returned by a system call evaluator to match the changes made by
183 // RedirectToUserspace(). "aux" should be a pointer to wrapped system call
184 // evaluator.
185 ErrorCode RedirectToUserspaceEvalWrapper(Sandbox *sandbox, int sysnum,
186 void *aux) {
187 // We need to replicate the behavior of RedirectToUserspace(), so that our
188 // Verifier can still work correctly.
189 Sandbox::Evaluators *evaluators =
190 reinterpret_cast<Sandbox::Evaluators *>(aux);
191 const std::pair<Sandbox::EvaluateSyscall, void *>& evaluator =
192 *evaluators->begin();
193
194 ErrorCode err = evaluator.first(sandbox, sysnum, evaluator.second);
195 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
196 return sandbox->Trap(ReturnErrno,
197 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
198 }
199 return err;
200 }
201
202 intptr_t BpfFailure(const struct arch_seccomp_data&, void *aux) {
203 SANDBOX_DIE(static_cast<char *>(aux));
204 }
205
206 } // namespace
207
208 Sandbox::Sandbox()
209 : quiet_(false),
210 proc_fd_(-1),
211 evaluators_(new Evaluators),
212 conds_(new Conds) {
213 }
214
215 Sandbox::~Sandbox() {
216 // It is generally unsafe to call any memory allocator operations or to even
217 // call arbitrary destructors after having installed a new policy. We just
218 // have no way to tell whether this policy would allow the system calls that
219 // the destructors can trigger.
220 // So, we normally destroy all of our complex state prior to starting the
221 // sandbox. But this won't happen, if the Sandbox object was created and
222 // never actually used to set up a sandbox. So, just in case, we are
223 // destroying any remaining state.
224 // The "if ()" statements are technically superfluous. But let's be explicit
225 // that we really don't want to run any code, when we already destroyed
226 // objects before setting up the sandbox.
227 if (evaluators_) {
228 delete evaluators_;
229 }
230 if (conds_) {
231 delete conds_;
232 }
233 }
234
235 bool Sandbox::IsValidSyscallNumber(int sysnum) {
236 return SyscallIterator::IsValid(sysnum);
237 }
238
239
101 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(), 240 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),
102 EvaluateSyscall syscall_evaluator, 241 Sandbox::EvaluateSyscall syscall_evaluator,
103 void *aux, 242 void *aux) {
104 int proc_fd) {
105 // Block all signals before forking a child process. This prevents an 243 // Block all signals before forking a child process. This prevents an
106 // attacker from manipulating our test by sending us an unexpected signal. 244 // attacker from manipulating our test by sending us an unexpected signal.
107 sigset_t old_mask, new_mask; 245 sigset_t old_mask, new_mask;
108 if (sigfillset(&new_mask) || 246 if (sigfillset(&new_mask) ||
109 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) { 247 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
110 SANDBOX_DIE("sigprocmask() failed"); 248 SANDBOX_DIE("sigprocmask() failed");
111 } 249 }
112 int fds[2]; 250 int fds[2];
113 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { 251 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {
114 SANDBOX_DIE("pipe() failed"); 252 SANDBOX_DIE("pipe() failed");
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
161 } 299 }
162 if (HANDLE_EINTR(close(fds[1]))) { 300 if (HANDLE_EINTR(close(fds[1]))) {
163 // This call to close() has been failing in strange ways. See 301 // This call to close() has been failing in strange ways. See
164 // crbug.com/152530. So we only fail in debug mode now. 302 // crbug.com/152530. So we only fail in debug mode now.
165 #if !defined(NDEBUG) 303 #if !defined(NDEBUG)
166 WriteFailedStderrSetupMessage(fds[1]); 304 WriteFailedStderrSetupMessage(fds[1]);
167 SANDBOX_DIE(NULL); 305 SANDBOX_DIE(NULL);
168 #endif 306 #endif
169 } 307 }
170 308
171 evaluators_.clear();
172 SetSandboxPolicy(syscall_evaluator, aux); 309 SetSandboxPolicy(syscall_evaluator, aux);
173 set_proc_fd(proc_fd); 310 StartSandbox();
174
175 // By passing "quiet=true" to "startSandboxInternal()" we suppress
176 // messages for expected and benign failures (e.g. if the current
177 // kernel lacks support for BPF filters).
178 StartSandboxInternal(true);
179 311
180 // Run our code in the sandbox. 312 // Run our code in the sandbox.
181 code_in_sandbox(); 313 code_in_sandbox();
182 314
183 // code_in_sandbox() is not supposed to return here. 315 // code_in_sandbox() is not supposed to return here.
184 SANDBOX_DIE(NULL); 316 SANDBOX_DIE(NULL);
185 } 317 }
186 318
187 // In the parent process. 319 // In the parent process.
188 if (HANDLE_EINTR(close(fds[1]))) { 320 if (HANDLE_EINTR(close(fds[1]))) {
(...skipping 24 matching lines...) Expand all
213 SANDBOX_DIE(buf); 345 SANDBOX_DIE(buf);
214 } 346 }
215 } 347 }
216 if (HANDLE_EINTR(close(fds[0]))) { 348 if (HANDLE_EINTR(close(fds[0]))) {
217 SANDBOX_DIE("close() failed"); 349 SANDBOX_DIE("close() failed");
218 } 350 }
219 351
220 return rc; 352 return rc;
221 } 353 }
222 354
223 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) { 355 bool Sandbox::KernelSupportSeccompBPF() {
224 return 356 return
225 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) && 357 RunFunctionInPolicy(ProbeProcess, ProbeEvaluator, 0) &&
226 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0, 358 RunFunctionInPolicy(TryVsyscallProcess, AllowAllEvaluator, 0);
227 proc_fd);
228 } 359 }
229 360
230 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) { 361 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {
231 // If the sandbox is currently active, we clearly must have support for 362 // If the sandbox is currently active, we clearly must have support for
232 // sandboxing. 363 // sandboxing.
233 if (status_ == STATUS_ENABLED) { 364 if (status_ == STATUS_ENABLED) {
234 return status_; 365 return status_;
235 } 366 }
236 367
237 // Even if the sandbox was previously available, something might have 368 // Even if the sandbox was previously available, something might have
(...skipping 14 matching lines...) Expand all
252 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is 383 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is
253 // actually available. 384 // actually available.
254 status_ = STATUS_AVAILABLE; 385 status_ = STATUS_AVAILABLE;
255 return status_; 386 return status_;
256 } 387 }
257 388
258 // If we have not previously checked for availability of the sandbox or if 389 // If we have not previously checked for availability of the sandbox or if
259 // we otherwise don't believe to have a good cached value, we have to 390 // we otherwise don't believe to have a good cached value, we have to
260 // perform a thorough check now. 391 // perform a thorough check now.
261 if (status_ == STATUS_UNKNOWN) { 392 if (status_ == STATUS_UNKNOWN) {
262 status_ = KernelSupportSeccompBPF(proc_fd) 393 // We create our own private copy of a "Sandbox" object. This ensures that
394 // the object does not have any policies configured, that might interfere
395 // with the tests done by "KernelSupportSeccompBPF()".
396 Sandbox sandbox;
397
398 // By setting "quiet_ = true" we suppress messages for expected and benign
399 // failures (e.g. if the current kernel lacks support for BPF filters).
400 sandbox.quiet_ = true;
401 sandbox.set_proc_fd(proc_fd);
402 status_ = sandbox.KernelSupportSeccompBPF()
263 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; 403 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;
264 404
265 // As we are performing our tests from a child process, the run-time 405 // As we are performing our tests from a child process, the run-time
266 // environment that is visible to the sandbox is always guaranteed to be 406 // environment that is visible to the sandbox is always guaranteed to be
267 // single-threaded. Let's check here whether the caller is single- 407 // single-threaded. Let's check here whether the caller is single-
268 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. 408 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.
269 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) { 409 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {
270 status_ = STATUS_UNAVAILABLE; 410 status_ = STATUS_UNAVAILABLE;
271 } 411 }
272 } 412 }
273 return status_; 413 return status_;
274 } 414 }
275 415
276 void Sandbox::set_proc_fd(int proc_fd) { 416 void Sandbox::set_proc_fd(int proc_fd) {
277 proc_fd_ = proc_fd; 417 proc_fd_ = proc_fd;
278 } 418 }
279 419
280 void Sandbox::StartSandboxInternal(bool quiet) { 420 void Sandbox::StartSandbox() {
281 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { 421 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
282 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " 422 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "
283 "unavailable"); 423 "unavailable");
284 } else if (status_ == STATUS_ENABLED) { 424 } else if (!evaluators_ || !conds_) {
285 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " 425 SANDBOX_DIE("Cannot repeatedly start sandbox. Create a separate Sandbox "
286 "setSandboxPolicy() to stack policies instead"); 426 "object instead.");
287 } 427 }
288 if (proc_fd_ < 0) { 428 if (proc_fd_ < 0) {
289 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); 429 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
290 } 430 }
291 if (proc_fd_ < 0) { 431 if (proc_fd_ < 0) {
292 // For now, continue in degraded mode, if we can't access /proc. 432 // For now, continue in degraded mode, if we can't access /proc.
293 // In the future, we might want to tighten this requirement. 433 // In the future, we might want to tighten this requirement.
294 } 434 }
295 if (!IsSingleThreaded(proc_fd_)) { 435 if (!IsSingleThreaded(proc_fd_)) {
296 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); 436 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");
297 } 437 }
298 438
299 // We no longer need access to any files in /proc. We want to do this 439 // We no longer need access to any files in /proc. We want to do this
300 // before installing the filters, just in case that our policy denies 440 // before installing the filters, just in case that our policy denies
301 // close(). 441 // close().
302 if (proc_fd_ >= 0) { 442 if (proc_fd_ >= 0) {
303 if (HANDLE_EINTR(close(proc_fd_))) { 443 if (HANDLE_EINTR(close(proc_fd_))) {
304 SANDBOX_DIE("Failed to close file descriptor for /proc"); 444 SANDBOX_DIE("Failed to close file descriptor for /proc");
305 } 445 }
306 proc_fd_ = -1; 446 proc_fd_ = -1;
307 } 447 }
308 448
309 // Install the filters. 449 // Install the filters.
310 InstallFilter(quiet); 450 InstallFilter();
311 451
312 // We are now inside the sandbox. 452 // We are now inside the sandbox.
313 status_ = STATUS_ENABLED; 453 status_ = STATUS_ENABLED;
314 } 454 }
315 455
316 bool Sandbox::IsSingleThreaded(int proc_fd) {
317 if (proc_fd < 0) {
318 // Cannot determine whether program is single-threaded. Hope for
319 // the best...
320 return true;
321 }
322
323 struct stat sb;
324 int task = -1;
325 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
326 fstat(task, &sb) != 0 ||
327 sb.st_nlink != 3 ||
328 HANDLE_EINTR(close(task))) {
329 if (task >= 0) {
330 if (HANDLE_EINTR(close(task))) { }
331 }
332 return false;
333 }
334 return true;
335 }
336
337 bool Sandbox::IsDenied(const ErrorCode& code) {
338 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
339 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&
340 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));
341 }
342
343 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator, 456 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,
344 void *aux) { 457 void *aux) {
345 for (SyscallIterator iter(true); !iter.Done(); ) { 458 for (SyscallIterator iter(true); !iter.Done(); ) {
346 uint32_t sysnum = iter.Next(); 459 uint32_t sysnum = iter.Next();
347 if (!IsDenied(syscall_evaluator(sysnum, aux))) { 460 if (!IsDenied(syscall_evaluator(this, sysnum, aux))) {
348 SANDBOX_DIE("Policies should deny system calls that are outside the " 461 SANDBOX_DIE("Policies should deny system calls that are outside the "
349 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); 462 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");
350 } 463 }
351 } 464 }
352 return; 465 return;
353 } 466 }
354 467
355 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
356 bool *is_unsafe = static_cast<bool *>(aux);
357 if (!*is_unsafe) {
358 if (BPF_CLASS(insn->code) == BPF_RET &&
359 insn->k > SECCOMP_RET_TRAP &&
360 insn->k - SECCOMP_RET_TRAP <= SECCOMP_RET_DATA) {
361 const ErrorCode& err =
362 Trap::ErrorCodeFromTrapId(insn->k & SECCOMP_RET_DATA);
363 if (err.error_type_ != ErrorCode::ET_INVALID && !err.safe_) {
364 *is_unsafe = true;
365 }
366 }
367 }
368 }
369
370 void Sandbox::RedirectToUserspace(Instruction *insn, void *) {
371 // When inside an UnsafeTrap() callback, we want to allow all system calls.
372 // This means, we must conditionally disable the sandbox -- and that's not
373 // something that kernel-side BPF filters can do, as they cannot inspect
374 // any state other than the syscall arguments.
375 // But if we redirect all error handlers to user-space, then we can easily
376 // make this decision.
377 // The performance penalty for this extra round-trip to user-space is not
378 // actually that bad, as we only ever pay it for denied system calls; and a
379 // typical program has very few of these.
380 if (BPF_CLASS(insn->code) == BPF_RET &&
381 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
382 insn->k = Trap(ReturnErrno,
383 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
384 }
385 }
386
387 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {
388 // We need to replicate the behavior of RedirectToUserspace(), so that our
389 // Verifier can still work correctly.
390 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);
391 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();
392 ErrorCode err = evaluator.first(sysnum, evaluator.second);
393 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
394 return Trap(ReturnErrno,
395 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
396 }
397 return err;
398 }
399
400 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) { 468 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {
401 if (status_ == STATUS_ENABLED) { 469 if (!evaluators_ || !conds_) {
402 SANDBOX_DIE("Cannot change policy after sandbox has started"); 470 SANDBOX_DIE("Cannot change policy after sandbox has started");
403 } 471 }
404 PolicySanityChecks(syscall_evaluator, aux); 472 PolicySanityChecks(syscall_evaluator, aux);
405 evaluators_.push_back(std::make_pair(syscall_evaluator, aux)); 473 evaluators_->push_back(std::make_pair(syscall_evaluator, aux));
406 } 474 }
407 475
408 void Sandbox::InstallFilter(bool quiet) { 476 void Sandbox::InstallFilter() {
409 // We want to be very careful in not imposing any requirements on the 477 // We want to be very careful in not imposing any requirements on the
410 // policies that are set with SetSandboxPolicy(). This means, as soon as 478 // policies that are set with SetSandboxPolicy(). This means, as soon as
411 // the sandbox is active, we shouldn't be relying on libraries that could 479 // the sandbox is active, we shouldn't be relying on libraries that could
412 // be making system calls. This, for example, means we should avoid 480 // be making system calls. This, for example, means we should avoid
413 // using the heap and we should avoid using STL functions. 481 // using the heap and we should avoid using STL functions.
414 // Temporarily copy the contents of the "program" vector into a 482 // Temporarily copy the contents of the "program" vector into a
415 // stack-allocated array; and then explicitly destroy that object. 483 // stack-allocated array; and then explicitly destroy that object.
416 // This makes sure we don't ex- or implicitly call new/delete after we 484 // This makes sure we don't ex- or implicitly call new/delete after we
417 // installed the BPF filter program in the kernel. Depending on the 485 // installed the BPF filter program in the kernel. Depending on the
418 // system memory allocator that is in effect, these operators can result 486 // system memory allocator that is in effect, these operators can result
419 // in system calls to things like munmap() or brk(). 487 // in system calls to things like munmap() or brk().
420 Program *program = AssembleFilter(false /* force_verification */); 488 Program *program = AssembleFilter(false /* force_verification */);
421 489
422 struct sock_filter bpf[program->size()]; 490 struct sock_filter bpf[program->size()];
423 const struct sock_fprog prog = { 491 const struct sock_fprog prog = {
424 static_cast<unsigned short>(program->size()), bpf }; 492 static_cast<unsigned short>(program->size()), bpf };
425 memcpy(bpf, &(*program)[0], sizeof(bpf)); 493 memcpy(bpf, &(*program)[0], sizeof(bpf));
426 delete program; 494 delete program;
427 495
428 // Release memory that is no longer needed 496 // Release memory that is no longer needed
429 evaluators_.clear(); 497 delete evaluators_;
430 conds_.clear(); 498 delete conds_;
499 evaluators_ = NULL;
500 conds_ = NULL;
431 501
432 // Install BPF filter program 502 // Install BPF filter program
433 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 503 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
434 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs"); 504 SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");
435 } else { 505 } else {
436 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { 506 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
437 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters"); 507 SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");
438 } 508 }
439 } 509 }
440 510
441 return; 511 return;
442 } 512 }
443 513
444 Sandbox::Program *Sandbox::AssembleFilter(bool force_verification) { 514 Sandbox::Program *Sandbox::AssembleFilter(bool force_verification) {
445 #if !defined(NDEBUG) 515 #if !defined(NDEBUG)
446 force_verification = true; 516 force_verification = true;
447 #endif 517 #endif
448 518
449 // Verify that the user pushed a policy. 519 // Verify that the user pushed a policy.
450 if (evaluators_.empty()) { 520 if (evaluators_->empty()) {
451 SANDBOX_DIE("Failed to configure system call filters"); 521 SANDBOX_DIE("Failed to configure system call filters");
452 } 522 }
453 523
454 // We can't handle stacked evaluators, yet. We'll get there eventually 524 // We can't handle stacked evaluators, yet. We'll get there eventually
455 // though. Hang tight. 525 // though. Hang tight.
456 if (evaluators_.size() != 1) { 526 if (evaluators_->size() != 1) {
457 SANDBOX_DIE("Not implemented"); 527 SANDBOX_DIE("Not implemented");
458 } 528 }
459 529
460 // Assemble the BPF filter program. 530 // Assemble the BPF filter program.
461 CodeGen *gen = new CodeGen(); 531 CodeGen *gen = new CodeGen();
462 if (!gen) { 532 if (!gen) {
463 SANDBOX_DIE("Out of memory"); 533 SANDBOX_DIE("Out of memory");
464 } 534 }
465 535
466 // If the architecture doesn't match SECCOMP_ARCH, disallow the 536 // If the architecture doesn't match SECCOMP_ARCH, disallow the
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
502 // As support for unsafe jumps essentially defeats all the security 572 // As support for unsafe jumps essentially defeats all the security
503 // measures that the sandbox provides, we print a big warning message -- 573 // measures that the sandbox provides, we print a big warning message --
504 // and of course, we make sure to only ever enable this feature if it 574 // and of course, we make sure to only ever enable this feature if it
505 // is actually requested by the sandbox policy. 575 // is actually requested by the sandbox policy.
506 if (has_unsafe_traps) { 576 if (has_unsafe_traps) {
507 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) { 577 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {
508 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this " 578 SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "
509 "architecture"); 579 "architecture");
510 } 580 }
511 581
512 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; 582 EvaluateSyscall evaluateSyscall = evaluators_->begin()->first;
513 void *aux = evaluators_.begin()->second; 583 void *aux = evaluators_->begin()->second;
514 if (!evaluateSyscall(__NR_rt_sigprocmask, aux). 584 if (!evaluateSyscall(this, __NR_rt_sigprocmask, aux).
515 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) || 585 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) ||
516 !evaluateSyscall(__NR_rt_sigreturn, aux). 586 !evaluateSyscall(this, __NR_rt_sigreturn, aux).
517 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) 587 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
518 #if defined(__NR_sigprocmask) 588 #if defined(__NR_sigprocmask)
519 || !evaluateSyscall(__NR_sigprocmask, aux). 589 || !evaluateSyscall(this, __NR_sigprocmask, aux).
520 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) 590 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
521 #endif 591 #endif
522 #if defined(__NR_sigreturn) 592 #if defined(__NR_sigreturn)
523 || !evaluateSyscall(__NR_sigreturn, aux). 593 || !evaluateSyscall(this, __NR_sigreturn, aux).
524 Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) 594 Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
525 #endif 595 #endif
526 ) { 596 ) {
527 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must " 597 SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "
528 "unconditionally allow sigreturn() and sigprocmask()"); 598 "unconditionally allow sigreturn() and sigprocmask()");
529 } 599 }
530 600
531 if (!Trap::EnableUnsafeTrapsInSigSysHandler()) { 601 if (!Trap::EnableUnsafeTrapsInSigSysHandler()) {
532 // We should never be able to get here, as UnsafeTrap() should never 602 // We should never be able to get here, as UnsafeTrap() should never
533 // actually return a valid ErrorCode object unless the user set the 603 // actually return a valid ErrorCode object unless the user set the
534 // CHROME_SANDBOX_DEBUGGING environment variable; and therefore, 604 // CHROME_SANDBOX_DEBUGGING environment variable; and therefore,
535 // "has_unsafe_traps" would always be false. But better double-check 605 // "has_unsafe_traps" would always be false. But better double-check
536 // than enabling dangerous code. 606 // than enabling dangerous code.
537 SANDBOX_DIE("We'd rather die than enable unsafe traps"); 607 SANDBOX_DIE("We'd rather die than enable unsafe traps");
538 } 608 }
539 gen->Traverse(jumptable, RedirectToUserspace, NULL); 609 gen->Traverse(jumptable, RedirectToUserspace, this);
540 610
541 // Allow system calls, if they originate from our magic return address 611 // Allow system calls, if they originate from our magic return address
542 // (which we can query by calling SandboxSyscall(-1)). 612 // (which we can query by calling SandboxSyscall(-1)).
543 uintptr_t syscall_entry_point = 613 uintptr_t syscall_entry_point =
544 static_cast<uintptr_t>(SandboxSyscall(-1)); 614 static_cast<uintptr_t>(SandboxSyscall(-1));
545 uint32_t low = static_cast<uint32_t>(syscall_entry_point); 615 uint32_t low = static_cast<uint32_t>(syscall_entry_point);
546 #if __SIZEOF_POINTER__ > 4 616 #if __SIZEOF_POINTER__ > 4
547 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); 617 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);
548 #endif 618 #endif
549 619
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
608 return program; 678 return program;
609 } 679 }
610 680
611 void Sandbox::VerifyProgram(const Program& program, bool has_unsafe_traps) { 681 void Sandbox::VerifyProgram(const Program& program, bool has_unsafe_traps) {
612 // If we previously rewrote the BPF program so that it calls user-space 682 // If we previously rewrote the BPF program so that it calls user-space
613 // whenever we return an "errno" value from the filter, then we have to 683 // whenever we return an "errno" value from the filter, then we have to
614 // wrap our system call evaluator to perform the same operation. Otherwise, 684 // wrap our system call evaluator to perform the same operation. Otherwise,
615 // the verifier would also report a mismatch in return codes. 685 // the verifier would also report a mismatch in return codes.
616 Evaluators redirected_evaluators; 686 Evaluators redirected_evaluators;
617 redirected_evaluators.push_back( 687 redirected_evaluators.push_back(
618 std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_)); 688 std::make_pair(RedirectToUserspaceEvalWrapper, evaluators_));
619 689
620 const char *err = NULL; 690 const char *err = NULL;
621 if (!Verifier::VerifyBPF( 691 if (!Verifier::VerifyBPF(
692 this,
622 program, 693 program,
623 has_unsafe_traps ? redirected_evaluators : evaluators_, 694 has_unsafe_traps ? redirected_evaluators : *evaluators_,
624 &err)) { 695 &err)) {
625 CodeGen::PrintProgram(program); 696 CodeGen::PrintProgram(program);
626 SANDBOX_DIE(err); 697 SANDBOX_DIE(err);
627 } 698 }
628 } 699 }
629 700
630 void Sandbox::FindRanges(Ranges *ranges) { 701 void Sandbox::FindRanges(Ranges *ranges) {
631 // Please note that "struct seccomp_data" defines system calls as a signed 702 // Please note that "struct seccomp_data" defines system calls as a signed
632 // int32_t, but BPF instructions always operate on unsigned quantities. We 703 // int32_t, but BPF instructions always operate on unsigned quantities. We
633 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, 704 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,
634 // and then verifying that the rest of the number range (both positive and 705 // and then verifying that the rest of the number range (both positive and
635 // negative) all return the same ErrorCode. 706 // negative) all return the same ErrorCode.
636 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first; 707 EvaluateSyscall evaluate_syscall = evaluators_->begin()->first;
637 void *aux = evaluators_.begin()->second; 708 void *aux = evaluators_->begin()->second;
638 uint32_t old_sysnum = 0; 709 uint32_t old_sysnum = 0;
639 ErrorCode old_err = evaluate_syscall(old_sysnum, aux); 710 ErrorCode old_err = evaluate_syscall(this, old_sysnum, aux);
640 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux); 711 ErrorCode invalid_err = evaluate_syscall(this, MIN_SYSCALL - 1,
712 aux);
641 for (SyscallIterator iter(false); !iter.Done(); ) { 713 for (SyscallIterator iter(false); !iter.Done(); ) {
642 uint32_t sysnum = iter.Next(); 714 uint32_t sysnum = iter.Next();
643 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux); 715 ErrorCode err = evaluate_syscall(this, static_cast<int>(sysnum), aux);
644 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) { 716 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {
645 // A proper sandbox policy should always treat system calls outside of 717 // A proper sandbox policy should always treat system calls outside of
646 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns 718 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns
647 // "false" for SyscallIterator::IsValid()) identically. Typically, all 719 // "false" for SyscallIterator::IsValid()) identically. Typically, all
648 // of these system calls would be denied with the same ErrorCode. 720 // of these system calls would be denied with the same ErrorCode.
649 SANDBOX_DIE("Invalid seccomp policy"); 721 SANDBOX_DIE("Invalid seccomp policy");
650 } 722 }
651 if (!err.Equals(old_err) || iter.Done()) { 723 if (!err.Equals(old_err) || iter.Done()) {
652 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err)); 724 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));
653 old_sysnum = sysnum; 725 old_sysnum = sysnum;
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after
883 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) { 955 intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) {
884 return SandboxSyscall(args.nr, 956 return SandboxSyscall(args.nr,
885 static_cast<intptr_t>(args.args[0]), 957 static_cast<intptr_t>(args.args[0]),
886 static_cast<intptr_t>(args.args[1]), 958 static_cast<intptr_t>(args.args[1]),
887 static_cast<intptr_t>(args.args[2]), 959 static_cast<intptr_t>(args.args[2]),
888 static_cast<intptr_t>(args.args[3]), 960 static_cast<intptr_t>(args.args[3]),
889 static_cast<intptr_t>(args.args[4]), 961 static_cast<intptr_t>(args.args[4]),
890 static_cast<intptr_t>(args.args[5])); 962 static_cast<intptr_t>(args.args[5]));
891 } 963 }
892 964
893 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {
894 // TrapFnc functions report error by following the native kernel convention
895 // of returning an exit code in the range of -1..-4096. They do not try to
896 // set errno themselves. The glibc wrapper that triggered the SIGSYS will
897 // ultimately do so for us.
898 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
899 return -err;
900 }
901
902 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width, 965 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,
903 ErrorCode::Operation op, uint64_t value, 966 ErrorCode::Operation op, uint64_t value,
904 const ErrorCode& passed, const ErrorCode& failed) { 967 const ErrorCode& passed, const ErrorCode& failed) {
905 return ErrorCode(argno, width, op, value, 968 return ErrorCode(argno, width, op, value,
906 &*conds_.insert(passed).first, 969 &*conds_->insert(passed).first,
907 &*conds_.insert(failed).first); 970 &*conds_->insert(failed).first);
908 }
909
910 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) {
911 SANDBOX_DIE(static_cast<char *>(aux));
912 } 971 }
913 972
914 ErrorCode Sandbox::Kill(const char *msg) { 973 ErrorCode Sandbox::Kill(const char *msg) {
915 return Trap(BpfFailure, const_cast<char *>(msg)); 974 return Trap(BpfFailure, const_cast<char *>(msg));
916 } 975 }
917 976
918 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; 977 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
919 int Sandbox::proc_fd_ = -1;
920 Sandbox::Evaluators Sandbox::evaluators_;
921 Sandbox::Conds Sandbox::conds_;
922 978
923 } // namespace 979 } // namespace
jln (very slow on Chromium) 2013/02/15 20:58:25 // namespace playground2
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698