Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(386)

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 11411254: SECCOMP-BPF: Added support for inspecting system call arguments from BPF filters. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Changelist is ready for initial review; still needs a few TODO()s to be resolved, though. Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <endian.h> 5 #include <endian.h>
6 #if __BYTE_ORDER == __BIG_ENDIAN 6 #if __BYTE_ORDER == __BIG_ENDIAN
7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit 7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit
8 // values that need to be inspected by a virtual machine that only ever 8 // values that need to be inspected by a virtual machine that only ever
9 // operates on 32bit values. The kernel developers decided how values 9 // operates on 32bit values. The kernel developers decided how values
10 // should be split into two 32bit words to achieve this goal. But at this 10 // should be split into two 32bit words to achieve this goal. But at this
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
65 65
66 // The kernel gives us a sandbox, we turn it into a playground :-) 66 // The kernel gives us a sandbox, we turn it into a playground :-)
67 // This is version 2 of the playground; version 1 was built on top of 67 // This is version 2 of the playground; version 1 was built on top of
68 // pre-BPF seccomp mode. 68 // pre-BPF seccomp mode.
69 namespace playground2 { 69 namespace playground2 {
70 70
71 const int kExpectedExitCode = 100; 71 const int kExpectedExitCode = 100;
72 72
73 // We define a really simple sandbox policy. It is just good enough for us 73 // We define a really simple sandbox policy. It is just good enough for us
74 // to tell that the sandbox has actually been activated. 74 // to tell that the sandbox has actually been activated.
75 ErrorCode Sandbox::probeEvaluator(int sysnum, void *) { 75 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) {
76 switch (sysnum) { 76 switch (sysnum) {
77 case __NR_getpid: 77 case __NR_getpid:
78 // Return EPERM so that we can check that the filter actually ran. 78 // Return EPERM so that we can check that the filter actually ran.
79 return ErrorCode(EPERM); 79 return ErrorCode(EPERM);
80 case __NR_exit_group: 80 case __NR_exit_group:
81 // Allow exit() with a non-default return code. 81 // Allow exit() with a non-default return code.
82 return ErrorCode(ErrorCode::ERR_ALLOWED); 82 return ErrorCode(ErrorCode::ERR_ALLOWED);
83 default: 83 default:
84 // Make everything else fail in an easily recognizable way. 84 // Make everything else fail in an easily recognizable way.
85 return ErrorCode(EINVAL); 85 return ErrorCode(EINVAL);
86 } 86 }
87 } 87 }
88 88
89 void Sandbox::probeProcess(void) { 89 void Sandbox::ProbeProcess(void) {
90 if (syscall(__NR_getpid) < 0 && errno == EPERM) { 90 if (syscall(__NR_getpid) < 0 && errno == EPERM) {
91 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); 91 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
92 } 92 }
93 } 93 }
94 94
95 bool Sandbox::isValidSyscallNumber(int sysnum) { 95 bool Sandbox::IsValidSyscallNumber(int sysnum) {
96 return SyscallIterator::IsValid(sysnum); 96 return SyscallIterator::IsValid(sysnum);
97 } 97 }
98 98
99 ErrorCode Sandbox::allowAllEvaluator(int sysnum, void *) { 99 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) {
100 if (!isValidSyscallNumber(sysnum)) { 100 if (!IsValidSyscallNumber(sysnum)) {
101 return ErrorCode(ENOSYS); 101 return ErrorCode(ENOSYS);
102 } 102 }
103 return ErrorCode(ErrorCode::ERR_ALLOWED); 103 return ErrorCode(ErrorCode::ERR_ALLOWED);
104 } 104 }
105 105
106 void Sandbox::tryVsyscallProcess(void) { 106 void Sandbox::TryVsyscallProcess(void) {
107 time_t current_time; 107 time_t current_time;
108 // time() is implemented as a vsyscall. With an older glibc, with 108 // time() is implemented as a vsyscall. With an older glibc, with
109 // vsyscall=emulate and some versions of the seccomp BPF patch 109 // vsyscall=emulate and some versions of the seccomp BPF patch
110 // we may get SIGKILL-ed. Detect this! 110 // we may get SIGKILL-ed. Detect this!
111 if (time(&current_time) != static_cast<time_t>(-1)) { 111 if (time(&current_time) != static_cast<time_t>(-1)) {
112 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); 112 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
113 } 113 }
114 } 114 }
115 115
116 bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(), 116 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),
117 EvaluateSyscall syscallEvaluator, 117 EvaluateSyscall syscall_evaluator,
118 void *aux, 118 void *aux,
119 int proc_fd) { 119 int proc_fd) {
120 // Block all signals before forking a child process. This prevents an 120 // Block all signals before forking a child process. This prevents an
121 // attacker from manipulating our test by sending us an unexpected signal. 121 // attacker from manipulating our test by sending us an unexpected signal.
122 sigset_t oldMask, newMask; 122 sigset_t old_mask, new_mask;
123 if (sigfillset(&newMask) || 123 if (sigfillset(&new_mask) ||
124 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { 124 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
125 SANDBOX_DIE("sigprocmask() failed"); 125 SANDBOX_DIE("sigprocmask() failed");
126 } 126 }
127 int fds[2]; 127 int fds[2];
128 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { 128 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {
129 SANDBOX_DIE("pipe() failed"); 129 SANDBOX_DIE("pipe() failed");
130 } 130 }
131 131
132 if (fds[0] <= 2 || fds[1] <= 2) { 132 if (fds[0] <= 2 || fds[1] <= 2) {
133 SANDBOX_DIE("Process started without standard file descriptors"); 133 SANDBOX_DIE("Process started without standard file descriptors");
134 } 134 }
135 135
136 pid_t pid = fork(); 136 pid_t pid = fork();
137 if (pid < 0) { 137 if (pid < 0) {
138 // Die if we cannot fork(). We would probably fail a little later 138 // Die if we cannot fork(). We would probably fail a little later
139 // anyway, as the machine is likely very close to running out of 139 // anyway, as the machine is likely very close to running out of
140 // memory. 140 // memory.
141 // But what we don't want to do is return "false", as a crafty 141 // But what we don't want to do is return "false", as a crafty
142 // attacker might cause fork() to fail at will and could trick us 142 // attacker might cause fork() to fail at will and could trick us
143 // into running without a sandbox. 143 // into running without a sandbox.
144 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails 144 sigprocmask(SIG_SETMASK, &old_mask, NULL); // OK, if it fails
145 SANDBOX_DIE("fork() failed unexpectedly"); 145 SANDBOX_DIE("fork() failed unexpectedly");
146 } 146 }
147 147
148 // In the child process 148 // In the child process
149 if (!pid) { 149 if (!pid) {
150 // Test a very simple sandbox policy to verify that we can 150 // Test a very simple sandbox policy to verify that we can
151 // successfully turn on sandboxing. 151 // successfully turn on sandboxing.
152 Die::EnableSimpleExit(); 152 Die::EnableSimpleExit();
153 153
154 if (HANDLE_EINTR(close(fds[0]))) { 154 if (HANDLE_EINTR(close(fds[0]))) {
155 WriteFailedStderrSetupMessage(fds[1]); 155 WriteFailedStderrSetupMessage(fds[1]);
156 SANDBOX_DIE(NULL); 156 SANDBOX_DIE(NULL);
157 } 157 }
158 if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) { 158 if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {
159 // Stderr could very well be a file descriptor to .xsession-errors, or 159 // Stderr could very well be a file descriptor to .xsession-errors, or
160 // another file, which could be backed by a file system that could cause 160 // another file, which could be backed by a file system that could cause
161 // dup2 to fail while trying to close stderr. It's important that we do 161 // dup2 to fail while trying to close stderr. It's important that we do
162 // not fail on trying to close stderr. 162 // not fail on trying to close stderr.
163 // If dup2 fails here, we will continue normally, this means that our 163 // If dup2 fails here, we will continue normally, this means that our
164 // parent won't cause a fatal failure if something writes to stderr in 164 // parent won't cause a fatal failure if something writes to stderr in
165 // this child. 165 // this child.
166 } 166 }
167 if (HANDLE_EINTR(close(fds[1]))) { 167 if (HANDLE_EINTR(close(fds[1]))) {
168 WriteFailedStderrSetupMessage(fds[1]); 168 WriteFailedStderrSetupMessage(fds[1]);
169 SANDBOX_DIE(NULL); 169 SANDBOX_DIE(NULL);
170 } 170 }
171 171
172 evaluators_.clear(); 172 evaluators_.clear();
173 setSandboxPolicy(syscallEvaluator, aux); 173 SetSandboxPolicy(syscall_evaluator, aux);
174 setProcFd(proc_fd); 174 SetProcFd(proc_fd);
175 175
176 // By passing "quiet=true" to "startSandboxInternal()" we suppress 176 // By passing "quiet=true" to "startSandboxInternal()" we suppress
177 // messages for expected and benign failures (e.g. if the current 177 // messages for expected and benign failures (e.g. if the current
178 // kernel lacks support for BPF filters). 178 // kernel lacks support for BPF filters).
179 startSandboxInternal(true); 179 StartSandboxInternal(true);
180 180
181 // Run our code in the sandbox. 181 // Run our code in the sandbox.
182 CodeInSandbox(); 182 code_in_sandbox();
183 183
184 // CodeInSandbox() is not supposed to return here. 184 // code_in_sandbox() is not supposed to return here.
185 SANDBOX_DIE(NULL); 185 SANDBOX_DIE(NULL);
186 } 186 }
187 187
188 // In the parent process. 188 // In the parent process.
189 if (HANDLE_EINTR(close(fds[1]))) { 189 if (HANDLE_EINTR(close(fds[1]))) {
190 SANDBOX_DIE("close() failed"); 190 SANDBOX_DIE("close() failed");
191 } 191 }
192 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { 192 if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {
193 SANDBOX_DIE("sigprocmask() failed"); 193 SANDBOX_DIE("sigprocmask() failed");
194 } 194 }
195 int status; 195 int status;
196 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { 196 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
197 SANDBOX_DIE("waitpid() failed unexpectedly"); 197 SANDBOX_DIE("waitpid() failed unexpectedly");
198 } 198 }
199 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode; 199 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;
200 200
201 // If we fail to support sandboxing, there might be an additional 201 // If we fail to support sandboxing, there might be an additional
202 // error message. If so, this was an entirely unexpected and fatal 202 // error message. If so, this was an entirely unexpected and fatal
(...skipping 11 matching lines...) Expand all
214 SANDBOX_DIE(buf); 214 SANDBOX_DIE(buf);
215 } 215 }
216 } 216 }
217 if (HANDLE_EINTR(close(fds[0]))) { 217 if (HANDLE_EINTR(close(fds[0]))) {
218 SANDBOX_DIE("close() failed"); 218 SANDBOX_DIE("close() failed");
219 } 219 }
220 220
221 return rc; 221 return rc;
222 } 222 }
223 223
224 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) { 224 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) {
225 #if defined(SECCOMP_BPF_VALGRIND_HACKS) 225 #if defined(SECCOMP_BPF_VALGRIND_HACKS)
226 if (RUNNING_ON_VALGRIND) { 226 if (RUNNING_ON_VALGRIND) {
227 // Valgrind doesn't like our run-time test. Disable testing and assume we 227 // Valgrind doesn't like our run-time test. Disable testing and assume we
228 // always support sandboxing. This feature should only ever be enabled when 228 // always support sandboxing. This feature should only ever be enabled when
229 // debugging. 229 // debugging.
230 return true; 230 return true;
231 } 231 }
232 #endif 232 #endif
233 233
234 return 234 return
235 RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, 0, proc_fd) && 235 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) &&
236 RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, 0, 236 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0,
237 proc_fd); 237 proc_fd);
238 } 238 }
239 239
240 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) { 240 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {
241 // If the sandbox is currently active, we clearly must have support for 241 // If the sandbox is currently active, we clearly must have support for
242 // sandboxing. 242 // sandboxing.
243 if (status_ == STATUS_ENABLED) { 243 if (status_ == STATUS_ENABLED) {
244 return status_; 244 return status_;
245 } 245 }
246 246
247 // Even if the sandbox was previously available, something might have 247 // Even if the sandbox was previously available, something might have
248 // changed in our run-time environment. Check one more time. 248 // changed in our run-time environment. Check one more time.
249 if (status_ == STATUS_AVAILABLE) { 249 if (status_ == STATUS_AVAILABLE) {
250 if (!isSingleThreaded(proc_fd)) { 250 if (!IsSingleThreaded(proc_fd)) {
251 status_ = STATUS_UNAVAILABLE; 251 status_ = STATUS_UNAVAILABLE;
252 } 252 }
253 return status_; 253 return status_;
254 } 254 }
255 255
256 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) { 256 if (status_ == STATUS_UNAVAILABLE && IsSingleThreaded(proc_fd)) {
257 // All state transitions resulting in STATUS_UNAVAILABLE are immediately 257 // All state transitions resulting in STATUS_UNAVAILABLE are immediately
258 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all 258 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all
259 // happen, if and only if they are triggered by the process being multi- 259 // happen, if and only if they are triggered by the process being multi-
260 // threaded. 260 // threaded.
261 // In other words, if a single-threaded process is currently in the 261 // In other words, if a single-threaded process is currently in the
262 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is 262 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is
263 // actually available. 263 // actually available.
264 status_ = STATUS_AVAILABLE; 264 status_ = STATUS_AVAILABLE;
265 return status_; 265 return status_;
266 } 266 }
267 267
268 // If we have not previously checked for availability of the sandbox or if 268 // If we have not previously checked for availability of the sandbox or if
269 // we otherwise don't believe to have a good cached value, we have to 269 // we otherwise don't believe to have a good cached value, we have to
270 // perform a thorough check now. 270 // perform a thorough check now.
271 if (status_ == STATUS_UNKNOWN) { 271 if (status_ == STATUS_UNKNOWN) {
272 status_ = kernelSupportSeccompBPF(proc_fd) 272 status_ = KernelSupportSeccompBPF(proc_fd)
273 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; 273 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;
274 274
275 // As we are performing our tests from a child process, the run-time 275 // As we are performing our tests from a child process, the run-time
276 // environment that is visible to the sandbox is always guaranteed to be 276 // environment that is visible to the sandbox is always guaranteed to be
277 // single-threaded. Let's check here whether the caller is single- 277 // single-threaded. Let's check here whether the caller is single-
278 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. 278 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.
279 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) { 279 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {
280 status_ = STATUS_UNAVAILABLE; 280 status_ = STATUS_UNAVAILABLE;
281 } 281 }
282 } 282 }
283 return status_; 283 return status_;
284 } 284 }
285 285
286 void Sandbox::setProcFd(int proc_fd) { 286 void Sandbox::SetProcFd(int proc_fd) {
287 proc_fd_ = proc_fd; 287 proc_fd_ = proc_fd;
288 } 288 }
289 289
290 void Sandbox::startSandboxInternal(bool quiet) { 290 void Sandbox::StartSandboxInternal(bool quiet) {
291 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { 291 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
292 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " 292 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "
293 "unavailable"); 293 "unavailable");
294 } else if (status_ == STATUS_ENABLED) { 294 } else if (status_ == STATUS_ENABLED) {
295 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " 295 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "
296 "setSandboxPolicy() to stack policies instead"); 296 "setSandboxPolicy() to stack policies instead");
297 } 297 }
298 if (proc_fd_ < 0) { 298 if (proc_fd_ < 0) {
299 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); 299 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
300 } 300 }
301 if (proc_fd_ < 0) { 301 if (proc_fd_ < 0) {
302 // For now, continue in degraded mode, if we can't access /proc. 302 // For now, continue in degraded mode, if we can't access /proc.
303 // In the future, we might want to tighten this requirement. 303 // In the future, we might want to tighten this requirement.
304 } 304 }
305 if (!isSingleThreaded(proc_fd_)) { 305 if (!IsSingleThreaded(proc_fd_)) {
306 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); 306 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");
307 } 307 }
308 308
309 // We no longer need access to any files in /proc. We want to do this 309 // We no longer need access to any files in /proc. We want to do this
310 // before installing the filters, just in case that our policy denies 310 // before installing the filters, just in case that our policy denies
311 // close(). 311 // close().
312 if (proc_fd_ >= 0) { 312 if (proc_fd_ >= 0) {
313 if (HANDLE_EINTR(close(proc_fd_))) { 313 if (HANDLE_EINTR(close(proc_fd_))) {
314 SANDBOX_DIE("Failed to close file descriptor for /proc"); 314 SANDBOX_DIE("Failed to close file descriptor for /proc");
315 } 315 }
316 proc_fd_ = -1; 316 proc_fd_ = -1;
317 } 317 }
318 318
319 // Install the filters. 319 // Install the filters.
320 installFilter(quiet); 320 InstallFilter(quiet);
321 321
322 // We are now inside the sandbox. 322 // We are now inside the sandbox.
323 status_ = STATUS_ENABLED; 323 status_ = STATUS_ENABLED;
324 } 324 }
325 325
326 bool Sandbox::isSingleThreaded(int proc_fd) { 326 bool Sandbox::IsSingleThreaded(int proc_fd) {
327 if (proc_fd < 0) { 327 if (proc_fd < 0) {
328 // Cannot determine whether program is single-threaded. Hope for 328 // Cannot determine whether program is single-threaded. Hope for
329 // the best... 329 // the best...
330 return true; 330 return true;
331 } 331 }
332 332
333 struct stat sb; 333 struct stat sb;
334 int task = -1; 334 int task = -1;
335 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || 335 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
336 fstat(task, &sb) != 0 || 336 fstat(task, &sb) != 0 ||
337 sb.st_nlink != 3 || 337 sb.st_nlink != 3 ||
338 HANDLE_EINTR(close(task))) { 338 HANDLE_EINTR(close(task))) {
339 if (task >= 0) { 339 if (task >= 0) {
340 if (HANDLE_EINTR(close(task))) { } 340 if (HANDLE_EINTR(close(task))) { }
341 } 341 }
342 return false; 342 return false;
343 } 343 }
344 return true; 344 return true;
345 } 345 }
346 346
347 bool Sandbox::isDenied(const ErrorCode& code) { 347 bool Sandbox::IsDenied(const ErrorCode& code) {
348 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP || 348 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
349 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) && 349 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&
350 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO)); 350 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));
351 } 351 }
352 352
353 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, 353 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,
354 void *aux) { 354 void *aux) {
355 for (SyscallIterator iter(true); !iter.Done(); ) { 355 for (SyscallIterator iter(true); !iter.Done(); ) {
356 uint32_t sysnum = iter.Next(); 356 uint32_t sysnum = iter.Next();
357 if (!isDenied(syscallEvaluator(sysnum, aux))) { 357 if (!IsDenied(syscall_evaluator(sysnum, aux))) {
358 SANDBOX_DIE("Policies should deny system calls that are outside the " 358 SANDBOX_DIE("Policies should deny system calls that are outside the "
359 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); 359 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");
360 } 360 }
361 } 361 }
362 return; 362 return;
363 } 363 }
364 364
365 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { 365 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
366 if (BPF_CLASS(insn->code) == BPF_RET && 366 if (BPF_CLASS(insn->code) == BPF_RET &&
367 insn->k > SECCOMP_RET_TRAP && 367 insn->k > SECCOMP_RET_TRAP &&
368 insn->k - SECCOMP_RET_TRAP <= trapArraySize_) { 368 insn->k - SECCOMP_RET_TRAP <= trap_array_size_) {
369 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1]; 369 const ErrorCode& err = trap_array_[insn->k - SECCOMP_RET_TRAP - 1];
370 if (!err.safe_) { 370 if (!err.safe_) {
371 bool *is_unsafe = static_cast<bool *>(aux); 371 bool *is_unsafe = static_cast<bool *>(aux);
372 *is_unsafe = true; 372 *is_unsafe = true;
373 } 373 }
374 } 374 }
375 } 375 }
376 376
377 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) { 377 void Sandbox::RedirectToUserspace(Instruction *insn, void *) {
378 // When inside an UnsafeTrap() callback, we want to allow all system calls. 378 // When inside an UnsafeTrap() callback, we want to allow all system calls.
379 // This means, we must conditionally disable the sandbox -- and that's not 379 // This means, we must conditionally disable the sandbox -- and that's not
380 // something that kernel-side BPF filters can do, as they cannot inspect 380 // something that kernel-side BPF filters can do, as they cannot inspect
381 // any state other than the syscall arguments. 381 // any state other than the syscall arguments.
382 // But if we redirect all error handlers to user-space, then we can easily 382 // But if we redirect all error handlers to user-space, then we can easily
383 // make this decision. 383 // make this decision.
384 // The performance penalty for this extra round-trip to user-space is not 384 // The performance penalty for this extra round-trip to user-space is not
385 // actually that bad, as we only ever pay it for denied system calls; and a 385 // actually that bad, as we only ever pay it for denied system calls; and a
386 // typical program has very few of these. 386 // typical program has very few of these.
387 if (BPF_CLASS(insn->code) == BPF_RET && 387 if (BPF_CLASS(insn->code) == BPF_RET &&
388 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { 388 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
389 insn->k = Trap(ReturnErrno, 389 insn->k = Trap(ReturnErrno,
390 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); 390 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
391 } 391 }
392 } 392 }
393 393
394 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { 394 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {
395 // We need to replicate the behavior of RedirectToUserspace(), so that our 395 // We need to replicate the behavior of RedirectToUserspace(), so that our
396 // Verifier can still work correctly. 396 // Verifier can still work correctly.
397 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); 397 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);
398 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); 398 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();
399 ErrorCode err = evaluator.first(sysnum, evaluator.second); 399 ErrorCode err = evaluator.first(sysnum, evaluator.second);
400 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { 400 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
401 return Trap(ReturnErrno, 401 return Trap(ReturnErrno,
402 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); 402 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
403 } 403 }
404 return err; 404 return err;
405 } 405 }
406 406
407 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { 407 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {
408 if (status_ == STATUS_ENABLED) { 408 if (status_ == STATUS_ENABLED) {
409 SANDBOX_DIE("Cannot change policy after sandbox has started"); 409 SANDBOX_DIE("Cannot change policy after sandbox has started");
410 } 410 }
411 policySanityChecks(syscallEvaluator, aux); 411 PolicySanityChecks(syscall_evaluator, aux);
412 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); 412 evaluators_.push_back(std::make_pair(syscall_evaluator, aux));
413 } 413 }
414 414
415 void Sandbox::installFilter(bool quiet) { 415 void Sandbox::InstallFilter(bool quiet) {
416 // Verify that the user pushed a policy. 416 // Verify that the user pushed a policy.
417 if (evaluators_.empty()) { 417 if (evaluators_.empty()) {
418 filter_failed: 418 filter_failed:
419 SANDBOX_DIE("Failed to configure system call filters"); 419 SANDBOX_DIE("Failed to configure system call filters");
420 } 420 }
421 421
422 // Set new SIGSYS handler 422 // Set new SIGSYS handler
423 struct sigaction sa; 423 struct sigaction sa;
424 memset(&sa, 0, sizeof(sa)); 424 memset(&sa, 0, sizeof(sa));
425 sa.sa_sigaction = sigSys; 425 sa.sa_sigaction = SigSys;
426 sa.sa_flags = SA_SIGINFO | SA_NODEFER; 426 sa.sa_flags = SA_SIGINFO | SA_NODEFER;
427 if (sigaction(SIGSYS, &sa, NULL) < 0) { 427 if (sigaction(SIGSYS, &sa, NULL) < 0) {
428 goto filter_failed; 428 goto filter_failed;
429 } 429 }
430 430
431 // Unmask SIGSYS 431 // Unmask SIGSYS
432 sigset_t mask; 432 sigset_t mask;
433 if (sigemptyset(&mask) || 433 if (sigemptyset(&mask) ||
434 sigaddset(&mask, SIGSYS) || 434 sigaddset(&mask, SIGSYS) ||
435 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { 435 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
(...skipping 15 matching lines...) Expand all
451 // If the architecture doesn't match SECCOMP_ARCH, disallow the 451 // If the architecture doesn't match SECCOMP_ARCH, disallow the
452 // system call. 452 // system call.
453 Instruction *tail; 453 Instruction *tail;
454 Instruction *head = 454 Instruction *head =
455 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 455 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
456 offsetof(struct arch_seccomp_data, arch), 456 offsetof(struct arch_seccomp_data, arch),
457 tail = 457 tail =
458 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 458 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,
459 NULL, 459 NULL,
460 gen->MakeInstruction(BPF_RET+BPF_K, 460 gen->MakeInstruction(BPF_RET+BPF_K,
461 Kill( 461 Kill("Invalid audit architecture in BPF filter"))));
462 "Invalid audit architecture in BPF filter").err_)));
463 462
464 { 463 {
465 // Evaluate all possible system calls and group their ErrorCodes into 464 // Evaluate all possible system calls and group their ErrorCodes into
466 // ranges of identical codes. 465 // ranges of identical codes.
467 Ranges ranges; 466 Ranges ranges;
468 findRanges(&ranges); 467 FindRanges(&ranges);
469 468
470 // Compile the system call ranges to an optimized BPF jumptable 469 // Compile the system call ranges to an optimized BPF jumptable
471 Instruction *jumptable = 470 Instruction *jumptable =
472 assembleJumpTable(gen, ranges.begin(), ranges.end()); 471 AssembleJumpTable(gen, ranges.begin(), ranges.end());
473 472
474 // If there is at least one UnsafeTrap() in our program, the entire sandbox 473 // If there is at least one UnsafeTrap() in our program, the entire sandbox
475 // is unsafe. We need to modify the program so that all non- 474 // is unsafe. We need to modify the program so that all non-
476 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then 475 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then
477 // allow us to temporarily disable sandboxing rules inside of callbacks to 476 // allow us to temporarily disable sandboxing rules inside of callbacks to
478 // UnsafeTrap(). 477 // UnsafeTrap().
479 has_unsafe_traps_ = false; 478 has_unsafe_traps_ = false;
480 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); 479 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);
481 480
482 // Grab the system call number, so that we can implement jump tables. 481 // Grab the system call number, so that we can implement jump tables.
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after
618 // system memory allocator that is in effect, these operators can result 617 // system memory allocator that is in effect, these operators can result
619 // in system calls to things like munmap() or brk(). 618 // in system calls to things like munmap() or brk().
620 struct sock_filter bpf[program->size()]; 619 struct sock_filter bpf[program->size()];
621 const struct sock_fprog prog = { 620 const struct sock_fprog prog = {
622 static_cast<unsigned short>(program->size()), bpf }; 621 static_cast<unsigned short>(program->size()), bpf };
623 memcpy(bpf, &(*program)[0], sizeof(bpf)); 622 memcpy(bpf, &(*program)[0], sizeof(bpf));
624 delete program; 623 delete program;
625 624
626 // Release memory that is no longer needed 625 // Release memory that is no longer needed
627 evaluators_.clear(); 626 evaluators_.clear();
627 conds_.clear();
628 628
629 #if defined(SECCOMP_BPF_VALGRIND_HACKS) 629 #if defined(SECCOMP_BPF_VALGRIND_HACKS)
630 // Valgrind is really not happy about our sandbox. Disable it when running 630 // Valgrind is really not happy about our sandbox. Disable it when running
631 // in Valgrind. This feature is dangerous and should never be enabled by 631 // in Valgrind. This feature is dangerous and should never be enabled by
632 // default. We protect it behind a pre-processor option. 632 // default. We protect it behind a pre-processor option.
633 if (!RUNNING_ON_VALGRIND) 633 if (!RUNNING_ON_VALGRIND)
634 #endif 634 #endif
635 { 635 {
636 // Install BPF filter program 636 // Install BPF filter program
637 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 637 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
638 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs"); 638 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");
639 } else { 639 } else {
640 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { 640 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
641 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters"); 641 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");
642 } 642 }
643 } 643 }
644 } 644 }
645 645
646 return; 646 return;
647 } 647 }
648 648
649 void Sandbox::findRanges(Ranges *ranges) { 649 void Sandbox::FindRanges(Ranges *ranges) {
650 // Please note that "struct seccomp_data" defines system calls as a signed 650 // Please note that "struct seccomp_data" defines system calls as a signed
651 // int32_t, but BPF instructions always operate on unsigned quantities. We 651 // int32_t, but BPF instructions always operate on unsigned quantities. We
652 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, 652 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,
653 // and then verifying that the rest of the number range (both positive and 653 // and then verifying that the rest of the number range (both positive and
654 // negative) all return the same ErrorCode. 654 // negative) all return the same ErrorCode.
655 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; 655 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first;
656 void *aux = evaluators_.begin()->second; 656 void *aux = evaluators_.begin()->second;
657 uint32_t oldSysnum = 0; 657 uint32_t old_sysnum = 0;
658 ErrorCode oldErr = evaluateSyscall(oldSysnum, aux); 658 ErrorCode old_err = evaluate_syscall(old_sysnum, aux);
659 ErrorCode invalidErr = evaluateSyscall(MIN_SYSCALL - 1, aux); 659 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux);
660 for (SyscallIterator iter(false); !iter.Done(); ) { 660 for (SyscallIterator iter(false); !iter.Done(); ) {
661 uint32_t sysnum = iter.Next(); 661 uint32_t sysnum = iter.Next();
662 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum), aux); 662 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux);
663 if (!iter.IsValid(sysnum) && !invalidErr.Equals(err)) { 663 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {
664 // A proper sandbox policy should always treat system calls outside of 664 // A proper sandbox policy should always treat system calls outside of
665 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns 665 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns
666 // "false" for SyscallIterator::IsValid()) identically. Typically, all 666 // "false" for SyscallIterator::IsValid()) identically. Typically, all
667 // of these system calls would be denied with the same ErrorCode. 667 // of these system calls would be denied with the same ErrorCode.
668 SANDBOX_DIE("Invalid seccomp policy"); 668 SANDBOX_DIE("Invalid seccomp policy");
669 } 669 }
670 if (!err.Equals(oldErr) || iter.Done()) { 670 if (!err.Equals(old_err) || iter.Done()) {
671 ranges->push_back(Range(oldSysnum, sysnum - 1, oldErr)); 671 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));
672 oldSysnum = sysnum; 672 old_sysnum = sysnum;
673 oldErr = err; 673 old_err = err;
674 } 674 }
675 } 675 }
676 } 676 }
677 677
678 Instruction *Sandbox::assembleJumpTable(CodeGen *gen, 678 Instruction *Sandbox::AssembleJumpTable(CodeGen *gen,
679 Ranges::const_iterator start, 679 Ranges::const_iterator start,
680 Ranges::const_iterator stop) { 680 Ranges::const_iterator stop) {
681 // We convert the list of system call ranges into a jump table that performs 681 // We convert the list of system call ranges into a jump table that performs
682 // a binary search over the ranges. 682 // a binary search over the ranges.
683 // As a sanity check, we need to have at least one distinct range for us 683 // As a sanity check, we need to have at least one distinct range for us
684 // to be able to build a jump table. 684 // to be able to build a jump table.
685 if (stop - start <= 0) { 685 if (stop - start <= 0) {
686 SANDBOX_DIE("Invalid set of system call ranges"); 686 SANDBOX_DIE("Invalid set of system call ranges");
687 } else if (stop - start == 1) { 687 } else if (stop - start == 1) {
688 // If we have narrowed things down to a single range object, we can 688 // If we have narrowed things down to a single range object, we can
689 // return from the BPF filter program. 689 // return from the BPF filter program.
690 return gen->MakeInstruction(BPF_RET+BPF_K, start->err); 690 return RetExpression(gen, start->err);
691 } 691 }
692 692
693 // Pick the range object that is located at the mid point of our list. 693 // Pick the range object that is located at the mid point of our list.
694 // We compare our system call number against the lowest valid system call 694 // We compare our system call number against the lowest valid system call
695 // number in this range object. If our number is lower, it is outside of 695 // number in this range object. If our number is lower, it is outside of
696 // this range object. If it is greater or equal, it might be inside. 696 // this range object. If it is greater or equal, it might be inside.
697 Ranges::const_iterator mid = start + (stop - start)/2; 697 Ranges::const_iterator mid = start + (stop - start)/2;
698 698
699 // Sub-divide the list of ranges and continue recursively. 699 // Sub-divide the list of ranges and continue recursively.
700 Instruction *jf = assembleJumpTable(gen, start, mid); 700 Instruction *jf = AssembleJumpTable(gen, start, mid);
701 Instruction *jt = assembleJumpTable(gen, mid, stop); 701 Instruction *jt = AssembleJumpTable(gen, mid, stop);
702 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf); 702 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf);
703 } 703 }
704 704
705 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { 705 Instruction *Sandbox::RetExpression(CodeGen *gen, const ErrorCode& cond) {
706 if (cond.error_type_ == ErrorCode::ET_COND) {
707 return CondExpression(gen, cond);
708 } else {
709 return gen->MakeInstruction(BPF_RET+BPF_K, cond);
710 }
711 }
712
713 Instruction *Sandbox::CondExpression(CodeGen *gen, const ErrorCode& cond) {
714 // We can only inspect the six system call arguments that are passed in
715 // CPU registers.
jln (very slow on Chromium) 2012/12/06 00:35:00 It's even worse than that and is architecture depe
Markus (顧孟勤) 2012/12/12 20:54:35 I am almost certain this is a red herring. Yes, fo
716 if (cond.argno_ < 0 || cond.argno_ >= 6) {
717 SANDBOX_DIE("Internal compiler error; invalid argument number "
718 "encountered");
719 }
720
721 // BPF programs operate on 32bit entities. Load both halves of the 64bit
722 // system call argument and then generate suitable conditional statements.
723 Instruction *msb = gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
724 offsetof(struct arch_seccomp_data, args) +
725 cond.argno_ * sizeof(uint64_t) +
726 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4)); // Most significant bits
727 Instruction *lsb = gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
728 offsetof(struct arch_seccomp_data, args) +
729 cond.argno_ * sizeof(uint64_t) +
730 (__BYTE_ORDER == __BIG_ENDIAN ? 4 : 0)); // Least significant bits
731
732 // Emit a suitable comparison statement.
733 switch (cond.op_) {
734 case ErrorCode::OP_EQUAL:
735 // Compare the least significant bits for equality
736 gen->JoinInstructions(lsb,
737 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K,
738 static_cast<uint32_t>(cond.value_),
739 RetExpression(gen, *cond.passed_),
740 RetExpression(gen, *cond.failed_)));
741
742 // If we are looking at a 64bit argument, we need to also compare the
743 // most significant bits.
744 if (cond.width_ == ErrorCode::TP_64BIT) {
745 lsb = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K,
746 static_cast<uint32_t>(cond.value_ >> 32),
747 lsb,
748 RetExpression(gen, *cond.failed_));
749 }
750 break;
751 default:
752 // TODO(markus): We can only check for equality so far.
753 SANDBOX_DIE("Not implemented");
754 break;
755 }
756
757 // Ensure that we never pass a 64bit value, when we only expect a 32bit
758 // value.
759 if (cond.width_ == ErrorCode::TP_32BIT) {
760 gen->JoinInstructions(msb,
761 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0, lsb,
762 RetExpression(gen, Kill("Unexpected 64bit argument detected"))));
763 } else {
764 gen->JoinInstructions(msb, lsb);
765 }
766
767 return msb;
768 }
769
770 void Sandbox::SigSys(int nr, siginfo_t *info, void *void_context) {
706 // Various sanity checks to make sure we actually received a signal 771 // Various sanity checks to make sure we actually received a signal
707 // triggered by a BPF filter. If something else triggered SIGSYS 772 // triggered by a BPF filter. If something else triggered SIGSYS
708 // (e.g. kill()), there is really nothing we can do with this signal. 773 // (e.g. kill()), there is really nothing we can do with this signal.
709 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context || 774 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context ||
710 info->si_errno <= 0 || 775 info->si_errno <= 0 ||
711 static_cast<size_t>(info->si_errno) > trapArraySize_) { 776 static_cast<size_t>(info->si_errno) > trap_array_size_) {
712 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal 777 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal
713 // safe and can lead to bugs. We should eventually implement a different 778 // safe and can lead to bugs. We should eventually implement a different
714 // logging and reporting mechanism that is safe to be called from 779 // logging and reporting mechanism that is safe to be called from
715 // the sigSys() handler. 780 // the SigSys() handler.
716 // TODO: If we feel confident that our code otherwise works correctly, we 781 // TODO: If we feel confident that our code otherwise works correctly, we
717 // could actually make an argument that spurious SIGSYS should 782 // could actually make an argument that spurious SIGSYS should
718 // just get silently ignored. TBD 783 // just get silently ignored. TBD
719 sigsys_err: 784 sigsys_err:
720 SANDBOX_DIE("Unexpected SIGSYS received"); 785 SANDBOX_DIE("Unexpected SIGSYS received");
721 } 786 }
(...skipping 23 matching lines...) Expand all
745 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { 810 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {
746 errno = old_errno; 811 errno = old_errno;
747 if (sigsys.nr == __NR_clone) { 812 if (sigsys.nr == __NR_clone) {
748 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); 813 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");
749 } 814 }
750 rc = SandboxSyscall(sigsys.nr, 815 rc = SandboxSyscall(sigsys.nr,
751 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), 816 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),
752 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), 817 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),
753 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); 818 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));
754 } else { 819 } else {
755 const ErrorCode& err = trapArray_[info->si_errno - 1]; 820 const ErrorCode& err = trap_array_[info->si_errno - 1];
756 if (!err.safe_) { 821 if (!err.safe_) {
757 SetIsInSigHandler(); 822 SetIsInSigHandler();
758 } 823 }
759 824
760 // Copy the seccomp-specific data into an arch_seccomp_data structure. This 825 // Copy the seccomp-specific data into an arch_seccomp_data structure. This
761 // is what we are showing to TrapFnc callbacks that the system call 826 // is what we are showing to TrapFnc callbacks that the system call
762 // evaluator registered with the sandbox. 827 // evaluator registered with the sandbox.
763 struct arch_seccomp_data data = { 828 struct arch_seccomp_data data = {
764 sigsys.nr, 829 sigsys.nr,
765 SECCOMP_ARCH, 830 SECCOMP_ARCH,
(...skipping 30 matching lines...) Expand all
796 } else { 861 } else {
797 return safe < o.safe; 862 return safe < o.safe;
798 } 863 }
799 } 864 }
800 865
801 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, 866 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,
802 bool safe) { 867 bool safe) {
803 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance 868 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance
804 // of a SECCOMP_RET_TRAP. 869 // of a SECCOMP_RET_TRAP.
805 TrapKey key(fnc, aux, safe); 870 TrapKey key(fnc, aux, safe);
806 TrapIds::const_iterator iter = trapIds_.find(key); 871 TrapIds::const_iterator iter = trap_ids_.find(key);
807 uint16_t id; 872 uint16_t id;
808 if (iter != trapIds_.end()) { 873 if (iter != trap_ids_.end()) {
809 // We have seen this pair before. Return the same id that we assigned 874 // We have seen this pair before. Return the same id that we assigned
810 // earlier. 875 // earlier.
811 id = iter->second; 876 id = iter->second;
812 } else { 877 } else {
813 // This is a new pair. Remember it and assign a new id. 878 // This is a new pair. Remember it and assign a new id.
814 // Please note that we have to store traps in memory that doesn't get 879 // Please note that we have to store traps in memory that doesn't get
815 // deallocated when the program is shutting down. A memory leak is 880 // deallocated when the program is shutting down. A memory leak is
816 // intentional, because we might otherwise not be able to execute 881 // intentional, because we might otherwise not be able to execute
817 // system calls part way through the program shutting down 882 // system calls part way through the program shutting down
818 if (!traps_) { 883 if (!traps_) {
819 traps_ = new Traps(); 884 traps_ = new Traps();
820 } 885 }
821 if (traps_->size() >= SECCOMP_RET_DATA) { 886 if (traps_->size() >= SECCOMP_RET_DATA) {
822 // In practice, this is pretty much impossible to trigger, as there 887 // In practice, this is pretty much impossible to trigger, as there
823 // are other kernel limitations that restrict overall BPF program sizes. 888 // are other kernel limitations that restrict overall BPF program sizes.
824 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); 889 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");
825 } 890 }
826 id = traps_->size() + 1; 891 id = traps_->size() + 1;
827 892
828 traps_->push_back(ErrorCode(fnc, aux, safe, id)); 893 traps_->push_back(ErrorCode(fnc, aux, safe, id));
829 trapIds_[key] = id; 894 trap_ids_[key] = id;
830 895
831 // We want to access the traps_ vector from our signal handler. But 896 // We want to access the traps_ vector from our signal handler. But
832 // we are not assured that doing so is async-signal safe. On the other 897 // we are not assured that doing so is async-signal safe. On the other
833 // hand, C++ guarantees that the contents of a vector are stored in a 898 // hand, C++ guarantees that the contents of a vector are stored in a
834 // contiguous C-style array. 899 // contiguous C-style array.
835 // So, we look up the address and size of this array outside of the 900 // So, we look up the address and size of this array outside of the
836 // signal handler, where we can safely do so. 901 // signal handler, where we can safely do so.
837 trapArray_ = &(*traps_)[0]; 902 trap_array_ = &(*traps_)[0];
838 trapArraySize_ = id; 903 trap_array_size_ = id;
839 return traps_->back(); 904 return traps_->back();
840 } 905 }
841 906
842 return ErrorCode(fnc, aux, safe, id); 907 return ErrorCode(fnc, aux, safe, id);
843 } 908 }
844 909
845 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { 910 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
846 return MakeTrap(fnc, aux, true /* Safe Trap */); 911 return MakeTrap(fnc, aux, true /* Safe Trap */);
847 } 912 }
848 913
(...skipping 13 matching lines...) Expand all
862 927
863 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { 928 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {
864 // TrapFnc functions report error by following the native kernel convention 929 // TrapFnc functions report error by following the native kernel convention
865 // of returning an exit code in the range of -1..-4096. They do not try to 930 // of returning an exit code in the range of -1..-4096. They do not try to
866 // set errno themselves. The glibc wrapper that triggered the SIGSYS will 931 // set errno themselves. The glibc wrapper that triggered the SIGSYS will
867 // ultimately do so for us. 932 // ultimately do so for us.
868 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; 933 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
869 return -err; 934 return -err;
870 } 935 }
871 936
872 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { 937 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,
938 ErrorCode::Operation op, uint64_t value,
939 const ErrorCode& passed, const ErrorCode& failed) {
940 return ErrorCode(argno, width, op, value,
941 &*conds_.insert(passed).first,
942 &*conds_.insert(failed).first);
943 }
944
945 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) {
873 SANDBOX_DIE(static_cast<char *>(aux)); 946 SANDBOX_DIE(static_cast<char *>(aux));
874 } 947 }
875 948
876 ErrorCode Sandbox::Kill(const char *msg) { 949 ErrorCode Sandbox::Kill(const char *msg) {
877 return Trap(bpfFailure, const_cast<char *>(msg)); 950 return Trap(BpfFailure, const_cast<char *>(msg));
878 } 951 }
879 952
880 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; 953 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
881 int Sandbox::proc_fd_ = -1; 954 int Sandbox::proc_fd_ = -1;
882 Sandbox::Evaluators Sandbox::evaluators_; 955 Sandbox::Evaluators Sandbox::evaluators_;
883 Sandbox::Traps *Sandbox::traps_ = NULL; 956 Sandbox::Traps *Sandbox::traps_ = NULL;
884 Sandbox::TrapIds Sandbox::trapIds_; 957 Sandbox::TrapIds Sandbox::trap_ids_;
885 ErrorCode *Sandbox::trapArray_ = NULL; 958 ErrorCode *Sandbox::trap_array_ = NULL;
886 size_t Sandbox::trapArraySize_ = 0; 959 size_t Sandbox::trap_array_size_ = 0;
887 bool Sandbox::has_unsafe_traps_ = false; 960 bool Sandbox::has_unsafe_traps_ = false;
961 Sandbox::Conds Sandbox::conds_;
888 962
889 } // namespace 963 } // namespace
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698