Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(68)

Side by Side Diff: content/common/sandbox_init_linux.cc

Issue 10843042: Create a class for seccomp-bpf sandboxing in content. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Correct typo. Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/common/seccomp_sandbox.h" 5 #include <string>
6
7 #include "base/command_line.h"
8 #include "base/logging.h"
9 #include "content/common/sandbox_linux.h"
10 #include "content/public/common/content_switches.h"
6 #include "content/public/common/sandbox_init.h" 11 #include "content/public/common/sandbox_init.h"
7 12
8 #if defined(__i386__) || defined(__x86_64__) 13 namespace content {
9 14
10 // This is an assert for GYP 15 // TODO(jln): have call sites provide a process / policy type to
11 #if !defined(OS_LINUX) 16 // InitializeSandbox().
12 #error "Linux specific file compiled on non Linux OS!" 17 void InitializeSandbox() {
13 #endif 18 bool seccomp_legacy_started = false;
19 LinuxSandbox* linux_sandbox = LinuxSandbox::GetInstance();
20 const std::string process_type =
21 CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
22 switches::kProcessType);
14 23
15 #include <asm/unistd.h>
16 #include <dlfcn.h>
17 #include <errno.h>
18 #include <fcntl.h>
19 #include <linux/audit.h>
20 #include <linux/filter.h>
21 #include <signal.h>
22 #include <string.h>
23 #include <sys/prctl.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <ucontext.h>
27 #include <unistd.h>
28 24
29 #include <vector> 25 // No matter what, it's always an error to call InitializeSandbox() after
30 26 // threads have been created.
31 #include "base/command_line.h" 27 if (!linux_sandbox->IsSingleThreaded()) {
32 #include "base/file_util.h"
33 #include "base/logging.h"
34 #include "base/time.h"
35 #include "content/common/sandbox_linux.h"
36 #include "content/public/common/content_switches.h"
37 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
38
39 // These are fairly new and not defined in all headers yet.
40 #if defined(__x86_64__)
41
42 #ifndef __NR_process_vm_readv
43 #define __NR_process_vm_readv 310
44 #endif
45
46 #ifndef __NR_process_vm_writev
47 #define __NR_process_vm_writev 311
48 #endif
49
50 #elif defined(__i386__)
51
52 #ifndef __NR_process_vm_readv
53 #define __NR_process_vm_readv 347
54 #endif
55
56 #ifndef __NR_process_vm_writev
57 #define __NR_process_vm_writev 348
58 #endif
59
60 #endif
61
62 namespace {
63
64 bool IsSingleThreaded() {
65 // Possibly racy, but it's ok because this is more of a debug check to catch
66 // new threaded situations arising during development.
67 int num_threads =
68 file_util::CountFilesCreatedAfter(FilePath("/proc/self/task"),
69 base::Time::UnixEpoch());
70
71 // We pass the test if we don't know ( == 0), because the setuid sandbox
72 // will prevent /proc access in some contexts.
73 return num_threads == 1 || num_threads == 0;
74 }
75
// Compile-time answer to "is this a Chrome OS build?", exposed as a function
// so callers can use it in ordinary expressions instead of #if blocks.
inline bool IsChromeOS() {
#if defined(OS_CHROMEOS)
  const bool is_chromeos_build = true;
#else
  const bool is_chromeos_build = false;
#endif
  return is_chromeos_build;
}
83
84 void LogSandboxStarted(const std::string& sandbox_name,
85 const std::string& process_type) {
86 const std::string activated_sandbox =
87 "Activated " + sandbox_name + " sandbox for process type: " +
88 process_type + ".";
89 if (IsChromeOS()) {
90 LOG(WARNING) << activated_sandbox;
91 } else {
92 VLOG(1) << activated_sandbox;
93 }
94 }
95
96 intptr_t CrashSIGSYS_Handler(const struct arch_seccomp_data& args, void* aux) {
97 int syscall = args.nr;
98 if (syscall >= 1024)
99 syscall = 0;
100 // Encode 8-bits of the 1st two arguments too, so we can discern which socket
101 // type, which fcntl, ... etc., without being likely to hit a mapped
102 // address.
103 // Do not encode more bits here without thinking about increasing the
104 // likelihood of collision with mapped pages.
105 syscall |= ((args.args[0] & 0xffUL) << 12);
106 syscall |= ((args.args[1] & 0xffUL) << 20);
107 // Purposefully dereference the syscall as an address so it'll show up very
108 // clearly and easily in crash dumps.
109 volatile char* addr = reinterpret_cast<volatile char*>(syscall);
110 *addr = '\0';
111 // In case we hit a mapped address, hit the null page with just the syscall,
112 // for paranoia.
113 syscall &= 0xfffUL;
114 addr = reinterpret_cast<volatile char*>(syscall);
115 *addr = '\0';
116 for (;;)
117 _exit(1);
118 }
119
// Returns true if |sysno| is one of kill, tkill or tgkill.
// TODO(jln) we need to restrict the first parameter!
bool IsKillSyscall(int sysno) {
  return sysno == __NR_kill ||
         sysno == __NR_tkill ||
         sysno == __NR_tgkill;
}
131
// Returns true if |sysno| is one of clock_gettime, gettimeofday or time.
bool IsGettimeSyscall(int sysno) {
  return sysno == __NR_clock_gettime ||
         sysno == __NR_gettimeofday ||
         sysno == __NR_time;
}
142
// Returns true if |sysno| is one of the filename-based system calls listed in
// the table below.
bool IsFileSystemSyscall(int sysno) {
  static const int kFileSystemSyscalls[] = {
    __NR_open,     __NR_openat,
    __NR_execve,   __NR_access,
    __NR_mkdir,    __NR_mkdirat,
    __NR_readlink, __NR_readlinkat,
    __NR_stat,     __NR_lstat,
    __NR_chdir,
    __NR_mknod,    __NR_mknodat,
  };
  const size_t kCount =
      sizeof(kFileSystemSyscalls) / sizeof(kFileSystemSyscalls[0]);

  for (size_t i = 0; i < kCount; ++i) {
    if (kFileSystemSyscalls[i] == sysno)
      return true;
  }
  return false;
}
163
164 bool IsAcceleratedVideoDecodeEnabled() {
165 // Accelerated video decode is currently enabled on Chrome OS,
166 // but not on Linux: crbug.com/137247.
167 bool is_enabled = IsChromeOS();
168
169 const CommandLine& command_line = *CommandLine::ForCurrentProcess();
170 is_enabled = is_enabled &&
171 !command_line.HasSwitch(switches::kDisableAcceleratedVideoDecode);
172
173 return is_enabled;
174 }
175
176 static const char kDriRcPath[] = "/etc/drirc";
177
178 // TODO(jorgelo): limited to /etc/drirc for now, extend this to cover
179 // other sandboxed file access cases.
180 int OpenWithCache(const char* pathname, int flags) {
181 static int drircfd = -1;
182 static bool do_open = true;
183 int res = -1;
184
185 if (strcmp(pathname, kDriRcPath) == 0 && flags == O_RDONLY) {
186 if (do_open) {
187 drircfd = open(pathname, flags);
188 do_open = false;
189 res = drircfd;
190 } else {
191 // dup() man page:
192 // "After a successful return from one of these system calls,
193 // the old and new file descriptors may be used interchangeably.
194 // They refer to the same open file description and thus share
195 // file offset and file status flags; for example, if the file offset
196 // is modified by using lseek(2) on one of the descriptors,
197 // the offset is also changed for the other."
198 // Since |drircfd| can be dup()'ed and read many times, we need to
199 // lseek() it to the beginning of the file before returning.
200 // We assume the caller will not keep more than one fd open at any
201 // one time. Intel driver code in Mesa that parses /etc/drirc does
202 // open()/read()/close() in the same function.
203 if (drircfd < 0) {
204 errno = ENOENT;
205 return -1;
206 }
207 int newfd = dup(drircfd);
208 if (newfd < 0) {
209 errno = ENOMEM;
210 return -1;
211 }
212 if (lseek(newfd, 0, SEEK_SET) == static_cast<off_t>(-1)) {
213 (void) HANDLE_EINTR(close(newfd));
214 errno = ENOMEM;
215 return -1;
216 }
217 res = newfd;
218 }
219 } else {
220 res = open(pathname, flags);
221 }
222
223 return res;
224 }
225
226 // We allow the GPU process to open /etc/drirc because it's needed by Mesa.
227 // OpenWithCache() has been called before enabling the sandbox, and has cached
228 // a file descriptor for /etc/drirc.
229 intptr_t GpuOpenSIGSYS_Handler(const struct arch_seccomp_data& args,
230 void* aux) {
231 uint64_t arg0 = args.args[0];
232 uint64_t arg1 = args.args[1];
233 const char* pathname = reinterpret_cast<const char*>(arg0);
234 int flags = static_cast<int>(arg1);
235
236 if (strcmp(pathname, kDriRcPath) == 0) {
237 int ret = OpenWithCache(pathname, flags);
238 return (ret == -1) ? -errno : ret;
239 } else {
240 return -ENOENT;
241 }
242 }
243
244 #if defined(__x86_64__)
245 // x86_64 only because it references system calls that are multiplexed on IA32.
246 playground2::Sandbox::ErrorCode GpuProcessPolicy_x86_64(int sysno) {
247 switch(sysno) {
248 case __NR_read:
249 case __NR_ioctl:
250 case __NR_poll:
251 case __NR_epoll_wait:
252 case __NR_recvfrom:
253 case __NR_write:
254 case __NR_writev:
255 case __NR_gettid:
256 case __NR_sched_yield: // Nvidia binary driver.
257
258 case __NR_futex:
259 case __NR_madvise:
260 case __NR_sendmsg:
261 case __NR_recvmsg:
262 case __NR_eventfd2:
263 case __NR_pipe:
264 case __NR_mmap:
265 case __NR_mprotect:
266 case __NR_clone: // TODO(jln) restrict flags.
267 case __NR_set_robust_list:
268 case __NR_getuid:
269 case __NR_geteuid:
270 case __NR_getgid:
271 case __NR_getegid:
272 case __NR_epoll_create:
273 case __NR_fcntl:
274 case __NR_socketpair:
275 case __NR_epoll_ctl:
276 case __NR_prctl:
277 case __NR_fstat:
278 case __NR_close:
279 case __NR_restart_syscall:
280 case __NR_rt_sigreturn:
281 case __NR_brk:
282 case __NR_rt_sigprocmask:
283 case __NR_munmap:
284 case __NR_dup:
285 case __NR_mlock:
286 case __NR_munlock:
287 case __NR_exit:
288 case __NR_exit_group:
289 case __NR_lseek:
290 case __NR_getpid: // Nvidia binary driver.
291 case __NR_getppid: // ATI binary driver.
292 case __NR_shutdown: // Virtual driver.
293 case __NR_rt_sigaction: // Breakpad signal handler.
294 return playground2::Sandbox::SB_ALLOWED;
295 case __NR_socket:
296 return EACCES; // Nvidia binary driver.
297 case __NR_fchmod:
298 return EPERM; // ATI binary driver.
299 case __NR_open:
300 // Accelerated video decode is enabled by default only on Chrome OS.
301 if (IsAcceleratedVideoDecodeEnabled()) {
302 // Accelerated video decode needs to open /dev/dri/card0, and
303 // dup()'ing an already open file descriptor does not work.
304 // Allow open() even though it severely weakens the sandbox,
305 // to test the sandboxing mechanism in general.
306 // TODO(jorgelo): remove this once we solve the libva issue.
307 return playground2::Sandbox::SB_ALLOWED;
308 } else {
309 // Hook open() in the GPU process to allow opening /etc/drirc,
310 // needed by Mesa.
311 // The hook needs dup(), lseek(), and close() to be allowed.
312 return playground2::Sandbox::ErrorCode(GpuOpenSIGSYS_Handler, NULL);
313 }
314 default:
315 if (IsGettimeSyscall(sysno) ||
316 IsKillSyscall(sysno)) { // GPU watchdog.
317 return playground2::Sandbox::SB_ALLOWED;
318 }
319 // Generally, filename-based syscalls will fail with ENOENT to behave
320 // similarly to a possible future setuid sandbox.
321 if (IsFileSystemSyscall(sysno)) {
322 return ENOENT;
323 }
324 // In any other case crash the program with our SIGSYS handler
325 return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL);
326 }
327 }
328
329 // x86_64 only because it references system calls that are multiplexed on IA32.
330 playground2::Sandbox::ErrorCode FlashProcessPolicy_x86_64(int sysno) {
331 switch (sysno) {
332 case __NR_futex:
333 case __NR_write:
334 case __NR_epoll_wait:
335 case __NR_read:
336 case __NR_times:
337 case __NR_clone: // TODO(jln): restrict flags.
338 case __NR_set_robust_list:
339 case __NR_getuid:
340 case __NR_geteuid:
341 case __NR_getgid:
342 case __NR_getegid:
343 case __NR_epoll_create:
344 case __NR_fcntl:
345 case __NR_socketpair:
346 case __NR_pipe:
347 case __NR_epoll_ctl:
348 case __NR_gettid:
349 case __NR_prctl:
350 case __NR_fstat:
351 case __NR_sendmsg:
352 case __NR_mmap:
353 case __NR_munmap:
354 case __NR_mprotect:
355 case __NR_madvise:
356 case __NR_rt_sigaction:
357 case __NR_rt_sigprocmask:
358 case __NR_wait4:
359 case __NR_exit_group:
360 case __NR_exit:
361 case __NR_rt_sigreturn:
362 case __NR_restart_syscall:
363 case __NR_close:
364 case __NR_recvmsg:
365 case __NR_lseek:
366 case __NR_brk:
367 case __NR_sched_yield:
368 case __NR_shutdown:
369 case __NR_sched_getaffinity:
370 case __NR_sched_setscheduler:
371 case __NR_dup: // Flash Access.
372 // These are under investigation, and hopefully not here for the long term.
373 case __NR_shmctl:
374 case __NR_shmat:
375 case __NR_shmdt:
376 return playground2::Sandbox::SB_ALLOWED;
377 case __NR_ioctl:
378 return ENOTTY; // Flash Access.
379 case __NR_socket:
380 return EACCES;
381 default:
382 if (IsGettimeSyscall(sysno) ||
383 IsKillSyscall(sysno)) {
384 return playground2::Sandbox::SB_ALLOWED;
385 }
386 if (IsFileSystemSyscall(sysno)) {
387 return ENOENT;
388 }
389 // In any other case crash the program with our SIGSYS handler.
390 return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL);
391 }
392 }
393 #endif
394
395 playground2::Sandbox::ErrorCode BlacklistPtracePolicy(int sysno) {
396 if (sysno < static_cast<int>(MIN_SYSCALL) ||
397 sysno > static_cast<int>(MAX_SYSCALL)) {
398 // TODO(jln) we should not have to do that in a trivial policy.
399 return ENOSYS;
400 }
401 switch (sysno) {
402 case __NR_ptrace:
403 case __NR_process_vm_readv:
404 case __NR_process_vm_writev:
405 case __NR_migrate_pages:
406 case __NR_move_pages:
407 return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL);
408 default:
409 return playground2::Sandbox::SB_ALLOWED;
410 }
411 }
412
413 // Allow all syscalls.
414 // This will still deny x32 or IA32 calls in 64 bits mode or
415 // 64 bits system calls in compatibility mode.
416 playground2::Sandbox::ErrorCode AllowAllPolicy(int sysno) {
417 if (sysno < static_cast<int>(MIN_SYSCALL) ||
418 sysno > static_cast<int>(MAX_SYSCALL)) {
419 // TODO(jln) we should not have to do that in a trivial policy.
420 return ENOSYS;
421 } else {
422 return playground2::Sandbox::SB_ALLOWED;
423 }
424 }
425
426 // Warms up/preloads resources needed by the policies.
427 void WarmupPolicy(playground2::Sandbox::EvaluateSyscall policy) {
428 #if defined(__x86_64__)
429 if (policy == GpuProcessPolicy_x86_64) {
430 OpenWithCache(kDriRcPath, O_RDONLY);
431 // Accelerated video decode dlopen()'s this shared object
432 // inside the sandbox, so preload it now.
433 // TODO(jorgelo): generalize this to other platforms.
434 if (IsAcceleratedVideoDecodeEnabled()) {
435 const char kI965DrvVideoPath_64[] =
436 "/usr/lib64/va/drivers/i965_drv_video.so";
437 dlopen(kI965DrvVideoPath_64, RTLD_NOW|RTLD_GLOBAL|RTLD_NODELETE);
438 }
439 }
440 #endif
441 }
442
443 // Is the sandbox fully disabled for this process?
444 bool ShouldDisableBpfSandbox(const CommandLine& command_line,
445 const std::string& process_type) {
446 if (command_line.HasSwitch(switches::kNoSandbox) ||
447 command_line.HasSwitch(switches::kDisableSeccompFilterSandbox)) {
448 return true;
449 }
450
451 if (process_type == switches::kGpuProcess) {
452 // The GPU sandbox is disabled by default in ChromeOS, enabled by default on
453 // generic Linux.
454 // TODO(jorgelo): when we feel comfortable, make this a policy decision
455 // instead. (i.e. move this to GetProcessSyscallPolicy) and return an
456 // AllowAllPolicy for lack of "--enable-gpu-sandbox".
457 bool should_disable;
458 if (IsChromeOS()) {
459 should_disable = true;
460 } else {
461 should_disable = false;
462 }
463
464 if (command_line.HasSwitch(switches::kEnableGpuSandbox))
465 should_disable = false;
466 if (command_line.HasSwitch(switches::kDisableGpuSandbox))
467 should_disable = true;
468 return should_disable;
469 }
470
471 return false;
472 }
473
474 playground2::Sandbox::EvaluateSyscall GetProcessSyscallPolicy(
475 const CommandLine& command_line,
476 const std::string& process_type) {
477 #if defined(__x86_64__)
478 if (process_type == switches::kGpuProcess) {
479 return GpuProcessPolicy_x86_64;
480 }
481
482 if (process_type == switches::kPpapiPluginProcess) {
483 // TODO(jln): figure out what to do with non-Flash PPAPI
484 // out-of-process plug-ins.
485 return FlashProcessPolicy_x86_64;
486 }
487
488 if (process_type == switches::kRendererProcess ||
489 process_type == switches::kWorkerProcess) {
490 return BlacklistPtracePolicy;
491 }
492 NOTREACHED();
493 // This will be our default if we need one.
494 return AllowAllPolicy;
495 #else
496 // On IA32, we only have a small blacklist at the moment.
497 (void) process_type;
498 return BlacklistPtracePolicy;
499 #endif // __x86_64__
500 }
501
502 // Initialize the seccomp-bpf sandbox.
503 bool InitializeBpfSandbox_x86(const CommandLine& command_line,
504 const std::string& process_type) {
505 if (ShouldDisableBpfSandbox(command_line, process_type))
506 return false;
507
508 // No matter what, InitializeSandbox() should always be called before threads
509 // are started.
510 // Note: IsSingleThreaded() will be true if /proc is not accessible!
511 if (!IsSingleThreaded()) {
512 std::string error_message = "InitializeSandbox() called with multiple " 28 std::string error_message = "InitializeSandbox() called with multiple "
513 "threads in process " + process_type; 29 "threads in process " + process_type;
514 // TODO(jln): change this into a CHECK() once we are more comfortable it 30 // TODO(jln): change this into a CHECK() once we are more comfortable it
515 // does not trigger. 31 // does not trigger.
516 // On non-DEBUG build, we still log an error
517 LOG(ERROR) << error_message; 32 LOG(ERROR) << error_message;
518 return false; 33 return;
519 } 34 }
520 35
521 // TODO(jln): find a way for the Zygote processes under the setuid sandbox to
522 // have a /proc fd and pass it here.
523 // Passing -1 as the /proc fd since we have no special way to have it for
524 // now.
525 if (playground2::Sandbox::supportsSeccompSandbox(-1) !=
526 playground2::Sandbox::STATUS_AVAILABLE) {
527 return false;
528 }
529
530 playground2::Sandbox::EvaluateSyscall SyscallPolicy =
531 GetProcessSyscallPolicy(command_line, process_type);
532
533 // Warms up resources needed by the policy we're about to enable.
534 WarmupPolicy(SyscallPolicy);
535
536 playground2::Sandbox::setSandboxPolicy(SyscallPolicy, NULL);
537 playground2::Sandbox::startSandbox();
538
539 return true;
540 }
541
542 } // anonymous namespace
543
544 #endif // defined(__i386__) || defined(__x86_64__)
545
546 namespace content {
547
548 void InitializeSandbox() {
549 #if defined(__i386__) || defined(__x86_64__)
550 const CommandLine& command_line = *CommandLine::ForCurrentProcess();
551 const std::string process_type =
552 command_line.GetSwitchValueASCII(switches::kProcessType);
553 bool seccomp_legacy_started = false;
554 bool seccomp_bpf_started = false;
555
556 // First, try to enable seccomp-legacy. 36 // First, try to enable seccomp-legacy.
557 seccomp_legacy_started = 37 seccomp_legacy_started = linux_sandbox->StartSeccompLegacy(process_type);
558 LinuxSandbox::GetInstance()->StartSeccompLegacy(process_type);
559 if (seccomp_legacy_started)
560 LogSandboxStarted("seccomp-legacy", process_type);
561 38
562 // Then, try to enable seccomp-bpf. 39 // Then, try to enable seccomp-bpf.
563 // If seccomp-legacy is enabled, seccomp-bpf initialization will crash 40 // If seccomp-legacy is enabled, seccomp-bpf initialization will crash
564 // instead of failing gracefully. 41 // instead of failing gracefully.
565 // TODO(markus): fix this (crbug.com/139872). 42 // TODO(markus): fix this (crbug.com/139872).
566 if (!seccomp_legacy_started) { 43 if (!seccomp_legacy_started) {
567 seccomp_bpf_started = 44 linux_sandbox->StartSeccompBpf(process_type);
568 InitializeBpfSandbox_x86(command_line, process_type);
569 } 45 }
570 if (seccomp_bpf_started)
571 LogSandboxStarted("seccomp-bpf", process_type);
572 #endif
573 } 46 }
574 47
575 } // namespace content 48 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698