|
OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "sandbox/linux/seccomp_bpf/sandbox_bpf.h" | |
6 | |
7 // The kernel gives us a sandbox, we turn it into a playground :-) | |
8 // This is version 2 of the playground; version 1 was built on top of | |
9 // pre-BPF seccomp mode. | |
10 namespace playground2 { | |
11 | |
12 | |
13 static Sandbox::ErrorCode probeEvaluator(int signo) { | |
14 switch (signo) { | |
15 case __NR_getpid: | |
16 // Return EPERM so that we can check that the filter actually ran. | |
17 return (Sandbox::ErrorCode)EPERM; | |
18 case __NR_exit_group: | |
19 // Allow exit() with a non-default return code. | |
20 return Sandbox::SB_ALLOWED; | |
21 default: | |
22 // Make everything else fail in an easily recognizable way. | |
23 return (Sandbox::ErrorCode)EINVAL; | |
24 } | |
25 } | |
26 | |
27 bool Sandbox::supportsSeccompSandbox(int proc_fd) { | |
28 if (status_ == STATUS_UNKNOWN) { | |
29 if (!isSingleThreaded(proc_fd)) { | |
30 status_ = STATUS_UNSUPPORTED; | |
Chris Evans
2012/06/01 20:07:55
Should we have a specific status for that? As a us
Markus (顧孟勤)
2012/06/01 21:38:52
I see your point. While this has never come up in
| |
31 } else { | |
32 sigset_t oldMask, newMask; | |
33 sigfillset(&newMask); | |
Chris Evans
2012/06/01 20:07:55
Although this realistically isn't going to fail, s
Markus (顧孟勤)
2012/06/01 21:38:52
Done.
| |
34 if (!sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { | |
35 pid_t pid = fork(); | |
Chris Evans
2012/06/01 20:07:55
I wonder what the overhead is of this check? We'll
Markus (顧孟勤)
2012/06/01 21:38:52
That's why the result is cached in status_. You sh
| |
36 if (pid >= 0) { | |
37 if (!pid) { | |
38 // Test a very simple sandbox policy to verify that we can | |
39 // successfully turn on sandboxing. | |
40 setSandboxPolicy(probeEvaluator, NULL); | |
41 setProcFd(proc_fd); | |
42 startSandbox(); | |
43 if (syscall(__NR_getpid) < 0 && errno == EPERM) { | |
44 syscall(__NR_exit_group, (intptr_t)100); | |
Chris Evans
2012/06/01 20:07:55
Why use syscall(exit_group) here but _exit() below
Markus (顧孟勤)
2012/06/01 21:38:52
Good call. This was a mistake when I changed the c
| |
45 } | |
46 for (;;) { | |
Chris Evans
2012/06/01 20:07:55
Why is the infinite loop needed?
Markus (顧孟勤)
2012/06/01 21:38:52
We really absolutely do not want to fall through.
| |
47 _exit(1); | |
48 } | |
49 } | |
50 sigprocmask(SIG_SETMASK, &oldMask, NULL); | |
Chris Evans
2012/06/01 20:07:55
Check return values where possible.
| |
51 int status; | |
52 HANDLE_EINTR(waitpid(pid, &status, 0)); | |
Chris Evans
2012/06/01 20:07:55
Does this look at status without checking that wai
Markus (顧孟勤)
2012/06/01 21:38:52
We really should never see an error here. But you
| |
53 status_ = WIFEXITED(status) && WEXITSTATUS(status) == 100 | |
54 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; | |
55 } else { | |
Chris Evans
2012/06/01 20:07:55
Is this branch the case that fork() fails? Is it y
Markus (顧孟勤)
2012/06/01 21:38:52
Take a look at the new logic and let me know if yo
| |
56 sigprocmask(SIG_SETMASK, &oldMask, NULL); | |
57 } | |
58 } | |
59 } | |
60 } | |
61 return status_ == STATUS_AVAILABLE; | |
62 } | |
63 | |
64 void Sandbox::setProcFd(int proc_fd) { | |
65 proc_fd_ = proc_fd; | |
66 } | |
67 | |
68 void Sandbox::startSandbox() { | |
69 if (status_ == STATUS_UNSUPPORTED) { | |
Chris Evans
2012/06/01 20:07:55
What should we do for STATUS_UNKNOWN?
Markus (顧孟勤)
2012/06/01 21:38:52
STATUS_UNKNOWN is perfectly OK. This would happen
| |
70 die("Trying to start sandbox, even though it is known to be unavailable"); | |
71 } | |
72 if (proc_fd_ < 0) { | |
73 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); | |
74 } | |
75 if (proc_fd_ < 0) { | |
76 // For now, continue in degraded mode, if we can't access /proc. | |
77 // In the future, we might want to tighten this requirement. | |
78 } | |
79 if (!isSingleThreaded(proc_fd_)) { | |
80 die("Cannot start sandbox, if process is already multi-threaded"); | |
81 } | |
82 | |
83 // We no longer need access to any files in /proc. We want to do this | |
84 // before installing the filters, just in case that our policy denies | |
85 // close(). | |
86 if (proc_fd_ >= 0) { | |
87 if (HANDLE_EINTR(close(proc_fd_))) { | |
88 die("Failed to close file descriptor for /proc"); | |
89 } | |
90 proc_fd_ = -1; | |
91 } | |
92 | |
93 // Install the filters. | |
94 installFilter(); | |
95 } | |
96 | |
97 bool Sandbox::isSingleThreaded(int proc_fd) { | |
98 if (proc_fd < 0) { | |
99 // Cannot determine whether program is single-threaded. Hope for | |
100 // the best... | |
101 return true; | |
102 } | |
103 | |
104 struct stat sb; | |
105 int task = -1; | |
106 if (proc_fd < 0 || | |
107 (task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || | |
108 fstat(task, &sb) != 0 || | |
109 sb.st_nlink != 3 || | |
110 HANDLE_EINTR(close(task))) { | |
111 if (task >= 0) { | |
112 HANDLE_EINTR(close(task)); | |
113 } | |
114 return false; | |
115 } | |
116 return true; | |
117 } | |
118 | |
119 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, | |
120 EvaluateArguments argumentEvaluator) { | |
121 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); | |
122 } | |
123 | |
124 void Sandbox::installFilter() { | |
125 // Verify that the user pushed a policy. | |
126 if (evaluators_.empty()) { | |
127 filter_failed: | |
128 die("Failed to configure system call filters"); | |
129 } | |
130 | |
131 // Set new SIGSYS handler | |
132 struct sigaction sa; | |
133 memset(&sa, 0, sizeof(sa)); | |
134 sa.sa_sigaction = &sigSys; | |
135 sa.sa_flags = SA_SIGINFO; | |
136 if (sigaction(SIGSYS, &sa, NULL) < 0) { | |
137 goto filter_failed; | |
138 } | |
139 | |
140 // Unmask SIGSYS | |
141 sigset_t mask; | |
142 sigemptyset(&mask); | |
143 sigaddset(&mask, SIGSYS); | |
144 if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | |
145 goto filter_failed; | |
146 } | |
147 | |
148 // We can't handle stacked evaluators, yet. We'll get there eventually | |
149 // though. Hang tight. | |
150 if (evaluators_.size() != 1) { | |
151 die("Not implemented"); | |
152 } | |
153 | |
154 // If the architecture doesn't match SECCOMP_ARCH, disallow the | |
155 // system call. | |
156 std::vector<struct sock_filter> program; | |
157 program.push_back((struct sock_filter) | |
158 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, | |
159 offsetof(struct arch_seccomp_data, arch))); | |
160 program.push_back((struct sock_filter) | |
161 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); | |
162 program.push_back((struct sock_filter) | |
163 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_DENY)); | |
164 | |
165 // Grab the system call number, so that we can implement jump tables. | |
166 program.push_back((struct sock_filter) | |
167 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr))); | |
168 | |
169 // Evaluate all possible system calls and depending on their | |
170 // exit codes generate a BPF filter. | |
171 // This is very inefficient right now. We need to be much smarter | |
172 // eventually. | |
Jorge Lucangeli Obes
2012/06/01 03:06:18
Definitely. In particular, we cannot keep "hot" sy
Chris Evans
2012/06/01 20:07:55
Agree with Jorge, checking e.g. syscall 200 will b
Markus (顧孟勤)
2012/06/01 21:38:52
That's the plan. In my first attempt at writing a
| |
173 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | |
174 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL; ++sysnum) { | |
175 ErrorCode err = evaluateSyscall(sysnum); | |
176 int ret; | |
177 switch (err) { | |
178 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: | |
179 die("Not implemented"); | |
180 case SB_TRAP: | |
181 ret = SECCOMP_RET_TRAP; | |
182 break; | |
183 case SB_ALLOWED: | |
184 ret = SECCOMP_RET_ALLOW; | |
185 break; | |
186 default: | |
187 ret = SECCOMP_RET_ERRNO + err; | |
Chris Evans
2012/06/01 20:07:55
For paranoid, probably check that "err" is within
Markus (顧孟勤)
2012/06/01 21:38:52
I like how you are thinking :-)
| |
188 break; | |
189 } | |
190 program.push_back((struct sock_filter) | |
191 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1)); | |
192 program.push_back((struct sock_filter) | |
193 BPF_STMT(BPF_RET+BPF_K, ret)); | |
194 } | |
195 | |
196 // Everything that isn't allowed is forbidden. Eventually, we would | |
197 // like to have a way to log forbidden calls, when in debug mode. | |
198 program.push_back((struct sock_filter) | |
199 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_DENY)); | |
200 | |
201 // Install BPF filter program | |
202 const struct sock_fprog prog = { program.size(), &program[0] }; | |
203 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
204 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
205 goto filter_failed; | |
206 } | |
207 | |
208 return; | |
209 } | |
210 | |
211 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | |
212 if (info->si_code != SYS_SECCOMP || !void_context) { | |
213 die("Unexpected SIGSYS received"); | |
Chris Evans
2012/06/01 20:07:55
I'd also check that nr==SIGSYS
Markus (顧孟勤)
2012/06/01 21:38:52
Done.
| |
214 } | |
215 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context); | |
216 int old_errno = errno; | |
217 void *rc = | |
218 (void *)(intptr_t)-(int)(SECCOMP_RET_DENY & SECCOMP_RET_DATA); | |
Chris Evans
2012/06/01 20:07:55
I don't understand that; may be worth a comment?
Markus (顧孟勤)
2012/06/01 21:38:52
I added a comment. No idea whether it makes things
| |
219 | |
220 // This is where we can add extra code to handle complex system calls. | |
221 // ... | |
222 | |
223 ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc); | |
224 errno = old_errno; | |
225 return; | |
226 } | |
227 | |
228 | |
229 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | |
230 int Sandbox::proc_fd_ = -1; | |
231 std::vector<std::pair<Sandbox::EvaluateSyscall, | |
232 Sandbox::EvaluateArguments> > Sandbox::evaluators_; | |
233 | |
234 } // namespace | |
OLD | NEW |