|
OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "sandbox/linux/seccomp_bpf/sandbox_bpf.h" | |
6 | |
7 // The kernel gives us a sandbox, we turn it into a playground :-) | |
8 // This is version 2 of the playground; version 1 was built on top of | |
9 // pre-BPF seccomp mode. | |
10 namespace playground2 { | |
11 | |
12 static Sandbox::ErrorCode probeEvaluator(int signo) { | |
13 switch (signo) { | |
14 case __NR_getpid: | |
15 // Return EPERM so that we can check that the filter actually ran. | |
16 return (Sandbox::ErrorCode)EPERM; | |
17 case __NR_exit_group: | |
18 // Allow exit() with a non-default return code. | |
19 return Sandbox::SB_ALLOWED; | |
20 default: | |
21 // Make everything else fail in an easily recognizable way. | |
22 return (Sandbox::ErrorCode)EINVAL; | |
23 } | |
24 } | |
25 | |
26 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) { | |
27 // It the sandbox is currently active, we clearly must have support for | |
28 // sandboxing. | |
29 if (status_ == STATUS_ENABLED) { | |
30 return status_; | |
31 } | |
32 | |
33 // Even if the sandbox was previously available, something might have | |
34 // changed in our run-time environment. Check one more time. | |
35 if (status_ == STATUS_AVAILABLE) { | |
36 if (!isSingleThreaded(proc_fd)) { | |
37 status_ = STATUS_UNAVAILABLE; | |
38 } | |
39 return status_; | |
40 } | |
41 | |
42 // If we previously checked and we determined that kernel support was | |
43 // available but the current state of the run-time environment was | |
44 // incompatible with the sandbox, we want to perform the full test again. | |
45 // This deals with more subtle incompatibilities in the run-time | |
46 // environment. | |
47 if (status_ == STATUS_UNAVAILABLE && | |
48 isSingleThreaded(proc_fd)) { | |
Chris Evans
2012/06/04 19:17:26
Don't we go straight to STATUS_AVAILABLE at this t
Markus (顧孟勤)
2012/06/04 21:13:40
OK, I changed the code.
This is a little fragile,
| |
49 status_ == STATUS_UNKNOWN; | |
50 } | |
51 | |
52 // If we have not previously checked for availability of the sandbox or if | |
53 // we otherwise don't believe to have a good cached value, we have to | |
54 // perform a thorough check now. | |
55 if (status_ == STATUS_UNKNOWN) { | |
Chris Evans
2012/06/04 19:17:26
This whole function is looking much clearer now, a
Markus (顧孟勤)
2012/06/04 21:13:40
Done.
| |
56 sigset_t oldMask, newMask; | |
57 if (!sigfillset(&newMask) && | |
58 !sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { | |
Chris Evans
2012/06/04 19:17:26
This is another place where you can avoid the inde
Markus (顧孟勤)
2012/06/04 21:13:40
Done.
| |
59 pid_t pid = fork(); | |
60 if (pid >= 0) { | |
61 if (!pid) { | |
62 // Test a very simple sandbox policy to verify that we can | |
63 // successfully turn on sandboxing. | |
64 suppressLogging_ = true; | |
65 setSandboxPolicy(probeEvaluator, NULL); | |
66 setProcFd(proc_fd); | |
67 startSandbox(); | |
68 if (syscall(__NR_getpid) < 0 && errno == EPERM) { | |
69 syscall(__NR_exit_group, (intptr_t)100); | |
70 } | |
71 die(NULL); | |
Chris Evans
2012/06/04 19:17:26
I missed what we planned to do about the die(NULL)
Markus (顧孟勤)
2012/06/04 21:13:40
die(NULL) is explicitly safe and support. For all
| |
72 } | |
73 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { | |
74 die("sigprocmask() failed"); | |
75 } | |
76 int status; | |
77 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { | |
78 status_ = STATUS_UNAVAILABLE; | |
Chris Evans
2012/06/04 19:17:26
Maybe use die() here? We know we forked just a sin
Markus (顧孟勤)
2012/06/04 21:13:40
Done.
| |
79 return status_; | |
80 } | |
81 status_ = WIFEXITED(status) && WEXITSTATUS(status) == 100 | |
82 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; | |
83 } else { | |
84 // fork() failed. Consider this a temporary failure returning | |
85 // STATUS_UNSUPPORTED. | |
Markus (顧孟勤)
2012/06/04 21:13:40
In light of all the other changes, I felt if safer
| |
86 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { | |
87 // This should never happen. Really no good way to recover. | |
88 die("sigprocmask() failed"); | |
89 } | |
90 } | |
91 } | |
92 // As we are performing our tests from a child process, the run-time | |
93 // environment that is visible to the sandbox is always guaranteed to be | |
94 // single-threaded. Let's check here whether the caller is single- | |
95 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. | |
96 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) { | |
97 status_ = STATUS_UNAVAILABLE; | |
98 } | |
99 } | |
100 return status_; | |
101 } | |
102 | |
103 void Sandbox::setProcFd(int proc_fd) { | |
104 proc_fd_ = proc_fd; | |
105 } | |
106 | |
107 void Sandbox::startSandbox() { | |
108 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { | |
109 die("Trying to start sandbox, even though it is known to be unavailable"); | |
110 } else if (status_ == STATUS_ENABLED) { | |
111 die("Cannot start sandbox recursively. Use multiple calls to " | |
112 "setSandboxPolicy() to stack policies instead"); | |
113 } | |
114 if (proc_fd_ < 0) { | |
115 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); | |
116 } | |
117 if (proc_fd_ < 0) { | |
118 // For now, continue in degraded mode, if we can't access /proc. | |
119 // In the future, we might want to tighten this requirement. | |
120 } | |
121 if (!isSingleThreaded(proc_fd_)) { | |
122 die("Cannot start sandbox, if process is already multi-threaded"); | |
123 } | |
124 | |
125 // We no longer need access to any files in /proc. We want to do this | |
126 // before installing the filters, just in case that our policy denies | |
127 // close(). | |
128 if (proc_fd_ >= 0) { | |
129 if (HANDLE_EINTR(close(proc_fd_))) { | |
130 die("Failed to close file descriptor for /proc"); | |
131 } | |
132 proc_fd_ = -1; | |
133 } | |
134 | |
135 // Install the filters. | |
136 installFilter(); | |
137 | |
138 // We are now inside the sandbox. | |
139 status_ = STATUS_ENABLED; | |
140 } | |
141 | |
142 bool Sandbox::isSingleThreaded(int proc_fd) { | |
143 if (proc_fd < 0) { | |
144 // Cannot determine whether program is single-threaded. Hope for | |
145 // the best... | |
146 return true; | |
147 } | |
148 | |
149 struct stat sb; | |
150 int task = -1; | |
151 if (proc_fd < 0 || | |
Chris Evans
2012/06/04 19:17:26
Don't need to check proc_fd, you did it just above
Markus (顧孟勤)
2012/06/04 21:13:40
Done.
| |
152 (task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || | |
153 fstat(task, &sb) != 0 || | |
154 sb.st_nlink != 3 || | |
155 HANDLE_EINTR(close(task))) { | |
156 if (task >= 0) { | |
157 HANDLE_EINTR(close(task)); | |
158 } | |
159 return false; | |
160 } | |
161 return true; | |
162 } | |
163 | |
164 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, | |
165 EvaluateArguments argumentEvaluator) { | |
166 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); | |
167 } | |
168 | |
169 void Sandbox::installFilter() { | |
170 // Verify that the user pushed a policy. | |
171 if (evaluators_.empty()) { | |
172 filter_failed: | |
173 die("Failed to configure system call filters"); | |
174 } | |
175 | |
176 // Set new SIGSYS handler | |
177 struct sigaction sa; | |
178 memset(&sa, 0, sizeof(sa)); | |
179 sa.sa_sigaction = &sigSys; | |
180 sa.sa_flags = SA_SIGINFO; | |
181 if (sigaction(SIGSYS, &sa, NULL) < 0) { | |
182 goto filter_failed; | |
183 } | |
184 | |
185 // Unmask SIGSYS | |
186 sigset_t mask; | |
187 sigemptyset(&mask); | |
188 sigaddset(&mask, SIGSYS); | |
189 if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | |
190 goto filter_failed; | |
191 } | |
192 | |
193 // We can't handle stacked evaluators, yet. We'll get there eventually | |
194 // though. Hang tight. | |
195 if (evaluators_.size() != 1) { | |
196 die("Not implemented"); | |
197 } | |
198 | |
199 // If the architecture doesn't match SECCOMP_ARCH, disallow the | |
200 // system call. | |
201 std::vector<struct sock_filter> program; | |
202 program.push_back((struct sock_filter) | |
203 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, | |
204 offsetof(struct arch_seccomp_data, arch))); | |
205 program.push_back((struct sock_filter) | |
206 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); | |
207 program.push_back((struct sock_filter) | |
208 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
209 | |
210 // Grab the system call number, so that we can implement jump tables. | |
211 program.push_back((struct sock_filter) | |
212 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr))); | |
213 | |
214 // Evaluate all possible system calls and depending on their | |
215 // exit codes generate a BPF filter. | |
216 // This is very inefficient right now. We need to be much smarter | |
217 // eventually. | |
218 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | |
219 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL; ++sysnum) { | |
220 ErrorCode err = evaluateSyscall(sysnum); | |
221 int ret; | |
222 switch (err) { | |
223 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: | |
224 die("Not implemented"); | |
225 case SB_TRAP: | |
226 ret = SECCOMP_RET_TRAP; | |
227 break; | |
228 case SB_ALLOWED: | |
229 ret = SECCOMP_RET_ALLOW; | |
230 break; | |
231 default: | |
232 if (err >= static_cast<ErrorCode>(1) && | |
233 err <= static_cast<ErrorCode>(4096)) { | |
234 // We limit errno values to a reasonable range. In fact, the Linux ABI | |
235 // doesn't support errno values outside of this range. | |
236 ret = SECCOMP_RET_ERRNO + err; | |
237 } else { | |
238 die("Invalid ErrorCode reported by sandbox system call evaluator"); | |
239 } | |
240 break; | |
241 } | |
242 program.push_back((struct sock_filter) | |
243 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1)); | |
244 program.push_back((struct sock_filter) | |
245 BPF_STMT(BPF_RET+BPF_K, ret)); | |
246 } | |
247 | |
248 // Everything that isn't allowed is forbidden. Eventually, we would | |
249 // like to have a way to log forbidden calls, when in debug mode. | |
250 program.push_back((struct sock_filter) | |
251 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
252 | |
253 // Install BPF filter program | |
254 const struct sock_fprog prog = { program.size(), &program[0] }; | |
255 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
256 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
257 goto filter_failed; | |
258 } | |
259 | |
260 return; | |
261 } | |
262 | |
263 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | |
264 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { | |
265 die("Unexpected SIGSYS received"); | |
Chris Evans
2012/06/04 19:17:26
I still don't think LOG(FATAL) (one particular exp
Markus (顧孟勤)
2012/06/04 21:13:40
I hear you -- you are really preaching to the choi
| |
266 } | |
267 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context); | |
268 int old_errno = errno; | |
269 | |
270 // In case of error, set the REG_RESULT CPU register to the default | |
271 // errno value (i.e. EPERM). | |
272 // We need to be very careful when doing this, as some of our target | |
273 // platforms have pointer types and CPU registers that are wider than | |
274 // ints. Furthermore, the kernel ABI requires us to return a negative | |
275 // value, but errno values are usually positive. And in fact, it would | |
276 // be perfectly reasonable for somebody to have defined them as unsigned | |
277 // properties. This makes the correct incantation of type casts rather | |
278 // subtle. Sometimes, C++ is just too smart for its own good. | |
279 void *rc = (void *)(intptr_t)-(int)SECCOMP_DENY_ERRNO; | |
280 | |
281 // This is where we can add extra code to handle complex system calls. | |
282 // ... | |
283 | |
284 ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc); | |
285 errno = old_errno; | |
286 return; | |
287 } | |
288 | |
289 | |
290 bool Sandbox::suppressLogging_ = false; | |
291 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | |
292 int Sandbox::proc_fd_ = -1; | |
293 std::vector<std::pair<Sandbox::EvaluateSyscall, | |
294 Sandbox::EvaluateArguments> > Sandbox::evaluators_; | |
295 | |
296 } // namespace | |
OLD | NEW |