|
OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "sandbox/linux/seccomp_bpf/sandbox_bpf.h" | |
6 | |
7 // The kernel gives us a sandbox, we turn it into a playground :-) | |
8 // This is version 2 of the playground; version 1 was built on top of | |
9 // pre-BPF seccomp mode. | |
10 namespace playground2 { | |
11 | |
Chris Evans
2012/06/01 22:48:23
Redundant newline.
Markus (顧孟勤)
2012/06/01 23:46:33
Done.
| |
12 | |
13 static Sandbox::ErrorCode probeEvaluator(int signo) { | |
14 switch (signo) { | |
15 case __NR_getpid: | |
16 // Return EPERM so that we can check that the filter actually ran. | |
17 return (Sandbox::ErrorCode)EPERM; | |
18 case __NR_exit_group: | |
19 // Allow exit() with a non-default return code. | |
20 return Sandbox::SB_ALLOWED; | |
21 default: | |
22 // Make everything else fail in an easily recognizable way. | |
23 return (Sandbox::ErrorCode)EINVAL; | |
24 } | |
25 } | |
26 | |
27 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) { | |
Chris Evans
2012/06/01 22:48:23
Just wanted to note that this API draws its result
Markus (顧孟勤)
2012/06/01 23:46:33
I think, I explained the rationale in my other e-m
| |
28 if (status_ == STATUS_UNKNOWN || status_ == STATUS_UNAVAILABLE) { | |
Chris Evans
2012/06/01 22:48:23
Style issue: these nested ifs are getting harder t
Markus (顧孟勤)
2012/06/01 23:46:33
Done.
| |
29 if (!isSingleThreaded(proc_fd)) { | |
30 status_ = STATUS_UNAVAILABLE; | |
31 } else { | |
32 sigset_t oldMask, newMask; | |
33 if (!sigfillset(&newMask) && | |
34 !sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { | |
35 pid_t pid = fork(); | |
36 if (pid >= 0) { | |
37 if (!pid) { | |
38 // Test a very simple sandbox policy to verify that we can | |
39 // successfully turn on sandboxing. | |
40 suppressLogging_ = true; | |
41 setSandboxPolicy(probeEvaluator, NULL); | |
42 setProcFd(proc_fd); | |
43 startSandbox(); | |
44 if (syscall(__NR_getpid) < 0 && errno == EPERM) { | |
45 syscall(__NR_exit_group, (intptr_t)100); | |
46 } | |
47 die(NULL); | |
Chris Evans
2012/06/01 22:48:23
die() calls strlen() on the pointer so seems like
Markus (顧孟勤)
2012/06/01 23:46:33
Good call. die() was already safe, when used in st
| |
48 } | |
49 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { | |
50 die("sigprocmask() failed"); | |
51 } | |
52 int status; | |
53 status_ = HANDLE_EINTR(waitpid(pid, &status, 0)) != pid | |
Chris Evans
2012/06/01 22:48:23
Nested ternarys are a real PITA to read / parse, c
Markus (顧孟勤)
2012/06/01 23:46:33
Done.
| |
54 ? STATUS_UNAVAILABLE | |
55 : WIFEXITED(status) && WEXITSTATUS(status) == 100 | |
56 ? STATUS_AVAILABLE | |
57 : STATUS_UNSUPPORTED; | |
58 } else if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { | |
Chris Evans
2012/06/01 22:48:23
It looks like this branch is associated with fork(
Markus (顧孟勤)
2012/06/01 23:46:33
Done.
| |
59 die("sigprocmask() failed"); | |
60 } | |
61 } | |
62 } | |
63 } else if (status_ == STATUS_AVAILABLE && | |
64 !isSingleThreaded(proc_fd)) { | |
65 status_ = STATUS_UNAVAILABLE; | |
Chris Evans
2012/06/01 22:48:23
Still would prefer a dedicated error, STATUS_BADTH
Markus (顧孟勤)
2012/06/01 23:46:33
I don't really want the caller to have to know too
| |
66 } | |
67 return status_; | |
68 } | |
69 | |
70 void Sandbox::setProcFd(int proc_fd) { | |
71 proc_fd_ = proc_fd; | |
72 } | |
73 | |
74 void Sandbox::startSandbox() { | |
75 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { | |
76 die("Trying to start sandbox, even though it is known to be unavailable"); | |
77 } else if (status_ == STATUS_ENABLED) { | |
78 die("Cannot start sandbox recursively. Use multiple calls to " | |
79 "setSandboxPolicy() to stack policies instead"); | |
80 } | |
81 if (proc_fd_ < 0) { | |
82 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); | |
83 } | |
84 if (proc_fd_ < 0) { | |
85 // For now, continue in degraded mode, if we can't access /proc. | |
86 // In the future, we might want to tighten this requirement. | |
87 } | |
88 if (!isSingleThreaded(proc_fd_)) { | |
89 die("Cannot start sandbox, if process is already multi-threaded"); | |
90 } | |
91 | |
92 // We no longer need access to any files in /proc. We want to do this | |
93 // before installing the filters, just in case that our policy denies | |
94 // close(). | |
95 if (proc_fd_ >= 0) { | |
96 if (HANDLE_EINTR(close(proc_fd_))) { | |
97 die("Failed to close file descriptor for /proc"); | |
98 } | |
99 proc_fd_ = -1; | |
100 } | |
101 | |
102 // Install the filters. | |
103 installFilter(); | |
104 | |
105 // We are now inside the sandbox. | |
106 status_ = STATUS_ENABLED; | |
107 } | |
108 | |
109 bool Sandbox::isSingleThreaded(int proc_fd) { | |
110 if (proc_fd < 0) { | |
111 // Cannot determine whether program is single-threaded. Hope for | |
112 // the best... | |
113 return true; | |
114 } | |
115 | |
116 struct stat sb; | |
117 int task = -1; | |
118 if (proc_fd < 0 || | |
119 (task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || | |
120 fstat(task, &sb) != 0 || | |
121 sb.st_nlink != 3 || | |
122 HANDLE_EINTR(close(task))) { | |
123 if (task >= 0) { | |
124 HANDLE_EINTR(close(task)); | |
125 } | |
126 return false; | |
127 } | |
128 return true; | |
129 } | |
130 | |
131 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, | |
132 EvaluateArguments argumentEvaluator) { | |
133 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); | |
134 } | |
135 | |
136 void Sandbox::installFilter() { | |
137 // Verify that the user pushed a policy. | |
138 if (evaluators_.empty()) { | |
139 filter_failed: | |
140 die("Failed to configure system call filters"); | |
141 } | |
142 | |
143 // Set new SIGSYS handler | |
144 struct sigaction sa; | |
145 memset(&sa, 0, sizeof(sa)); | |
146 sa.sa_sigaction = &sigSys; | |
147 sa.sa_flags = SA_SIGINFO; | |
148 if (sigaction(SIGSYS, &sa, NULL) < 0) { | |
149 goto filter_failed; | |
150 } | |
151 | |
152 // Unmask SIGSYS | |
153 sigset_t mask; | |
154 sigemptyset(&mask); | |
155 sigaddset(&mask, SIGSYS); | |
156 if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | |
157 goto filter_failed; | |
158 } | |
159 | |
160 // We can't handle stacked evaluators, yet. We'll get there eventually | |
161 // though. Hang tight. | |
162 if (evaluators_.size() != 1) { | |
163 die("Not implemented"); | |
164 } | |
165 | |
166 // If the architecture doesn't match SECCOMP_ARCH, disallow the | |
167 // system call. | |
168 std::vector<struct sock_filter> program; | |
169 program.push_back((struct sock_filter) | |
170 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, | |
171 offsetof(struct arch_seccomp_data, arch))); | |
172 program.push_back((struct sock_filter) | |
173 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); | |
174 program.push_back((struct sock_filter) | |
175 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
176 | |
177 // Grab the system call number, so that we can implement jump tables. | |
178 program.push_back((struct sock_filter) | |
179 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr))); | |
180 | |
181 // Evaluate all possible system calls and depending on their | |
182 // exit codes generate a BPF filter. | |
183 // This is very inefficient right now. We need to be much smarter | |
184 // eventually. | |
185 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | |
186 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL; ++sysnum) { | |
187 ErrorCode err = evaluateSyscall(sysnum); | |
188 int ret; | |
189 switch (err) { | |
190 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: | |
191 die("Not implemented"); | |
192 case SB_TRAP: | |
193 ret = SECCOMP_RET_TRAP; | |
194 break; | |
195 case SB_ALLOWED: | |
196 ret = SECCOMP_RET_ALLOW; | |
197 break; | |
198 default: | |
199 if (err >= static_cast<ErrorCode>(1) && | |
200 err <= static_cast<ErrorCode>(4096)) { | |
201 // We limit errno values to a reasonable range. In fact, the Linux ABI | |
202 // doesn't support errno values outside of this range. | |
203 ret = SECCOMP_RET_ERRNO + err; | |
204 } else { | |
205 die("Invalid ErrorCode reported by sandbox system call evaluator"); | |
206 } | |
207 break; | |
208 } | |
209 program.push_back((struct sock_filter) | |
210 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1)); | |
211 program.push_back((struct sock_filter) | |
212 BPF_STMT(BPF_RET+BPF_K, ret)); | |
213 } | |
214 | |
215 // Everything that isn't allowed is forbidden. Eventually, we would | |
216 // like to have a way to log forbidden calls, when in debug mode. | |
217 program.push_back((struct sock_filter) | |
218 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
219 | |
220 // Install BPF filter program | |
221 const struct sock_fprog prog = { program.size(), &program[0] }; | |
222 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
223 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
224 goto filter_failed; | |
225 } | |
226 | |
227 return; | |
228 } | |
229 | |
230 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | |
231 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { | |
232 die("Unexpected SIGSYS received"); | |
Chris Evans
2012/06/01 22:48:23
Bug: die() in the Chromium context uses LOG(), whi
Markus (顧孟勤)
2012/06/01 23:46:33
This is really a tough call. The standalone versio
| |
233 } | |
234 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context); | |
235 int old_errno = errno; | |
Chris Evans
2012/06/01 22:48:23
We don't seem to do anything with old_errno?
Markus (顧孟勤)
2012/06/01 23:46:33
That is correct.
Non-exiting signal handlers must
| |
236 | |
237 // In case of error, set the REG_RESULT CPU register to the default | |
238 // errno value (i.e. EPERM). | |
239 // We need to be very careful when doing this, as some of our target | |
240 // platforms have pointer types and CPU registers that are wider than | |
241 // ints. Furthermore, the kernel ABI requires us to return a negative | |
242 // value, but errno values are usually positive. And in fact, it would | |
243 // be perfectly reasonable for somebody to have defined them as unsigned | |
244 // properties. This makes the correct incantation of type casts rather | |
245 // subtle. Sometimes, C++ is just too smart for its own good. | |
246 void *rc = (void *)(intptr_t)-(int)SECCOMP_DENY_ERRNO; | |
247 | |
248 // This is where we can add extra code to handle complex system calls. | |
249 // ... | |
250 | |
251 ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc); | |
252 errno = old_errno; | |
253 return; | |
254 } | |
255 | |
256 | |
257 bool Sandbox::suppressLogging_ = false; | |
258 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | |
259 int Sandbox::proc_fd_ = -1; | |
260 std::vector<std::pair<Sandbox::EvaluateSyscall, | |
261 Sandbox::EvaluateArguments> > Sandbox::evaluators_; | |
262 | |
263 } // namespace playground2 | |
OLD | NEW |