OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <endian.h> | 5 #include <endian.h> |
6 #if __BYTE_ORDER == __BIG_ENDIAN | 6 #if __BYTE_ORDER == __BIG_ENDIAN |
7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit | 7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit |
8 // values that need to be inspected by a virtual machine that only ever | 8 // values that need to be inspected by a virtual machine that only ever |
9 // operates on 32bit values. The kernel developers decided how values | 9 // operates on 32bit values. The kernel developers decided how values |
10 // should be split into two 32bit words to achieve this goal. But at this | 10 // should be split into two 32bit words to achieve this goal. But at this |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
65 | 65 |
66 // The kernel gives us a sandbox, we turn it into a playground :-) | 66 // The kernel gives us a sandbox, we turn it into a playground :-) |
67 // This is version 2 of the playground; version 1 was built on top of | 67 // This is version 2 of the playground; version 1 was built on top of |
68 // pre-BPF seccomp mode. | 68 // pre-BPF seccomp mode. |
69 namespace playground2 { | 69 namespace playground2 { |
70 | 70 |
71 const int kExpectedExitCode = 100; | 71 const int kExpectedExitCode = 100; |
72 | 72 |
73 // We define a really simple sandbox policy. It is just good enough for us | 73 // We define a really simple sandbox policy. It is just good enough for us |
74 // to tell that the sandbox has actually been activated. | 74 // to tell that the sandbox has actually been activated. |
75 ErrorCode Sandbox::probeEvaluator(int sysnum, void *) { | 75 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) { |
76 switch (sysnum) { | 76 switch (sysnum) { |
77 case __NR_getpid: | 77 case __NR_getpid: |
78 // Return EPERM so that we can check that the filter actually ran. | 78 // Return EPERM so that we can check that the filter actually ran. |
79 return ErrorCode(EPERM); | 79 return ErrorCode(EPERM); |
80 case __NR_exit_group: | 80 case __NR_exit_group: |
81 // Allow exit() with a non-default return code. | 81 // Allow exit() with a non-default return code. |
82 return ErrorCode(ErrorCode::ERR_ALLOWED); | 82 return ErrorCode(ErrorCode::ERR_ALLOWED); |
83 default: | 83 default: |
84 // Make everything else fail in an easily recognizable way. | 84 // Make everything else fail in an easily recognizable way. |
85 return ErrorCode(EINVAL); | 85 return ErrorCode(EINVAL); |
86 } | 86 } |
87 } | 87 } |
88 | 88 |
89 void Sandbox::probeProcess(void) { | 89 void Sandbox::ProbeProcess(void) { |
90 if (syscall(__NR_getpid) < 0 && errno == EPERM) { | 90 if (syscall(__NR_getpid) < 0 && errno == EPERM) { |
91 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); | 91 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); |
92 } | 92 } |
93 } | 93 } |
94 | 94 |
95 bool Sandbox::isValidSyscallNumber(int sysnum) { | 95 bool Sandbox::IsValidSyscallNumber(int sysnum) { |
96 return SyscallIterator::IsValid(sysnum); | 96 return SyscallIterator::IsValid(sysnum); |
97 } | 97 } |
98 | 98 |
99 ErrorCode Sandbox::allowAllEvaluator(int sysnum, void *) { | 99 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) { |
100 if (!isValidSyscallNumber(sysnum)) { | 100 if (!IsValidSyscallNumber(sysnum)) { |
101 return ErrorCode(ENOSYS); | 101 return ErrorCode(ENOSYS); |
102 } | 102 } |
103 return ErrorCode(ErrorCode::ERR_ALLOWED); | 103 return ErrorCode(ErrorCode::ERR_ALLOWED); |
104 } | 104 } |
105 | 105 |
106 void Sandbox::tryVsyscallProcess(void) { | 106 void Sandbox::TryVsyscallProcess(void) { |
107 time_t current_time; | 107 time_t current_time; |
108 // time() is implemented as a vsyscall. With an older glibc, with | 108 // time() is implemented as a vsyscall. With an older glibc, with |
109 // vsyscall=emulate and some versions of the seccomp BPF patch | 109 // vsyscall=emulate and some versions of the seccomp BPF patch |
110 // we may get SIGKILL-ed. Detect this! | 110 // we may get SIGKILL-ed. Detect this! |
111 if (time(&current_time) != static_cast<time_t>(-1)) { | 111 if (time(&current_time) != static_cast<time_t>(-1)) { |
112 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); | 112 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode)); |
113 } | 113 } |
114 } | 114 } |
115 | 115 |
116 bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(), | 116 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(), |
117 EvaluateSyscall syscallEvaluator, | 117 EvaluateSyscall syscall_evaluator, |
118 void *aux, | 118 void *aux, |
119 int proc_fd) { | 119 int proc_fd) { |
120 // Block all signals before forking a child process. This prevents an | 120 // Block all signals before forking a child process. This prevents an |
121 // attacker from manipulating our test by sending us an unexpected signal. | 121 // attacker from manipulating our test by sending us an unexpected signal. |
122 sigset_t oldMask, newMask; | 122 sigset_t old_mask, new_mask; |
123 if (sigfillset(&newMask) || | 123 if (sigfillset(&new_mask) || |
124 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) { | 124 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) { |
125 SANDBOX_DIE("sigprocmask() failed"); | 125 SANDBOX_DIE("sigprocmask() failed"); |
126 } | 126 } |
127 int fds[2]; | 127 int fds[2]; |
128 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { | 128 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) { |
129 SANDBOX_DIE("pipe() failed"); | 129 SANDBOX_DIE("pipe() failed"); |
130 } | 130 } |
131 | 131 |
132 if (fds[0] <= 2 || fds[1] <= 2) { | 132 if (fds[0] <= 2 || fds[1] <= 2) { |
133 SANDBOX_DIE("Process started without standard file descriptors"); | 133 SANDBOX_DIE("Process started without standard file descriptors"); |
134 } | 134 } |
135 | 135 |
136 pid_t pid = fork(); | 136 pid_t pid = fork(); |
137 if (pid < 0) { | 137 if (pid < 0) { |
138 // Die if we cannot fork(). We would probably fail a little later | 138 // Die if we cannot fork(). We would probably fail a little later |
139 // anyway, as the machine is likely very close to running out of | 139 // anyway, as the machine is likely very close to running out of |
140 // memory. | 140 // memory. |
141 // But what we don't want to do is return "false", as a crafty | 141 // But what we don't want to do is return "false", as a crafty |
142 // attacker might cause fork() to fail at will and could trick us | 142 // attacker might cause fork() to fail at will and could trick us |
143 // into running without a sandbox. | 143 // into running without a sandbox. |
144 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails | 144 sigprocmask(SIG_SETMASK, &old_mask, NULL); // OK, if it fails |
145 SANDBOX_DIE("fork() failed unexpectedly"); | 145 SANDBOX_DIE("fork() failed unexpectedly"); |
146 } | 146 } |
147 | 147 |
148 // In the child process | 148 // In the child process |
149 if (!pid) { | 149 if (!pid) { |
150 // Test a very simple sandbox policy to verify that we can | 150 // Test a very simple sandbox policy to verify that we can |
151 // successfully turn on sandboxing. | 151 // successfully turn on sandboxing. |
152 Die::EnableSimpleExit(); | 152 Die::EnableSimpleExit(); |
153 | 153 |
154 if (HANDLE_EINTR(close(fds[0]))) { | 154 if (HANDLE_EINTR(close(fds[0]))) { |
155 WriteFailedStderrSetupMessage(fds[1]); | 155 WriteFailedStderrSetupMessage(fds[1]); |
156 SANDBOX_DIE(NULL); | 156 SANDBOX_DIE(NULL); |
157 } | 157 } |
158 if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) { | 158 if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) { |
159 // Stderr could very well be a file descriptor to .xsession-errors, or | 159 // Stderr could very well be a file descriptor to .xsession-errors, or |
160 // another file, which could be backed by a file system that could cause | 160 // another file, which could be backed by a file system that could cause |
161 // dup2 to fail while trying to close stderr. It's important that we do | 161 // dup2 to fail while trying to close stderr. It's important that we do |
162 // not fail on trying to close stderr. | 162 // not fail on trying to close stderr. |
163 // If dup2 fails here, we will continue normally, this means that our | 163 // If dup2 fails here, we will continue normally, this means that our |
164 // parent won't cause a fatal failure if something writes to stderr in | 164 // parent won't cause a fatal failure if something writes to stderr in |
165 // this child. | 165 // this child. |
166 } | 166 } |
167 if (HANDLE_EINTR(close(fds[1]))) { | 167 if (HANDLE_EINTR(close(fds[1]))) { |
168 WriteFailedStderrSetupMessage(fds[1]); | 168 WriteFailedStderrSetupMessage(fds[1]); |
169 SANDBOX_DIE(NULL); | 169 SANDBOX_DIE(NULL); |
170 } | 170 } |
171 | 171 |
172 evaluators_.clear(); | 172 evaluators_.clear(); |
173 setSandboxPolicy(syscallEvaluator, aux); | 173 SetSandboxPolicy(syscall_evaluator, aux); |
174 setProcFd(proc_fd); | 174 SetProcFd(proc_fd); |
175 | 175 |
176 // By passing "quiet=true" to "startSandboxInternal()" we suppress | 176 // By passing "quiet=true" to "startSandboxInternal()" we suppress |
177 // messages for expected and benign failures (e.g. if the current | 177 // messages for expected and benign failures (e.g. if the current |
178 // kernel lacks support for BPF filters). | 178 // kernel lacks support for BPF filters). |
179 startSandboxInternal(true); | 179 StartSandboxInternal(true); |
180 | 180 |
181 // Run our code in the sandbox. | 181 // Run our code in the sandbox. |
182 CodeInSandbox(); | 182 code_in_sandbox(); |
183 | 183 |
184 // CodeInSandbox() is not supposed to return here. | 184 // code_in_sandbox() is not supposed to return here. |
185 SANDBOX_DIE(NULL); | 185 SANDBOX_DIE(NULL); |
186 } | 186 } |
187 | 187 |
188 // In the parent process. | 188 // In the parent process. |
189 if (HANDLE_EINTR(close(fds[1]))) { | 189 if (HANDLE_EINTR(close(fds[1]))) { |
190 SANDBOX_DIE("close() failed"); | 190 SANDBOX_DIE("close() failed"); |
191 } | 191 } |
192 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) { | 192 if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) { |
193 SANDBOX_DIE("sigprocmask() failed"); | 193 SANDBOX_DIE("sigprocmask() failed"); |
194 } | 194 } |
195 int status; | 195 int status; |
196 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { | 196 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) { |
197 SANDBOX_DIE("waitpid() failed unexpectedly"); | 197 SANDBOX_DIE("waitpid() failed unexpectedly"); |
198 } | 198 } |
199 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode; | 199 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode; |
200 | 200 |
201 // If we fail to support sandboxing, there might be an additional | 201 // If we fail to support sandboxing, there might be an additional |
202 // error message. If so, this was an entirely unexpected and fatal | 202 // error message. If so, this was an entirely unexpected and fatal |
(...skipping 11 matching lines...) Expand all Loading... | |
214 SANDBOX_DIE(buf); | 214 SANDBOX_DIE(buf); |
215 } | 215 } |
216 } | 216 } |
217 if (HANDLE_EINTR(close(fds[0]))) { | 217 if (HANDLE_EINTR(close(fds[0]))) { |
218 SANDBOX_DIE("close() failed"); | 218 SANDBOX_DIE("close() failed"); |
219 } | 219 } |
220 | 220 |
221 return rc; | 221 return rc; |
222 } | 222 } |
223 | 223 |
224 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) { | 224 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) { |
225 #if defined(SECCOMP_BPF_VALGRIND_HACKS) | 225 #if defined(SECCOMP_BPF_VALGRIND_HACKS) |
226 if (RUNNING_ON_VALGRIND) { | 226 if (RUNNING_ON_VALGRIND) { |
227 // Valgrind doesn't like our run-time test. Disable testing and assume we | 227 // Valgrind doesn't like our run-time test. Disable testing and assume we |
228 // always support sandboxing. This feature should only ever be enabled when | 228 // always support sandboxing. This feature should only ever be enabled when |
229 // debugging. | 229 // debugging. |
230 return true; | 230 return true; |
231 } | 231 } |
232 #endif | 232 #endif |
233 | 233 |
234 return | 234 return |
235 RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, 0, proc_fd) && | 235 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) && |
236 RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, 0, | 236 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0, |
237 proc_fd); | 237 proc_fd); |
238 } | 238 } |
239 | 239 |
240 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) { | 240 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) { |
241 // If the sandbox is currently active, we clearly must have support for | 241 // If the sandbox is currently active, we clearly must have support for |
242 // sandboxing. | 242 // sandboxing. |
243 if (status_ == STATUS_ENABLED) { | 243 if (status_ == STATUS_ENABLED) { |
244 return status_; | 244 return status_; |
245 } | 245 } |
246 | 246 |
247 // Even if the sandbox was previously available, something might have | 247 // Even if the sandbox was previously available, something might have |
248 // changed in our run-time environment. Check one more time. | 248 // changed in our run-time environment. Check one more time. |
249 if (status_ == STATUS_AVAILABLE) { | 249 if (status_ == STATUS_AVAILABLE) { |
250 if (!isSingleThreaded(proc_fd)) { | 250 if (!IsSingleThreaded(proc_fd)) { |
251 status_ = STATUS_UNAVAILABLE; | 251 status_ = STATUS_UNAVAILABLE; |
252 } | 252 } |
253 return status_; | 253 return status_; |
254 } | 254 } |
255 | 255 |
256 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) { | 256 if (status_ == STATUS_UNAVAILABLE && IsSingleThreaded(proc_fd)) { |
257 // All state transitions resulting in STATUS_UNAVAILABLE are immediately | 257 // All state transitions resulting in STATUS_UNAVAILABLE are immediately |
258 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all | 258 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all |
259 // happen, if and only if they are triggered by the process being multi- | 259 // happen, if and only if they are triggered by the process being multi- |
260 // threaded. | 260 // threaded. |
261 // In other words, if a single-threaded process is currently in the | 261 // In other words, if a single-threaded process is currently in the |
262 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is | 262 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is |
263 // actually available. | 263 // actually available. |
264 status_ = STATUS_AVAILABLE; | 264 status_ = STATUS_AVAILABLE; |
265 return status_; | 265 return status_; |
266 } | 266 } |
267 | 267 |
268 // If we have not previously checked for availability of the sandbox or if | 268 // If we have not previously checked for availability of the sandbox or if |
269 // we otherwise don't believe to have a good cached value, we have to | 269 // we otherwise don't believe to have a good cached value, we have to |
270 // perform a thorough check now. | 270 // perform a thorough check now. |
271 if (status_ == STATUS_UNKNOWN) { | 271 if (status_ == STATUS_UNKNOWN) { |
272 status_ = kernelSupportSeccompBPF(proc_fd) | 272 status_ = KernelSupportSeccompBPF(proc_fd) |
273 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; | 273 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED; |
274 | 274 |
275 // As we are performing our tests from a child process, the run-time | 275 // As we are performing our tests from a child process, the run-time |
276 // environment that is visible to the sandbox is always guaranteed to be | 276 // environment that is visible to the sandbox is always guaranteed to be |
277 // single-threaded. Let's check here whether the caller is single- | 277 // single-threaded. Let's check here whether the caller is single- |
278 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. | 278 // threaded. Otherwise, we mark the sandbox as temporarily unavailable. |
279 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) { | 279 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) { |
280 status_ = STATUS_UNAVAILABLE; | 280 status_ = STATUS_UNAVAILABLE; |
281 } | 281 } |
282 } | 282 } |
283 return status_; | 283 return status_; |
284 } | 284 } |
285 | 285 |
286 void Sandbox::setProcFd(int proc_fd) { | 286 void Sandbox::SetProcFd(int proc_fd) { |
287 proc_fd_ = proc_fd; | 287 proc_fd_ = proc_fd; |
288 } | 288 } |
289 | 289 |
290 void Sandbox::startSandboxInternal(bool quiet) { | 290 void Sandbox::StartSandboxInternal(bool quiet) { |
291 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { | 291 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) { |
292 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " | 292 SANDBOX_DIE("Trying to start sandbox, even though it is known to be " |
293 "unavailable"); | 293 "unavailable"); |
294 } else if (status_ == STATUS_ENABLED) { | 294 } else if (status_ == STATUS_ENABLED) { |
295 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " | 295 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to " |
296 "setSandboxPolicy() to stack policies instead"); | 296 "setSandboxPolicy() to stack policies instead"); |
297 } | 297 } |
298 if (proc_fd_ < 0) { | 298 if (proc_fd_ < 0) { |
299 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); | 299 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY); |
300 } | 300 } |
301 if (proc_fd_ < 0) { | 301 if (proc_fd_ < 0) { |
302 // For now, continue in degraded mode, if we can't access /proc. | 302 // For now, continue in degraded mode, if we can't access /proc. |
303 // In the future, we might want to tighten this requirement. | 303 // In the future, we might want to tighten this requirement. |
304 } | 304 } |
305 if (!isSingleThreaded(proc_fd_)) { | 305 if (!IsSingleThreaded(proc_fd_)) { |
306 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); | 306 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded"); |
307 } | 307 } |
308 | 308 |
309 // We no longer need access to any files in /proc. We want to do this | 309 // We no longer need access to any files in /proc. We want to do this |
310 // before installing the filters, just in case that our policy denies | 310 // before installing the filters, just in case that our policy denies |
311 // close(). | 311 // close(). |
312 if (proc_fd_ >= 0) { | 312 if (proc_fd_ >= 0) { |
313 if (HANDLE_EINTR(close(proc_fd_))) { | 313 if (HANDLE_EINTR(close(proc_fd_))) { |
314 SANDBOX_DIE("Failed to close file descriptor for /proc"); | 314 SANDBOX_DIE("Failed to close file descriptor for /proc"); |
315 } | 315 } |
316 proc_fd_ = -1; | 316 proc_fd_ = -1; |
317 } | 317 } |
318 | 318 |
319 // Install the filters. | 319 // Install the filters. |
320 installFilter(quiet); | 320 InstallFilter(quiet); |
321 | 321 |
322 // We are now inside the sandbox. | 322 // We are now inside the sandbox. |
323 status_ = STATUS_ENABLED; | 323 status_ = STATUS_ENABLED; |
324 } | 324 } |
325 | 325 |
326 bool Sandbox::isSingleThreaded(int proc_fd) { | 326 bool Sandbox::IsSingleThreaded(int proc_fd) { |
327 if (proc_fd < 0) { | 327 if (proc_fd < 0) { |
328 // Cannot determine whether program is single-threaded. Hope for | 328 // Cannot determine whether program is single-threaded. Hope for |
329 // the best... | 329 // the best... |
330 return true; | 330 return true; |
331 } | 331 } |
332 | 332 |
333 struct stat sb; | 333 struct stat sb; |
334 int task = -1; | 334 int task = -1; |
335 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || | 335 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 || |
336 fstat(task, &sb) != 0 || | 336 fstat(task, &sb) != 0 || |
337 sb.st_nlink != 3 || | 337 sb.st_nlink != 3 || |
338 HANDLE_EINTR(close(task))) { | 338 HANDLE_EINTR(close(task))) { |
339 if (task >= 0) { | 339 if (task >= 0) { |
340 if (HANDLE_EINTR(close(task))) { } | 340 if (HANDLE_EINTR(close(task))) { } |
341 } | 341 } |
342 return false; | 342 return false; |
343 } | 343 } |
344 return true; | 344 return true; |
345 } | 345 } |
346 | 346 |
347 bool Sandbox::isDenied(const ErrorCode& code) { | 347 bool Sandbox::IsDenied(const ErrorCode& code) { |
348 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP || | 348 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP || |
349 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) && | 349 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) && |
350 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO)); | 350 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO)); |
351 } | 351 } |
352 | 352 |
353 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, | 353 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator, |
354 void *aux) { | 354 void *aux) { |
355 for (SyscallIterator iter(true); !iter.Done(); ) { | 355 for (SyscallIterator iter(true); !iter.Done(); ) { |
356 uint32_t sysnum = iter.Next(); | 356 uint32_t sysnum = iter.Next(); |
357 if (!isDenied(syscallEvaluator(sysnum, aux))) { | 357 if (!IsDenied(syscall_evaluator(sysnum, aux))) { |
358 SANDBOX_DIE("Policies should deny system calls that are outside the " | 358 SANDBOX_DIE("Policies should deny system calls that are outside the " |
359 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); | 359 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)"); |
360 } | 360 } |
361 } | 361 } |
362 return; | 362 return; |
363 } | 363 } |
364 | 364 |
365 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { | 365 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { |
366 if (BPF_CLASS(insn->code) == BPF_RET && | 366 if (BPF_CLASS(insn->code) == BPF_RET && |
367 insn->k > SECCOMP_RET_TRAP && | 367 insn->k > SECCOMP_RET_TRAP && |
368 insn->k - SECCOMP_RET_TRAP <= trapArraySize_) { | 368 insn->k - SECCOMP_RET_TRAP <= trap_array_size_) { |
369 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1]; | 369 const ErrorCode& err = trap_array_[insn->k - SECCOMP_RET_TRAP - 1]; |
370 if (!err.safe_) { | 370 if (!err.safe_) { |
371 bool *is_unsafe = static_cast<bool *>(aux); | 371 bool *is_unsafe = static_cast<bool *>(aux); |
372 *is_unsafe = true; | 372 *is_unsafe = true; |
373 } | 373 } |
374 } | 374 } |
375 } | 375 } |
376 | 376 |
377 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) { | 377 void Sandbox::RedirectToUserspace(Instruction *insn, void *) { |
378 // When inside an UnsafeTrap() callback, we want to allow all system calls. | 378 // When inside an UnsafeTrap() callback, we want to allow all system calls. |
379 // This means, we must conditionally disable the sandbox -- and that's not | 379 // This means, we must conditionally disable the sandbox -- and that's not |
380 // something that kernel-side BPF filters can do, as they cannot inspect | 380 // something that kernel-side BPF filters can do, as they cannot inspect |
381 // any state other than the syscall arguments. | 381 // any state other than the syscall arguments. |
382 // But if we redirect all error handlers to user-space, then we can easily | 382 // But if we redirect all error handlers to user-space, then we can easily |
383 // make this decision. | 383 // make this decision. |
384 // The performance penalty for this extra round-trip to user-space is not | 384 // The performance penalty for this extra round-trip to user-space is not |
385 // actually that bad, as we only ever pay it for denied system calls; and a | 385 // actually that bad, as we only ever pay it for denied system calls; and a |
386 // typical program has very few of these. | 386 // typical program has very few of these. |
387 if (BPF_CLASS(insn->code) == BPF_RET && | 387 if (BPF_CLASS(insn->code) == BPF_RET && |
388 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { | 388 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { |
389 insn->k = Trap(ReturnErrno, | 389 insn->k = Trap(ReturnErrno, |
390 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); | 390 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); |
391 } | 391 } |
392 } | 392 } |
393 | 393 |
394 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { | 394 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { |
395 // We need to replicate the behavior of RedirectToUserspace(), so that our | 395 // We need to replicate the behavior of RedirectToUserspace(), so that our |
396 // Verifier can still work correctly. | 396 // Verifier can still work correctly. |
397 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); | 397 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); |
398 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); | 398 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); |
399 ErrorCode err = evaluator.first(sysnum, evaluator.second); | 399 ErrorCode err = evaluator.first(sysnum, evaluator.second); |
400 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { | 400 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { |
401 return Trap(ReturnErrno, | 401 return Trap(ReturnErrno, |
402 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); | 402 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); |
403 } | 403 } |
404 return err; | 404 return err; |
405 } | 405 } |
406 | 406 |
407 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { | 407 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) { |
408 if (status_ == STATUS_ENABLED) { | 408 if (status_ == STATUS_ENABLED) { |
409 SANDBOX_DIE("Cannot change policy after sandbox has started"); | 409 SANDBOX_DIE("Cannot change policy after sandbox has started"); |
410 } | 410 } |
411 policySanityChecks(syscallEvaluator, aux); | 411 PolicySanityChecks(syscall_evaluator, aux); |
412 evaluators_.push_back(std::make_pair(syscallEvaluator, aux)); | 412 evaluators_.push_back(std::make_pair(syscall_evaluator, aux)); |
413 } | 413 } |
414 | 414 |
415 void Sandbox::installFilter(bool quiet) { | 415 void Sandbox::InstallFilter(bool quiet) { |
416 // Verify that the user pushed a policy. | 416 // Verify that the user pushed a policy. |
417 if (evaluators_.empty()) { | 417 if (evaluators_.empty()) { |
418 filter_failed: | 418 filter_failed: |
419 SANDBOX_DIE("Failed to configure system call filters"); | 419 SANDBOX_DIE("Failed to configure system call filters"); |
420 } | 420 } |
421 | 421 |
422 // Set new SIGSYS handler | 422 // Set new SIGSYS handler |
423 struct sigaction sa; | 423 struct sigaction sa; |
424 memset(&sa, 0, sizeof(sa)); | 424 memset(&sa, 0, sizeof(sa)); |
425 sa.sa_sigaction = sigSys; | 425 sa.sa_sigaction = SigSys; |
426 sa.sa_flags = SA_SIGINFO | SA_NODEFER; | 426 sa.sa_flags = SA_SIGINFO | SA_NODEFER; |
427 if (sigaction(SIGSYS, &sa, NULL) < 0) { | 427 if (sigaction(SIGSYS, &sa, NULL) < 0) { |
428 goto filter_failed; | 428 goto filter_failed; |
429 } | 429 } |
430 | 430 |
431 // Unmask SIGSYS | 431 // Unmask SIGSYS |
432 sigset_t mask; | 432 sigset_t mask; |
433 if (sigemptyset(&mask) || | 433 if (sigemptyset(&mask) || |
434 sigaddset(&mask, SIGSYS) || | 434 sigaddset(&mask, SIGSYS) || |
435 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { | 435 sigprocmask(SIG_UNBLOCK, &mask, NULL)) { |
(...skipping 15 matching lines...) Expand all Loading... | |
451 // If the architecture doesn't match SECCOMP_ARCH, disallow the | 451 // If the architecture doesn't match SECCOMP_ARCH, disallow the |
452 // system call. | 452 // system call. |
453 Instruction *tail; | 453 Instruction *tail; |
454 Instruction *head = | 454 Instruction *head = |
455 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | 455 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, |
456 offsetof(struct arch_seccomp_data, arch), | 456 offsetof(struct arch_seccomp_data, arch), |
457 tail = | 457 tail = |
458 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, | 458 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, |
459 NULL, | 459 NULL, |
460 gen->MakeInstruction(BPF_RET+BPF_K, | 460 gen->MakeInstruction(BPF_RET+BPF_K, |
461 Kill( | 461 Kill("Invalid audit architecture in BPF filter")))); |
462 "Invalid audit architecture in BPF filter").err_))); | |
463 | 462 |
464 { | 463 { |
465 // Evaluate all possible system calls and group their ErrorCodes into | 464 // Evaluate all possible system calls and group their ErrorCodes into |
466 // ranges of identical codes. | 465 // ranges of identical codes. |
467 Ranges ranges; | 466 Ranges ranges; |
468 findRanges(&ranges); | 467 FindRanges(&ranges); |
469 | 468 |
470 // Compile the system call ranges to an optimized BPF jumptable | 469 // Compile the system call ranges to an optimized BPF jumptable |
471 Instruction *jumptable = | 470 Instruction *jumptable = |
472 assembleJumpTable(gen, ranges.begin(), ranges.end()); | 471 AssembleJumpTable(gen, ranges.begin(), ranges.end()); |
473 | 472 |
474 // If there is at least one UnsafeTrap() in our program, the entire sandbox | 473 // If there is at least one UnsafeTrap() in our program, the entire sandbox |
475 // is unsafe. We need to modify the program so that all non- | 474 // is unsafe. We need to modify the program so that all non- |
476 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then | 475 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then |
477 // allow us to temporarily disable sandboxing rules inside of callbacks to | 476 // allow us to temporarily disable sandboxing rules inside of callbacks to |
478 // UnsafeTrap(). | 477 // UnsafeTrap(). |
479 has_unsafe_traps_ = false; | 478 has_unsafe_traps_ = false; |
480 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); | 479 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); |
481 | 480 |
482 // Grab the system call number, so that we can implement jump tables. | 481 // Grab the system call number, so that we can implement jump tables. |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
618 // system memory allocator that is in effect, these operators can result | 617 // system memory allocator that is in effect, these operators can result |
619 // in system calls to things like munmap() or brk(). | 618 // in system calls to things like munmap() or brk(). |
620 struct sock_filter bpf[program->size()]; | 619 struct sock_filter bpf[program->size()]; |
621 const struct sock_fprog prog = { | 620 const struct sock_fprog prog = { |
622 static_cast<unsigned short>(program->size()), bpf }; | 621 static_cast<unsigned short>(program->size()), bpf }; |
623 memcpy(bpf, &(*program)[0], sizeof(bpf)); | 622 memcpy(bpf, &(*program)[0], sizeof(bpf)); |
624 delete program; | 623 delete program; |
625 | 624 |
626 // Release memory that is no longer needed | 625 // Release memory that is no longer needed |
627 evaluators_.clear(); | 626 evaluators_.clear(); |
627 conds_.clear(); | |
628 | 628 |
629 #if defined(SECCOMP_BPF_VALGRIND_HACKS) | 629 #if defined(SECCOMP_BPF_VALGRIND_HACKS) |
630 // Valgrind is really not happy about our sandbox. Disable it when running | 630 // Valgrind is really not happy about our sandbox. Disable it when running |
631 // in Valgrind. This feature is dangerous and should never be enabled by | 631 // in Valgrind. This feature is dangerous and should never be enabled by |
632 // default. We protect it behind a pre-processor option. | 632 // default. We protect it behind a pre-processor option. |
633 if (!RUNNING_ON_VALGRIND) | 633 if (!RUNNING_ON_VALGRIND) |
634 #endif | 634 #endif |
635 { | 635 { |
636 // Install BPF filter program | 636 // Install BPF filter program |
637 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { | 637 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { |
638 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs"); | 638 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs"); |
639 } else { | 639 } else { |
640 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | 640 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { |
641 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters"); | 641 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters"); |
642 } | 642 } |
643 } | 643 } |
644 } | 644 } |
645 | 645 |
646 return; | 646 return; |
647 } | 647 } |
648 | 648 |
649 void Sandbox::findRanges(Ranges *ranges) { | 649 void Sandbox::FindRanges(Ranges *ranges) { |
650 // Please note that "struct seccomp_data" defines system calls as a signed | 650 // Please note that "struct seccomp_data" defines system calls as a signed |
651 // int32_t, but BPF instructions always operate on unsigned quantities. We | 651 // int32_t, but BPF instructions always operate on unsigned quantities. We |
652 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, | 652 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, |
653 // and then verifying that the rest of the number range (both positive and | 653 // and then verifying that the rest of the number range (both positive and |
654 // negative) all return the same ErrorCode. | 654 // negative) all return the same ErrorCode. |
655 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | 655 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first; |
656 void *aux = evaluators_.begin()->second; | 656 void *aux = evaluators_.begin()->second; |
657 uint32_t oldSysnum = 0; | 657 uint32_t old_sysnum = 0; |
658 ErrorCode oldErr = evaluateSyscall(oldSysnum, aux); | 658 ErrorCode old_err = evaluate_syscall(old_sysnum, aux); |
659 ErrorCode invalidErr = evaluateSyscall(MIN_SYSCALL - 1, aux); | 659 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux); |
660 for (SyscallIterator iter(false); !iter.Done(); ) { | 660 for (SyscallIterator iter(false); !iter.Done(); ) { |
661 uint32_t sysnum = iter.Next(); | 661 uint32_t sysnum = iter.Next(); |
662 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum), aux); | 662 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux); |
663 if (!iter.IsValid(sysnum) && !invalidErr.Equals(err)) { | 663 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) { |
664 // A proper sandbox policy should always treat system calls outside of | 664 // A proper sandbox policy should always treat system calls outside of |
665 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns | 665 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns |
666 // "false" for SyscallIterator::IsValid()) identically. Typically, all | 666 // "false" for SyscallIterator::IsValid()) identically. Typically, all |
667 // of these system calls would be denied with the same ErrorCode. | 667 // of these system calls would be denied with the same ErrorCode. |
668 SANDBOX_DIE("Invalid seccomp policy"); | 668 SANDBOX_DIE("Invalid seccomp policy"); |
669 } | 669 } |
670 if (!err.Equals(oldErr) || iter.Done()) { | 670 if (!err.Equals(old_err) || iter.Done()) { |
671 ranges->push_back(Range(oldSysnum, sysnum - 1, oldErr)); | 671 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err)); |
672 oldSysnum = sysnum; | 672 old_sysnum = sysnum; |
673 oldErr = err; | 673 old_err = err; |
674 } | 674 } |
675 } | 675 } |
676 } | 676 } |
677 | 677 |
678 Instruction *Sandbox::assembleJumpTable(CodeGen *gen, | 678 Instruction *Sandbox::AssembleJumpTable(CodeGen *gen, |
679 Ranges::const_iterator start, | 679 Ranges::const_iterator start, |
680 Ranges::const_iterator stop) { | 680 Ranges::const_iterator stop) { |
681 // We convert the list of system call ranges into a jump table that performs | 681 // We convert the list of system call ranges into a jump table that performs |
682 // a binary search over the ranges. | 682 // a binary search over the ranges. |
683 // As a sanity check, we need to have at least one distinct range for us | 683 // As a sanity check, we need to have at least one distinct range for us |
684 // to be able to build a jump table. | 684 // to be able to build a jump table. |
685 if (stop - start <= 0) { | 685 if (stop - start <= 0) { |
686 SANDBOX_DIE("Invalid set of system call ranges"); | 686 SANDBOX_DIE("Invalid set of system call ranges"); |
687 } else if (stop - start == 1) { | 687 } else if (stop - start == 1) { |
688 // If we have narrowed things down to a single range object, we can | 688 // If we have narrowed things down to a single range object, we can |
689 // return from the BPF filter program. | 689 // return from the BPF filter program. |
690 return gen->MakeInstruction(BPF_RET+BPF_K, start->err); | 690 return RetExpression(gen, start->err); |
691 } | 691 } |
692 | 692 |
693 // Pick the range object that is located at the mid point of our list. | 693 // Pick the range object that is located at the mid point of our list. |
694 // We compare our system call number against the lowest valid system call | 694 // We compare our system call number against the lowest valid system call |
695 // number in this range object. If our number is lower, it is outside of | 695 // number in this range object. If our number is lower, it is outside of |
696 // this range object. If it is greater or equal, it might be inside. | 696 // this range object. If it is greater or equal, it might be inside. |
697 Ranges::const_iterator mid = start + (stop - start)/2; | 697 Ranges::const_iterator mid = start + (stop - start)/2; |
698 | 698 |
699 // Sub-divide the list of ranges and continue recursively. | 699 // Sub-divide the list of ranges and continue recursively. |
700 Instruction *jf = assembleJumpTable(gen, start, mid); | 700 Instruction *jf = AssembleJumpTable(gen, start, mid); |
701 Instruction *jt = assembleJumpTable(gen, mid, stop); | 701 Instruction *jt = AssembleJumpTable(gen, mid, stop); |
702 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf); | 702 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf); |
703 } | 703 } |
704 | 704 |
705 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | 705 Instruction *Sandbox::RetExpression(CodeGen *gen, const ErrorCode& cond) { |
706 if (cond.error_type_ == ErrorCode::ET_COND) { | |
707 return CondExpression(gen, cond); | |
708 } else { | |
709 return gen->MakeInstruction(BPF_RET+BPF_K, cond); | |
710 } | |
711 } | |
712 | |
713 Instruction *Sandbox::CondExpression(CodeGen *gen, const ErrorCode& cond) { | |
714 // We can only inspect the six system call arguments that are passed in | |
715 // CPU registers. | |
jln (very slow on Chromium)
2012/12/06 00:35:00
It's even worse than that and is architecture depe
Markus (顧孟勤)
2012/12/12 20:54:35
I am almost certain this is a red herring. Yes, fo
| |
716 if (cond.argno_ < 0 || cond.argno_ >= 6) { | |
717 SANDBOX_DIE("Internal compiler error; invalid argument number " | |
718 "encountered"); | |
719 } | |
720 | |
721 // BPF programs operate on 32bit entities. Load both halves of the 64bit | |
722 // system call argument and then generate suitable conditional statements. | |
723 Instruction *msb = gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | |
724 offsetof(struct arch_seccomp_data, args) + | |
725 cond.argno_ * sizeof(uint64_t) + | |
726 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4)); // Most significant bits | |
727 Instruction *lsb = gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, | |
728 offsetof(struct arch_seccomp_data, args) + | |
729 cond.argno_ * sizeof(uint64_t) + | |
730 (__BYTE_ORDER == __BIG_ENDIAN ? 4 : 0)); // Least significant bits | |
731 | |
732 // Emit a suitable comparison statement. | |
733 switch (cond.op_) { | |
734 case ErrorCode::OP_EQUAL: | |
735 // Compare the least significant bits for equality | |
736 gen->JoinInstructions(lsb, | |
737 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, | |
738 static_cast<uint32_t>(cond.value_), | |
739 RetExpression(gen, *cond.passed_), | |
740 RetExpression(gen, *cond.failed_))); | |
741 | |
742 // If we are looking at a 64bit argument, we need to also compare the | |
743 // most significant bits. | |
744 if (cond.width_ == ErrorCode::TP_64BIT) { | |
745 lsb = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, | |
746 static_cast<uint32_t>(cond.value_ >> 32), | |
747 lsb, | |
748 RetExpression(gen, *cond.failed_)); | |
749 } | |
750 break; | |
751 default: | |
752 // TODO(markus): We can only check for equality so far. | |
753 SANDBOX_DIE("Not implemented"); | |
754 break; | |
755 } | |
756 | |
757 // Ensure that we never pass a 64bit value, when we only expect a 32bit | |
758 // value. | |
759 if (cond.width_ == ErrorCode::TP_32BIT) { | |
760 gen->JoinInstructions(msb, | |
761 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0, lsb, | |
762 RetExpression(gen, Kill("Unexpected 64bit argument detected")))); | |
763 } else { | |
764 gen->JoinInstructions(msb, lsb); | |
765 } | |
766 | |
767 return msb; | |
768 } | |
769 | |
770 void Sandbox::SigSys(int nr, siginfo_t *info, void *void_context) { | |
706 // Various sanity checks to make sure we actually received a signal | 771 // Various sanity checks to make sure we actually received a signal |
707 // triggered by a BPF filter. If something else triggered SIGSYS | 772 // triggered by a BPF filter. If something else triggered SIGSYS |
708 // (e.g. kill()), there is really nothing we can do with this signal. | 773 // (e.g. kill()), there is really nothing we can do with this signal. |
709 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context || | 774 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context || |
710 info->si_errno <= 0 || | 775 info->si_errno <= 0 || |
711 static_cast<size_t>(info->si_errno) > trapArraySize_) { | 776 static_cast<size_t>(info->si_errno) > trap_array_size_) { |
712 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal | 777 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal |
713 // safe and can lead to bugs. We should eventually implement a different | 778 // safe and can lead to bugs. We should eventually implement a different |
714 // logging and reporting mechanism that is safe to be called from | 779 // logging and reporting mechanism that is safe to be called from |
715 // the sigSys() handler. | 780 // the sigSys() handler. |
716 // TODO: If we feel confident that our code otherwise works correctly, we | 781 // TODO: If we feel confident that our code otherwise works correctly, we |
717 // could actually make an argument that spurious SIGSYS should | 782 // could actually make an argument that spurious SIGSYS should |
718 // just get silently ignored. TBD | 783 // just get silently ignored. TBD |
719 sigsys_err: | 784 sigsys_err: |
720 SANDBOX_DIE("Unexpected SIGSYS received"); | 785 SANDBOX_DIE("Unexpected SIGSYS received"); |
721 } | 786 } |
(...skipping 23 matching lines...) Expand all Loading... | |
745 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { | 810 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { |
746 errno = old_errno; | 811 errno = old_errno; |
747 if (sigsys.nr == __NR_clone) { | 812 if (sigsys.nr == __NR_clone) { |
748 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); | 813 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); |
749 } | 814 } |
750 rc = SandboxSyscall(sigsys.nr, | 815 rc = SandboxSyscall(sigsys.nr, |
751 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), | 816 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), |
752 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), | 817 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), |
753 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); | 818 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); |
754 } else { | 819 } else { |
755 const ErrorCode& err = trapArray_[info->si_errno - 1]; | 820 const ErrorCode& err = trap_array_[info->si_errno - 1]; |
756 if (!err.safe_) { | 821 if (!err.safe_) { |
757 SetIsInSigHandler(); | 822 SetIsInSigHandler(); |
758 } | 823 } |
759 | 824 |
760 // Copy the seccomp-specific data into an arch_seccomp_data structure. This | 825 // Copy the seccomp-specific data into an arch_seccomp_data structure. This |
761 // is what we are showing to TrapFnc callbacks that the system call | 826 // is what we are showing to TrapFnc callbacks that the system call |
762 // evaluator registered with the sandbox. | 827 // evaluator registered with the sandbox. |
763 struct arch_seccomp_data data = { | 828 struct arch_seccomp_data data = { |
764 sigsys.nr, | 829 sigsys.nr, |
765 SECCOMP_ARCH, | 830 SECCOMP_ARCH, |
(...skipping 30 matching lines...) Expand all Loading... | |
796 } else { | 861 } else { |
797 return safe < o.safe; | 862 return safe < o.safe; |
798 } | 863 } |
799 } | 864 } |
800 | 865 |
801 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, | 866 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, |
802 bool safe) { | 867 bool safe) { |
803 // Each unique pair of TrapFnc and auxiliary data makes up a distinct instance | 868 // Each unique pair of TrapFnc and auxiliary data makes up a distinct instance |
804 // of a SECCOMP_RET_TRAP. | 869 // of a SECCOMP_RET_TRAP. |
805 TrapKey key(fnc, aux, safe); | 870 TrapKey key(fnc, aux, safe); |
806 TrapIds::const_iterator iter = trapIds_.find(key); | 871 TrapIds::const_iterator iter = trap_ids_.find(key); |
807 uint16_t id; | 872 uint16_t id; |
808 if (iter != trapIds_.end()) { | 873 if (iter != trap_ids_.end()) { |
809 // We have seen this pair before. Return the same id that we assigned | 874 // We have seen this pair before. Return the same id that we assigned |
810 // earlier. | 875 // earlier. |
811 id = iter->second; | 876 id = iter->second; |
812 } else { | 877 } else { |
813 // This is a new pair. Remember it and assign a new id. | 878 // This is a new pair. Remember it and assign a new id. |
814 // Please note that we have to store traps in memory that doesn't get | 879 // Please note that we have to store traps in memory that doesn't get |
815 // deallocated when the program is shutting down. A memory leak is | 880 // deallocated when the program is shutting down. A memory leak is |
816 // intentional, because we might otherwise not be able to execute | 881 // intentional, because we might otherwise not be able to execute |
817 // system calls part way through the program shutting down | 882 // system calls part way through the program shutting down |
818 if (!traps_) { | 883 if (!traps_) { |
819 traps_ = new Traps(); | 884 traps_ = new Traps(); |
820 } | 885 } |
821 if (traps_->size() >= SECCOMP_RET_DATA) { | 886 if (traps_->size() >= SECCOMP_RET_DATA) { |
822 // In practice, this is pretty much impossible to trigger, as there | 887 // In practice, this is pretty much impossible to trigger, as there |
823 // are other kernel limitations that restrict overall BPF program sizes. | 888 // are other kernel limitations that restrict overall BPF program sizes. |
824 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); | 889 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); |
825 } | 890 } |
826 id = traps_->size() + 1; | 891 id = traps_->size() + 1; |
827 | 892 |
828 traps_->push_back(ErrorCode(fnc, aux, safe, id)); | 893 traps_->push_back(ErrorCode(fnc, aux, safe, id)); |
829 trapIds_[key] = id; | 894 trap_ids_[key] = id; |
830 | 895 |
831 // We want to access the traps_ vector from our signal handler. But | 896 // We want to access the traps_ vector from our signal handler. But |
832 // we are not assured that doing so is async-signal safe. On the other | 897 // we are not assured that doing so is async-signal safe. On the other |
833 // hand, C++ guarantees that the contents of a vector are stored in a | 898 // hand, C++ guarantees that the contents of a vector are stored in a |
834 // contiguous C-style array. | 899 // contiguous C-style array. |
835 // So, we look up the address and size of this array outside of the | 900 // So, we look up the address and size of this array outside of the |
836 // signal handler, where we can safely do so. | 901 // signal handler, where we can safely do so. |
837 trapArray_ = &(*traps_)[0]; | 902 trap_array_ = &(*traps_)[0]; |
838 trapArraySize_ = id; | 903 trap_array_size_ = id; |
839 return traps_->back(); | 904 return traps_->back(); |
840 } | 905 } |
841 | 906 |
842 return ErrorCode(fnc, aux, safe, id); | 907 return ErrorCode(fnc, aux, safe, id); |
843 } | 908 } |
844 | 909 |
845 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { | 910 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { |
846 return MakeTrap(fnc, aux, true /* Safe Trap */); | 911 return MakeTrap(fnc, aux, true /* Safe Trap */); |
847 } | 912 } |
848 | 913 |
(...skipping 13 matching lines...) Expand all Loading... | |
862 | 927 |
863 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { | 928 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { |
864 // TrapFnc functions report error by following the native kernel convention | 929 // TrapFnc functions report error by following the native kernel convention |
865 // of returning an exit code in the range of -1..-4096. They do not try to | 930 // of returning an exit code in the range of -1..-4096. They do not try to |
866 // set errno themselves. The glibc wrapper that triggered the SIGSYS will | 931 // set errno themselves. The glibc wrapper that triggered the SIGSYS will |
867 // ultimately do so for us. | 932 // ultimately do so for us. |
868 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; | 933 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; |
869 return -err; | 934 return -err; |
870 } | 935 } |
871 | 936 |
872 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { | 937 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width, |
938 ErrorCode::Operation op, uint64_t value, | |
939 const ErrorCode& passed, const ErrorCode& failed) { | |
940 return ErrorCode(argno, width, op, value, | |
941 &*conds_.insert(passed).first, | |
942 &*conds_.insert(failed).first); | |
943 } | |
944 | |
945 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) { | |
873 SANDBOX_DIE(static_cast<char *>(aux)); | 946 SANDBOX_DIE(static_cast<char *>(aux)); |
874 } | 947 } |
875 | 948 |
876 ErrorCode Sandbox::Kill(const char *msg) { | 949 ErrorCode Sandbox::Kill(const char *msg) { |
877 return Trap(bpfFailure, const_cast<char *>(msg)); | 950 return Trap(BpfFailure, const_cast<char *>(msg)); |
878 } | 951 } |
879 | 952 |
880 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 953 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
881 int Sandbox::proc_fd_ = -1; | 954 int Sandbox::proc_fd_ = -1; |
882 Sandbox::Evaluators Sandbox::evaluators_; | 955 Sandbox::Evaluators Sandbox::evaluators_; |
883 Sandbox::Traps *Sandbox::traps_ = NULL; | 956 Sandbox::Traps *Sandbox::traps_ = NULL; |
884 Sandbox::TrapIds Sandbox::trapIds_; | 957 Sandbox::TrapIds Sandbox::trap_ids_; |
885 ErrorCode *Sandbox::trapArray_ = NULL; | 958 ErrorCode *Sandbox::trap_array_ = NULL; |
886 size_t Sandbox::trapArraySize_ = 0; | 959 size_t Sandbox::trap_array_size_ = 0; |
887 bool Sandbox::has_unsafe_traps_ = false; | 960 bool Sandbox::has_unsafe_traps_ = false; |
961 Sandbox::Conds Sandbox::conds_; | |
888 | 962 |
889 } // namespace | 963 } // namespace |
OLD | NEW |