Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" | 5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
| 6 #include "sandbox/linux/seccomp-bpf/verifier.h" | 6 #include "sandbox/linux/seccomp-bpf/verifier.h" |
| 7 | 7 |
| 8 // The kernel gives us a sandbox, we turn it into a playground :-) | 8 // The kernel gives us a sandbox, we turn it into a playground :-) |
| 9 // This is version 2 of the playground; version 1 was built on top of | 9 // This is version 2 of the playground; version 1 was built on top of |
| 10 // pre-BPF seccomp mode. | 10 // pre-BPF seccomp mode. |
| (...skipping 196 matching lines...) | |
| 207 } | 207 } |
| 208 | 208 |
| 209 // We can't handle stacked evaluators, yet. We'll get there eventually | 209 // We can't handle stacked evaluators, yet. We'll get there eventually |
| 210 // though. Hang tight. | 210 // though. Hang tight. |
| 211 if (evaluators_.size() != 1) { | 211 if (evaluators_.size() != 1) { |
| 212 die("Not implemented"); | 212 die("Not implemented"); |
| 213 } | 213 } |
| 214 | 214 |
| 215 // If the architecture doesn't match SECCOMP_ARCH, disallow the | 215 // If the architecture doesn't match SECCOMP_ARCH, disallow the |
| 216 // system call. | 216 // system call. |
| 217 std::vector<struct sock_filter> program; | 217 Program program; |
| 218 program.push_back((struct sock_filter) | 218 program.push_back((struct sock_filter) |
| 219 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch))); | 219 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch))); |
| 220 program.push_back((struct sock_filter) | 220 program.push_back((struct sock_filter) |
| 221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); | 221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); |
| 222 | 222 |
| 223 // TODO: Instead of killing outright, we should raise a SIGSYS and | 223 // TODO: Instead of killing outright, we should raise a SIGSYS and |
| 224 // report a useful error message. SIGKILL cannot be trapped by the | 224 // report a useful error message. SIGKILL cannot be trapped by the |
| 225 // debugger and essentially makes the program fail in a way that is | 225 // debugger and essentially makes the program fail in a way that is |
| 226 // almost impossible to debug. | 226 // almost impossible to debug. |
| 227 program.push_back((struct sock_filter) | 227 program.push_back((struct sock_filter) |
| (...skipping 12 matching lines...) | |
| 240 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0)); | 240 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0)); |
| 241 #else | 241 #else |
| 242 program.push_back((struct sock_filter) | 242 program.push_back((struct sock_filter) |
| 243 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1)); | 243 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1)); |
| 244 #endif | 244 #endif |
| 245 // TODO: raise a suitable SIGSYS signal | 245 // TODO: raise a suitable SIGSYS signal |
| 246 program.push_back((struct sock_filter) | 246 program.push_back((struct sock_filter) |
| 247 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); | 247 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); |
| 248 #endif | 248 #endif |
| 249 | 249 |
| 250 // Evaluate all possible system calls and depending on their | 250 // Evaluate all possible system calls and group their ErrorCodes into |
| 251 // exit codes generate a BPF filter. | 251 // ranges of identical codes. |
| 252 // This is very inefficient right now. We need to be much smarter | 252 Ranges ranges; |
| 253 // eventually. | 253 findRanges(&ranges); |
| 254 // We currently incur a O(N) overhead on each system call, with N | 254 |
| 255 // being the number of system calls. It is easy to get this down to | 255 // Compile the system call ranges to an optimized BPF program |
| 256 // O(log_2(M)) with M being the number of system calls that need special | 256 rangesToBPF(&program, ranges); |
| 257 // treatment. | 257 |
| 258 // Everything that isn't allowed is forbidden. Eventually, we would | |
| 259 // like to have a way to log forbidden calls, when in debug mode. | |
| 260 program.push_back((struct sock_filter) | |
| 261 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
| 262 | |
| 263 // Make sure compilation resulted in BPF program that executes | |
| 264 // correctly. Otherwise, there is an internal error in our BPF compiler. | |
| 265 // There is really nothing the caller can do until the bug is fixed. | |
| 266 const char *err; | |
| 267 if (!Verifier::verifyBPF(program, evaluators_, &err)) { | |
| 268 die(err); | |
| 269 } | |
| 270 | |
| 271 // Install BPF filter program | |
| 272 const struct sock_fprog prog = { program.size(), &program[0] }; | |
| 273 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
| 274 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
| 275 goto filter_failed; | |
| 276 } | |
| 277 | |
| 278 return; | |
| 279 } | |
| 280 | |
| 281 void Sandbox::findRanges(Ranges *ranges) { | |
| 282 // Please note that "struct seccomp_data" defines system calls as a signed | |
| 283 // int32_t, but BPF instructions always operate on unsigned quantities. We | |
| 284 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, | |
| 285 // and then verifying that the rest of the number range (both positive and | |
| 286 // negative) all return the same ErrorCode. | |
| 287 // We don't actually iterate over all possible 2^32 values, though. We just | |
| 288 // perform spot checks at the boundaries. | |
| 258 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | 289 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; |
| 259 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL+1; ++sysnum) { | 290 uint32_t oldSysnum = 0; |
| 260 ErrorCode err = evaluateSyscall(sysnum); | 291 ErrorCode oldErr = evaluateSyscall(oldSysnum); |
| 292 for (uint32_t sysnum = std::max(1u, MIN_SYSCALL); | |
| 293 sysnum <= MAX_SYSCALL; | |
| 294 ++sysnum) { | |
| 295 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum)); | |
| 296 if (err != oldErr) { | |
| 297 ranges->push_back(Range(oldSysnum, sysnum-1, oldErr)); | |
| 298 oldSysnum = sysnum; | |
| 299 oldErr = err; | |
| 300 } | |
| 301 } | |
| 302 if (oldErr != evaluateSyscall(std::numeric_limits<int>::max()) || | |
|
jln (very slow on Chromium)
2012/06/08 22:38:21
This makes it hard to review.
I would either:
- H… [comment truncated]
Markus (顧孟勤)
2012/06/09 00:30:58
This is just a glorified assert() statement. Unfor… [comment truncated]
| |
| 303 oldErr != evaluateSyscall(std::numeric_limits<int>::max() + 1) || | |
| 304 oldErr != evaluateSyscall(std::numeric_limits<unsigned>::max())) { | |
| 305 die("Invalid seccomp policy"); | |
| 306 } | |
| 307 ranges->push_back( | |
| 308 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr)); | |
| 309 } | |
| 310 | |
| 311 void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) { | |
| 312 // TODO: We currently search linearly through all ranges. An improved | |
| 313 // algorithm should be doing a binary search. | |
| 314 | |
| 315 // System call ranges must cover the entire number range. | |
| 316 if (ranges.empty() || | |
| 317 ranges.begin()->from != 0 || | |
| 318 ranges.back().to != std::numeric_limits<unsigned>::max()) { | |
| 319 rangeError: | |
| 320 die("Invalid set of system call ranges"); | |
| 321 } | |
| 322 uint32_t last = static_cast<uint32_t>(-1); | |
|
jln (very slow on Chromium)
2012/06/08 22:38:21
I know this is correct and allowed by standards, b… [comment truncated]
Markus (顧孟勤)
2012/06/09 00:30:58
I slightly changed the logic, and can thus avoid t… [comment truncated]
| |
| 323 for (Ranges::const_iterator iter = ranges.begin(); | |
| 324 iter != ranges.end(); | |
| 325 ++iter) { | |
| 326 // Ranges most be contiguous and monotonically increasing. | |
|
jln (very slow on Chromium)
2012/06/08 22:38:21
s/most/must
Markus (顧孟勤)
2012/06/09 00:30:58
Done.
| |
| 327 if (iter->from > iter->to || | |
| 328 iter->from != last+1) { | |
|
jln (very slow on Chromium)
2012/06/08 22:38:21
nit: last + 1 (spaces)
Also add a comment explain… [comment truncated]
| |
| 329 goto rangeError; | |
| 330 } | |
| 331 last = iter->to; | |
| 332 | |
| 333 // Convert ErrorCodes to return values that are acceptable for | |
| 334 // BPF filters. | |
| 261 int ret; | 335 int ret; |
| 262 switch (err) { | 336 switch (iter->err) { |
| 263 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: | 337 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: |
| 264 die("Not implemented"); | 338 die("Not implemented"); |
| 265 case SB_TRAP: | 339 case SB_TRAP: |
| 266 ret = SECCOMP_RET_TRAP; | 340 ret = SECCOMP_RET_TRAP; |
| 267 break; | 341 break; |
| 268 case SB_ALLOWED: | 342 case SB_ALLOWED: |
| 269 ret = SECCOMP_RET_ALLOW; | 343 ret = SECCOMP_RET_ALLOW; |
| 270 break; | 344 break; |
| 271 default: | 345 default: |
| 272 if (err >= static_cast<ErrorCode>(1) && | 346 if (iter->err >= static_cast<ErrorCode>(1) && |
| 273 err <= static_cast<ErrorCode>(4096)) { | 347 iter->err <= static_cast<ErrorCode>(4096)) { |
| 274 // We limit errno values to a reasonable range. In fact, the Linux ABI | 348 // We limit errno values to a reasonable range. In fact, the Linux ABI |
| 275 // doesn't support errno values outside of this range. | 349 // doesn't support errno values outside of this range. |
| 276 ret = SECCOMP_RET_ERRNO + err; | 350 ret = SECCOMP_RET_ERRNO + iter->err; |
| 277 } else { | 351 } else { |
| 278 die("Invalid ErrorCode reported by sandbox system call evaluator"); | 352 die("Invalid ErrorCode reported by sandbox system call evaluator"); |
| 279 } | 353 } |
| 280 break; | 354 break; |
| 281 } | 355 } |
| 282 if (sysnum <= MAX_SYSCALL) { | 356 |
| 283 // We compute the default behavior (e.g. fail open or fail closed) by | 357 // Emit BPF instructions matching this range. |
| 284 // calling the system call evaluator with a system call bigger than | 358 if (iter->to != std::numeric_limits<unsigned>::max()) { |
| 285 // MAX_SYSCALL. | 359 program->push_back((struct sock_filter) |
| 286 // In other words, the very last iteration in our loop becomes the | 360 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0)); |
| 287 // fallback case and we don't need to do any comparisons. | |
| 288 program.push_back((struct sock_filter) | |
| 289 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1)); | |
| 290 } | 361 } |
| 291 program.push_back((struct sock_filter) | 362 program->push_back((struct sock_filter) |
| 292 BPF_STMT(BPF_RET+BPF_K, ret)); | 363 BPF_STMT(BPF_RET+BPF_K, ret)); |
| 293 } | 364 } |
| 294 | |
| 295 // Make sure compilation resulted in BPF program that executes | |
| 296 // correctly. Otherwise, there is an internal error in our BPF compiler. | |
| 297 // There is really nothing the caller can do until the bug is fixed. | |
| 298 const char *err; | |
| 299 if (!Verifier::verifyBPF(program, evaluators_, &err)) { | |
| 300 die(err); | |
| 301 } | |
| 302 | |
| 303 // Install BPF filter program | |
| 304 const struct sock_fprog prog = { program.size(), &program[0] }; | |
| 305 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
| 306 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
| 307 goto filter_failed; | |
| 308 } | |
| 309 | |
| 310 return; | 365 return; |
| 311 } | 366 } |
| 312 | 367 |
| 313 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | 368 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { |
| 314 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { | 369 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { |
| 315 // die() can call LOG(FATAL). This is not normally async-signal safe | 370 // die() can call LOG(FATAL). This is not normally async-signal safe |
| 316 // and can lead to bugs. We should eventually implement a different | 371 // and can lead to bugs. We should eventually implement a different |
| 317 // logging and reporting mechanism that is safe to be called from | 372 // logging and reporting mechanism that is safe to be called from |
| 318 // the sigSys() handler. | 373 // the sigSys() handler. |
| 319 die("Unexpected SIGSYS received"); | 374 die("Unexpected SIGSYS received"); |
| (...skipping 20 matching lines...) | |
| 340 return; | 395 return; |
| 341 } | 396 } |
| 342 | 397 |
| 343 | 398 |
| 344 bool Sandbox::suppressLogging_ = false; | 399 bool Sandbox::suppressLogging_ = false; |
| 345 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 400 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
| 346 int Sandbox::proc_fd_ = -1; | 401 int Sandbox::proc_fd_ = -1; |
| 347 Sandbox::Evaluators Sandbox::evaluators_; | 402 Sandbox::Evaluators Sandbox::evaluators_; |
| 348 | 403 |
| 349 } // namespace | 404 } // namespace |
| OLD | NEW |