| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" | 5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
| 6 #include "sandbox/linux/seccomp-bpf/verifier.h" | 6 #include "sandbox/linux/seccomp-bpf/verifier.h" |
| 7 | 7 |
| 8 // The kernel gives us a sandbox, we turn it into a playground :-) | 8 // The kernel gives us a sandbox, we turn it into a playground :-) |
| 9 // This is version 2 of the playground; version 1 was built on top of | 9 // This is version 2 of the playground; version 1 was built on top of |
| 10 // pre-BPF seccomp mode. | 10 // pre-BPF seccomp mode. |
| (...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 206 sb.st_nlink != 3 || | 206 sb.st_nlink != 3 || |
| 207 HANDLE_EINTR(close(task))) { | 207 HANDLE_EINTR(close(task))) { |
| 208 if (task >= 0) { | 208 if (task >= 0) { |
| 209 if (HANDLE_EINTR(close(task))) { } | 209 if (HANDLE_EINTR(close(task))) { } |
| 210 } | 210 } |
| 211 return false; | 211 return false; |
| 212 } | 212 } |
| 213 return true; | 213 return true; |
| 214 } | 214 } |
| 215 | 215 |
| 216 static bool isDenied(Sandbox::ErrorCode code) { |
| 217 return code == Sandbox::SB_TRAP || |
| 218 (code >= (Sandbox::ErrorCode)1 && |
| 219 code <= (Sandbox::ErrorCode)4095); // errno value |
| 220 } |
| 221 |
| 222 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, |
| 223 EvaluateArguments) { |
| 224 // Do some sanity checks on the policy. This will warn users if they do |
| 225 // things that are likely unsafe and unintended. |
| 226 // We also have similar checks later, when we actually compile the BPF |
| 227 // program. That catches problems with incorrectly stacked evaluators. |
| 228 if (!isDenied(syscallEvaluator(-1))) { |
| 229 die("Negative system calls should always be disallowed by policy"); |
| 230 } |
| 231 #ifndef NDEBUG |
| 232 #if defined(__i386__) || defined(__x86_64__) |
| 233 #if defined(__x86_64__) && defined(__ILP32__) |
| 234 for (unsigned int sysnum = MIN_SYSCALL & ~0x40000000u; |
| 235 sysnum <= (MAX_SYSCALL & ~0x40000000u); |
| 236 ++sysnum) { |
| 237 if (!isDenied(syscallEvaluator(sysnum))) { |
| 238 die("In x32 mode, you should not allow any non-x32 system calls"); |
| 239 } |
| 240 } |
| 241 #else |
| 242 for (unsigned int sysnum = MIN_SYSCALL | 0x40000000u; |
| 243 sysnum <= (MAX_SYSCALL | 0x40000000u); |
| 244 ++sysnum) { |
| 245 if (!isDenied(syscallEvaluator(sysnum))) { |
| 246 die("x32 system calls should be explicitly disallowed"); |
| 247 } |
| 248 } |
| 249 #endif |
| 250 #endif |
| 251 #endif |
| 252 // Check interesting boundary values just outside of the valid system call |
| 253 // range: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF, MIN_SYSCALL-1, MAX_SYSCALL+1. |
| 254 // They all should be denied. |
| 255 if (!isDenied(syscallEvaluator(std::numeric_limits<int>::max())) || |
| 256 !isDenied(syscallEvaluator(std::numeric_limits<int>::min())) || |
| 257 !isDenied(syscallEvaluator(-1)) || |
| 258 !isDenied(syscallEvaluator(static_cast<int>(MIN_SYSCALL) - 1)) || |
| 259 !isDenied(syscallEvaluator(static_cast<int>(MAX_SYSCALL) + 1))) { |
| 260 die("Even for default-allow policies, you must never allow system calls " |
| 261 "outside of the standard system call range"); |
| 262 } |
| 263 return; |
| 264 } |
| 265 |
| 216 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, | 266 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, |
| 217 EvaluateArguments argumentEvaluator) { | 267 EvaluateArguments argumentEvaluator) { |
| 268 policySanityChecks(syscallEvaluator, argumentEvaluator); |
| 218 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); | 269 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); |
| 219 } | 270 } |
| 220 | 271 |
| 221 void Sandbox::installFilter() { | 272 void Sandbox::installFilter() { |
| 222 // Verify that the user pushed a policy. | 273 // Verify that the user pushed a policy. |
| 223 if (evaluators_.empty()) { | 274 if (evaluators_.empty()) { |
| 224 filter_failed: | 275 filter_failed: |
| 225 die("Failed to configure system call filters"); | 276 die("Failed to configure system call filters"); |
| 226 } | 277 } |
| 227 | 278 |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 281 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0)); | 332 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0)); |
| 282 #else | 333 #else |
| 283 program->push_back((struct sock_filter) | 334 program->push_back((struct sock_filter) |
| 284 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1)); | 335 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1)); |
| 285 #endif | 336 #endif |
| 286 // TODO: raise a suitable SIGSYS signal | 337 // TODO: raise a suitable SIGSYS signal |
| 287 program->push_back((struct sock_filter) | 338 program->push_back((struct sock_filter) |
| 288 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); | 339 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); |
| 289 #endif | 340 #endif |
| 290 | 341 |
| 291 // Evaluate all possible system calls and depending on their | 342 // Evaluate all possible system calls and group their ErrorCodes into |
| 292 // exit codes generate a BPF filter. | 343 // ranges of identical codes. |
| 293 // This is very inefficient right now. We need to be much smarter | 344 Ranges ranges; |
| 294 // eventually. | 345 findRanges(&ranges); |
| 295 // We currently incur a O(N) overhead on each system call, with N | 346 |
| 296 // being the number of system calls. It is easy to get this down to | 347 // Compile the system call ranges to an optimized BPF program. |
| 297 // O(log_2(M)) with M being the number of system calls that need special | 348 rangesToBPF(program, ranges); |
| 298 // treatment. | |
| 299 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | |
| 300 for (uint32_t sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL+1; ++sysnum) { | |
| 301 ErrorCode err = evaluateSyscall(sysnum); | |
| 302 int ret; | |
| 303 switch (err) { | |
| 304 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: | |
| 305 die("Not implemented"); | |
| 306 case SB_TRAP: | |
| 307 ret = SECCOMP_RET_TRAP; | |
| 308 break; | |
| 309 case SB_ALLOWED: | |
| 310 ret = SECCOMP_RET_ALLOW; | |
| 311 break; | |
| 312 default: | |
| 313 if (err >= static_cast<ErrorCode>(1) && | |
| 314 err <= static_cast<ErrorCode>(4096)) { | |
| 315 // We limit errno values to a reasonable range. In fact, the Linux ABI | |
| 316 // doesn't support errno values outside of this range. | |
| 317 ret = SECCOMP_RET_ERRNO + err; | |
| 318 } else { | |
| 319 die("Invalid ErrorCode reported by sandbox system call evaluator"); | |
| 320 } | |
| 321 break; | |
| 322 } | |
| 323 if (sysnum <= MAX_SYSCALL) { | |
| 324 // We compute the default behavior (e.g. fail open or fail closed) by | |
| 325 // calling the system call evaluator with a system call bigger than | |
| 326 // MAX_SYSCALL. | |
| 327 // In other words, the very last iteration in our loop becomes the | |
| 328 // fallback case and we don't need to do any comparisons. | |
| 329 program->push_back((struct sock_filter) | |
| 330 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1)); | |
| 331 } | |
| 332 program->push_back((struct sock_filter) | |
| 333 BPF_STMT(BPF_RET+BPF_K, ret)); | |
| 334 } | |
| 335 | 349 |
| 336 // Everything that isn't allowed is forbidden. Eventually, we would | 350 // Everything that isn't allowed is forbidden. Eventually, we would |
| 337 // like to have a way to log forbidden calls, when in debug mode. | 351 // like to have a way to log forbidden calls, when in debug mode. |
| 338 // TODO: raise a suitable SIGSYS signal | |
| 339 program->push_back((struct sock_filter) | 352 program->push_back((struct sock_filter) |
| 340 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); | 353 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); |
| 341 | 354 |
| 342 // Make sure compilation resulted in BPF program that executes | 355 // Make sure compilation resulted in BPF program that executes |
| 343 // correctly. Otherwise, there is an internal error in our BPF compiler. | 356 // correctly. Otherwise, there is an internal error in our BPF compiler. |
| 344 // There is really nothing the caller can do until the bug is fixed. | 357 // There is really nothing the caller can do until the bug is fixed. |
| 345 #ifndef NDEBUG | 358 #ifndef NDEBUG |
| 346 const char *err = NULL; | 359 const char *err = NULL; |
| 347 if (!Verifier::verifyBPF(*program, evaluators_, &err)) { | 360 if (!Verifier::verifyBPF(*program, evaluators_, &err)) { |
| 348 die(err); | 361 die(err); |
| 349 } | 362 } |
| 350 #endif | 363 #endif |
| (...skipping 19 matching lines...) Expand all Loading... |
| 370 die(dryRun_ ? NULL : "Kernel refuses to enable no-new-privs"); | 383 die(dryRun_ ? NULL : "Kernel refuses to enable no-new-privs"); |
| 371 } else { | 384 } else { |
| 372 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | 385 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { |
| 373 die(dryRun_ ? NULL : "Kernel refuses to turn on BPF filters"); | 386 die(dryRun_ ? NULL : "Kernel refuses to turn on BPF filters"); |
| 374 } | 387 } |
| 375 } | 388 } |
| 376 | 389 |
| 377 return; | 390 return; |
| 378 } | 391 } |
| 379 | 392 |
| 393 void Sandbox::findRanges(Ranges *ranges) { |
| 394 // Please note that "struct seccomp_data" defines system calls as a signed |
| 395 // int32_t, but BPF instructions always operate on unsigned quantities. We |
| 396 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, |
| 397 // and then verifying that the rest of the number range (both positive and |
| 398 // negative) all return the same ErrorCode. |
| 399 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; |
| 400 uint32_t oldSysnum = 0; |
| 401 ErrorCode oldErr = evaluateSyscall(oldSysnum); |
| 402 for (uint32_t sysnum = std::max(1u, MIN_SYSCALL); |
| 403 sysnum <= MAX_SYSCALL + 1; |
| 404 ++sysnum) { |
| 405 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum)); |
| 406 if (err != oldErr) { |
| 407 ranges->push_back(Range(oldSysnum, sysnum-1, oldErr)); |
| 408 oldSysnum = sysnum; |
| 409 oldErr = err; |
| 410 } |
| 411 } |
| 412 |
| 413 // As we looped all the way past the valid system calls (i.e. MAX_SYSCALL+1), |
| 414 // "oldErr" should at this point be the "default" policy for all system call |
| 415 // numbers that don't have an explicit handler in the system call evaluator. |
| 416 // But as we are quite paranoid, we perform some more sanity checks to verify |
| 417 // that there actually is a consistent "default" policy in the first place. |
| 418 // We don't actually iterate over all possible 2^32 values, though. We just |
| 419 // perform spot checks at the boundaries. |
| 420 // The cases that we test are: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF. |
| 421 if (oldErr != evaluateSyscall(std::numeric_limits<int>::max()) || |
| 422 oldErr != evaluateSyscall(std::numeric_limits<int>::min()) || |
| 423 oldErr != evaluateSyscall(-1)) { |
| 424 die("Invalid seccomp policy"); |
| 425 } |
| 426 ranges->push_back( |
| 427 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr)); |
| 428 } |
| 429 |
| 430 void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) { |
| 431 // TODO: We currently search linearly through all ranges. An improved |
| 432 // algorithm should be doing a binary search. |
| 433 |
| 434 // System call ranges must cover the entire number range. |
| 435 if (ranges.empty() || |
| 436 ranges.begin()->from != 0 || |
| 437 ranges.back().to != std::numeric_limits<unsigned>::max()) { |
| 438 rangeError: |
| 439 die("Invalid set of system call ranges"); |
| 440 } |
| 441 uint32_t from = 0; |
| 442 for (Ranges::const_iterator iter = ranges.begin(); |
| 443 iter != ranges.end(); |
| 444 ++iter) { |
| 445 // Ranges must be contiguous and monotonically increasing. |
| 446 if (iter->from > iter->to || |
| 447 iter->from != from) { |
| 448 goto rangeError; |
| 449 } |
| 450 from = iter->to + 1; |
| 451 |
| 452 // Convert ErrorCodes to return values that are acceptable for |
| 453 // BPF filters. |
| 454 int ret; |
| 455 switch (iter->err) { |
| 456 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: |
| 457 die("Not implemented"); |
| 458 case SB_TRAP: |
| 459 ret = SECCOMP_RET_TRAP; |
| 460 break; |
| 461 case SB_ALLOWED: |
| 462 ret = SECCOMP_RET_ALLOW; |
| 463 break; |
| 464 default: |
| 465 if (iter->err >= static_cast<ErrorCode>(1) && |
| 466 iter->err <= static_cast<ErrorCode>(4096)) { |
| 467 // We limit errno values to a reasonable range. In fact, the Linux ABI |
| 468 // doesn't support errno values outside of this range. |
| 469 ret = SECCOMP_RET_ERRNO + iter->err; |
| 470 } else { |
| 471 die("Invalid ErrorCode reported by sandbox system call evaluator"); |
| 472 } |
| 473 break; |
| 474 } |
| 475 |
| 476 // Emit BPF instructions matching this range. |
| 477 if (iter->to != std::numeric_limits<unsigned>::max()) { |
| 478 program->push_back((struct sock_filter) |
| 479 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0)); |
| 480 } |
| 481 program->push_back((struct sock_filter) |
| 482 BPF_STMT(BPF_RET+BPF_K, ret)); |
| 483 } |
| 484 return; |
| 485 } |
| 486 |
| 380 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | 487 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { |
| 381 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { | 488 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { |
| 382 // die() can call LOG(FATAL). This is not normally async-signal safe | 489 // die() can call LOG(FATAL). This is not normally async-signal safe |
| 383 // and can lead to bugs. We should eventually implement a different | 490 // and can lead to bugs. We should eventually implement a different |
| 384 // logging and reporting mechanism that is safe to be called from | 491 // logging and reporting mechanism that is safe to be called from |
| 385 // the sigSys() handler. | 492 // the sigSys() handler. |
| 386 die("Unexpected SIGSYS received"); | 493 die("Unexpected SIGSYS received"); |
| 387 } | 494 } |
| 388 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context); | 495 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context); |
| 389 int old_errno = errno; | 496 int old_errno = errno; |
| (...skipping 17 matching lines...) Expand all Loading... |
| 407 return; | 514 return; |
| 408 } | 515 } |
| 409 | 516 |
| 410 | 517 |
| 411 bool Sandbox::dryRun_ = false; | 518 bool Sandbox::dryRun_ = false; |
| 412 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 519 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
| 413 int Sandbox::proc_fd_ = -1; | 520 int Sandbox::proc_fd_ = -1; |
| 414 Sandbox::Evaluators Sandbox::evaluators_; | 521 Sandbox::Evaluators Sandbox::evaluators_; |
| 415 | 522 |
| 416 } // namespace | 523 } // namespace |
| OLD | NEW |