Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" | 5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
| 6 #include "sandbox/linux/seccomp-bpf/verifier.h" | 6 #include "sandbox/linux/seccomp-bpf/verifier.h" |
| 7 | 7 |
| 8 // The kernel gives us a sandbox, we turn it into a playground :-) | 8 // The kernel gives us a sandbox, we turn it into a playground :-) |
| 9 // This is version 2 of the playground; version 1 was built on top of | 9 // This is version 2 of the playground; version 1 was built on top of |
| 10 // pre-BPF seccomp mode. | 10 // pre-BPF seccomp mode. |
| (...skipping 159 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 170 sb.st_nlink != 3 || | 170 sb.st_nlink != 3 || |
| 171 HANDLE_EINTR(close(task))) { | 171 HANDLE_EINTR(close(task))) { |
| 172 if (task >= 0) { | 172 if (task >= 0) { |
| 173 (void) HANDLE_EINTR(close(task)); | 173 (void) HANDLE_EINTR(close(task)); |
| 174 } | 174 } |
| 175 return false; | 175 return false; |
| 176 } | 176 } |
| 177 return true; | 177 return true; |
| 178 } | 178 } |
| 179 | 179 |
| 180 static bool isDenied(Sandbox::ErrorCode code) { | |
| 181 return code == Sandbox::SB_TRAP || | |
| 182 (code >= (Sandbox::ErrorCode)1 && | |
| 183 code <= (Sandbox::ErrorCode)4095); // errno value | |
| 184 } | |
| 185 | |
| 186 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, | |
| 187 EvaluateArguments argumentEvaluator) { | |
|
> **Chris Evans** (2012/06/12 18:11:55): I think this should be debug only. That will be mo… [comment truncated in this view]
>
> **Markus (顧孟勤)** (2012/06/12 19:02:59): Done. I left in the cheap tests, but disabled the… [comment truncated in this view]
| |
| 188 // Do some sanity checks on the policy. This will warn users if they do | |
| 189 // things that are likely unsafe and unintended. | |
| 190 // We also have similar checks later, when we actually compile the BPF | |
| 191 // program. That catches problems with incorrectly stacked evaluators. | |
| 192 if (!isDenied(syscallEvaluator(-1))) { | |
| 193 die("Negative system calls should always be disallowed by policy"); | |
| 194 } | |
| 195 #if defined(__i386__) || defined(__x86_64__) | |
| 196 #if defined(__x86_64__) && defined(__ILP32__) | |
| 197 for (unsigned int sysnum = MIN_SYSCALL & ~0x40000000u; | |
| 198 sysnum <= (MAX_SYSCALL & ~0x40000000u); | |
| 199 ++sysnum) { | |
| 200 if (!isDenied(syscallEvaluator(sysnum))) { | |
| 201 die("In x32 mode, you should not allow any non-x32 system calls"); | |
| 202 } | |
| 203 } | |
| 204 #else | |
| 205 for (unsigned int sysnum = MIN_SYSCALL | 0x40000000u; | |
| 206 sysnum <= (MAX_SYSCALL | 0x40000000u); | |
| 207 ++sysnum) { | |
| 208 if (!isDenied(syscallEvaluator(sysnum))) { | |
| 209 die("x32 system calls should be explicitly disallowed"); | |
| 210 } | |
| 211 } | |
| 212 #endif | |
| 213 #endif | |
| 214 // Check interesting boundary values just outside of the valid system call | |
| 215 // range: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF, MIN_SYSCALL-1, MAX_SYSCALL+1. | |
| 216 // They all should be denied. | |
| 217 if (!isDenied(syscallEvaluator(std::numeric_limits<int>::max())) || | |
| 218 !isDenied(syscallEvaluator(std::numeric_limits<int>::min())) || | |
| 219 !isDenied(syscallEvaluator(-1)) || | |
| 220 !isDenied(syscallEvaluator(static_cast<int>(MIN_SYSCALL) - 1)) || | |
| 221 !isDenied(syscallEvaluator(static_cast<int>(MAX_SYSCALL) + 1))) { | |
| 222 die("Even for default-allow policies, you must never allow system calls " | |
| 223 "outside of the standard system call range"); | |
| 224 } | |
| 225 return; | |
| 226 } | |
| 227 | |
| 180 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, | 228 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, |
| 181 EvaluateArguments argumentEvaluator) { | 229 EvaluateArguments argumentEvaluator) { |
| 230 policySanityChecks(syscallEvaluator, argumentEvaluator); | |
| 182 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); | 231 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); |
| 183 } | 232 } |
| 184 | 233 |
| 185 void Sandbox::installFilter() { | 234 void Sandbox::installFilter() { |
| 186 // Verify that the user pushed a policy. | 235 // Verify that the user pushed a policy. |
| 187 if (evaluators_.empty()) { | 236 if (evaluators_.empty()) { |
| 188 filter_failed: | 237 filter_failed: |
| 189 die("Failed to configure system call filters"); | 238 die("Failed to configure system call filters"); |
| 190 } | 239 } |
| 191 | 240 |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 207 } | 256 } |
| 208 | 257 |
| 209 // We can't handle stacked evaluators, yet. We'll get there eventually | 258 // We can't handle stacked evaluators, yet. We'll get there eventually |
| 210 // though. Hang tight. | 259 // though. Hang tight. |
| 211 if (evaluators_.size() != 1) { | 260 if (evaluators_.size() != 1) { |
| 212 die("Not implemented"); | 261 die("Not implemented"); |
| 213 } | 262 } |
| 214 | 263 |
| 215 // If the architecture doesn't match SECCOMP_ARCH, disallow the | 264 // If the architecture doesn't match SECCOMP_ARCH, disallow the |
| 216 // system call. | 265 // system call. |
| 217 std::vector<struct sock_filter> program; | 266 Program program; |
| 218 program.push_back((struct sock_filter) | 267 program.push_back((struct sock_filter) |
| 219 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch))); | 268 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch))); |
| 220 program.push_back((struct sock_filter) | 269 program.push_back((struct sock_filter) |
| 221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); | 270 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); |
| 222 | 271 |
| 223 // TODO: Instead of killing outright, we should raise a SIGSYS and | 272 // TODO: Instead of killing outright, we should raise a SIGSYS and |
| 224 // report a useful error message. SIGKILL cannot be trapped by the | 273 // report a useful error message. SIGKILL cannot be trapped by the |
| 225 // debugger and essentially makes the program fail in a way that is | 274 // debugger and essentially makes the program fail in a way that is |
| 226 // almost impossible to debug. | 275 // almost impossible to debug. |
| 227 program.push_back((struct sock_filter) | 276 program.push_back((struct sock_filter) |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 240 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0)); | 289 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0)); |
| 241 #else | 290 #else |
| 242 program.push_back((struct sock_filter) | 291 program.push_back((struct sock_filter) |
| 243 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1)); | 292 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1)); |
| 244 #endif | 293 #endif |
| 245 // TODO: raise a suitable SIGSYS signal | 294 // TODO: raise a suitable SIGSYS signal |
| 246 program.push_back((struct sock_filter) | 295 program.push_back((struct sock_filter) |
| 247 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); | 296 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); |
| 248 #endif | 297 #endif |
| 249 | 298 |
| 250 // Evaluate all possible system calls and depending on their | 299 // Evaluate all possible system calls and group their ErrorCodes into |
| 251 // exit codes generate a BPF filter. | 300 // ranges of identical codes. |
| 252 // This is very inefficient right now. We need to be much smarter | 301 Ranges ranges; |
| 253 // eventually. | 302 findRanges(&ranges); |
| 254 // We currently incur a O(N) overhead on each system call, with N | 303 |
| 255 // being the number of system calls. It is easy to get this down to | 304 // Compile the system call ranges to an optimized BPF program |
| 256 // O(log_2(M)) with M being the number of system calls that need special | 305 rangesToBPF(&program, ranges); |
| 257 // treatment. | 306 |
| 307 // Everything that isn't allowed is forbidden. Eventually, we would | |
| 308 // like to have a way to log forbidden calls, when in debug mode. | |
| 309 program.push_back((struct sock_filter) | |
| 310 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
| 311 | |
| 312 // Make sure compilation resulted in BPF program that executes | |
| 313 // correctly. Otherwise, there is an internal error in our BPF compiler. | |
| 314 // There is really nothing the caller can do until the bug is fixed. | |
|
> **Chris Evans** (2012/06/12 18:13:47): Actually one more thing. I'm not entirely grokking th… [comment truncated in this view]
>
> **Markus (顧孟勤)** (2012/06/12 19:02:59): I'll upload a newly rebased version shortly, and i… [comment truncated in this view]
| |
| 315 const char *err; | |
| 316 if (!Verifier::verifyBPF(program, evaluators_, &err)) { | |
| 317 die(err); | |
| 318 } | |
| 319 | |
| 320 // Install BPF filter program | |
| 321 const struct sock_fprog prog = { program.size(), &program[0] }; | |
| 322 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
| 323 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
|
> **Chris Evans** (2012/06/12 18:11:55): Nit (for the future): it'd be nice to differentiat… [comment truncated in this view]
>
> **Markus (顧孟勤)** (2012/06/12 19:02:59): Already fixed by rebasing.
| |
| 324 goto filter_failed; | |
| 325 } | |
| 326 | |
| 327 return; | |
| 328 } | |
| 329 | |
| 330 void Sandbox::findRanges(Ranges *ranges) { | |
| 331 // Please note that "struct seccomp_data" defines system calls as a signed | |
| 332 // int32_t, but BPF instructions always operate on unsigned quantities. We | |
| 333 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, | |
| 334 // and then verifying that the rest of the number range (both positive and | |
| 335 // negative) all return the same ErrorCode. | |
| 258 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | 336 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; |
| 259 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL+1; ++sysnum) { | 337 uint32_t oldSysnum = 0; |
| 260 ErrorCode err = evaluateSyscall(sysnum); | 338 ErrorCode oldErr = evaluateSyscall(oldSysnum); |
| 339 for (uint32_t sysnum = std::max(1u, MIN_SYSCALL); | |
| 340 sysnum <= MAX_SYSCALL + 1; | |
| 341 ++sysnum) { | |
| 342 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum)); | |
| 343 if (err != oldErr) { | |
| 344 ranges->push_back(Range(oldSysnum, sysnum-1, oldErr)); | |
| 345 oldSysnum = sysnum; | |
| 346 oldErr = err; | |
| 347 } | |
| 348 } | |
| 349 | |
| 350 // As we looped all the way past the valid system calls (i.e. MAX_SYSCALL+1), | |
| 351 // "oldErr" should at this point be the "default" policy for all system call | |
| 352 // numbers that don't have an explicit handler in the system call evaluator. | |
| 353 // But as we are quite paranoid, we perform some more sanity checks to verify | |
| 354 // that there actually is a consistent "default" policy in the first place. | |
| 355 // We don't actually iterate over all possible 2^32 values, though. We just | |
| 356 // perform spot checks at the boundaries. | |
| 357 // The cases that we test are: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF. | |
| 358 if (oldErr != evaluateSyscall(std::numeric_limits<int>::max()) || | |
| 359 oldErr != evaluateSyscall(std::numeric_limits<int>::min()) || | |
| 360 oldErr != evaluateSyscall(-1)) { | |
| 361 die("Invalid seccomp policy"); | |
| 362 } | |
| 363 ranges->push_back( | |
| 364 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr)); | |
| 365 } | |
| 366 | |
| 367 void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) { | |
| 368 // TODO: We currently search linearly through all ranges. An improved | |
| 369 // algorithm should be doing a binary search. | |
| 370 | |
| 371 // System call ranges must cover the entire number range. | |
| 372 if (ranges.empty() || | |
| 373 ranges.begin()->from != 0 || | |
| 374 ranges.back().to != std::numeric_limits<unsigned>::max()) { | |
| 375 rangeError: | |
| 376 die("Invalid set of system call ranges"); | |
| 377 } | |
| 378 uint32_t from = 0; | |
| 379 for (Ranges::const_iterator iter = ranges.begin(); | |
| 380 iter != ranges.end(); | |
| 381 ++iter) { | |
| 382 // Ranges must be contiguous and monotonically increasing. | |
| 383 if (iter->from > iter->to || | |
| 384 iter->from != from) { | |
| 385 goto rangeError; | |
| 386 } | |
| 387 from = iter->to + 1; | |
| 388 | |
| 389 // Convert ErrorCodes to return values that are acceptable for | |
| 390 // BPF filters. | |
| 261 int ret; | 391 int ret; |
| 262 switch (err) { | 392 switch (iter->err) { |
| 263 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: | 393 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: |
| 264 die("Not implemented"); | 394 die("Not implemented"); |
| 265 case SB_TRAP: | 395 case SB_TRAP: |
| 266 ret = SECCOMP_RET_TRAP; | 396 ret = SECCOMP_RET_TRAP; |
| 267 break; | 397 break; |
| 268 case SB_ALLOWED: | 398 case SB_ALLOWED: |
| 269 ret = SECCOMP_RET_ALLOW; | 399 ret = SECCOMP_RET_ALLOW; |
| 270 break; | 400 break; |
| 271 default: | 401 default: |
| 272 if (err >= static_cast<ErrorCode>(1) && | 402 if (iter->err >= static_cast<ErrorCode>(1) && |
| 273 err <= static_cast<ErrorCode>(4096)) { | 403 iter->err <= static_cast<ErrorCode>(4096)) { |
| 274 // We limit errno values to a reasonable range. In fact, the Linux ABI | 404 // We limit errno values to a reasonable range. In fact, the Linux ABI |
| 275 // doesn't support errno values outside of this range. | 405 // doesn't support errno values outside of this range. |
| 276 ret = SECCOMP_RET_ERRNO + err; | 406 ret = SECCOMP_RET_ERRNO + iter->err; |
| 277 } else { | 407 } else { |
| 278 die("Invalid ErrorCode reported by sandbox system call evaluator"); | 408 die("Invalid ErrorCode reported by sandbox system call evaluator"); |
| 279 } | 409 } |
| 280 break; | 410 break; |
| 281 } | 411 } |
| 282 if (sysnum <= MAX_SYSCALL) { | 412 |
| 283 // We compute the default behavior (e.g. fail open or fail closed) by | 413 // Emit BPF instructions matching this range. |
| 284 // calling the system call evaluator with a system call bigger than | 414 if (iter->to != std::numeric_limits<unsigned>::max()) { |
| 285 // MAX_SYSCALL. | 415 program->push_back((struct sock_filter) |
| 286 // In other words, the very last iteration in our loop becomes the | 416 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0)); |
| 287 // fallback case and we don't need to do any comparisons. | |
| 288 program.push_back((struct sock_filter) | |
| 289 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1)); | |
| 290 } | 417 } |
| 291 program.push_back((struct sock_filter) | 418 program->push_back((struct sock_filter) |
| 292 BPF_STMT(BPF_RET+BPF_K, ret)); | 419 BPF_STMT(BPF_RET+BPF_K, ret)); |
| 293 } | 420 } |
| 294 | |
| 295 // Make sure compilation resulted in BPF program that executes | |
| 296 // correctly. Otherwise, there is an internal error in our BPF compiler. | |
| 297 // There is really nothing the caller can do until the bug is fixed. | |
| 298 const char *err; | |
| 299 if (!Verifier::verifyBPF(program, evaluators_, &err)) { | |
| 300 die(err); | |
| 301 } | |
| 302 | |
| 303 // Install BPF filter program | |
| 304 const struct sock_fprog prog = { program.size(), &program[0] }; | |
| 305 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
| 306 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
| 307 goto filter_failed; | |
| 308 } | |
| 309 | |
| 310 return; | 421 return; |
| 311 } | 422 } |
| 312 | 423 |
| 313 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | 424 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { |
| 314 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { | 425 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { |
| 315 // die() can call LOG(FATAL). This is not normally async-signal safe | 426 // die() can call LOG(FATAL). This is not normally async-signal safe |
| 316 // and can lead to bugs. We should eventually implement a different | 427 // and can lead to bugs. We should eventually implement a different |
| 317 // logging and reporting mechanism that is safe to be called from | 428 // logging and reporting mechanism that is safe to be called from |
| 318 // the sigSys() handler. | 429 // the sigSys() handler. |
| 319 die("Unexpected SIGSYS received"); | 430 die("Unexpected SIGSYS received"); |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 340 return; | 451 return; |
| 341 } | 452 } |
| 342 | 453 |
| 343 | 454 |
| 344 bool Sandbox::suppressLogging_ = false; | 455 bool Sandbox::suppressLogging_ = false; |
| 345 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 456 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
| 346 int Sandbox::proc_fd_ = -1; | 457 int Sandbox::proc_fd_ = -1; |
| 347 Sandbox::Evaluators Sandbox::evaluators_; | 458 Sandbox::Evaluators Sandbox::evaluators_; |
| 348 | 459 |
| 349 } // namespace | 460 } // namespace |
| OLD | NEW |