Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" | 5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" |
| 6 #include "sandbox/linux/seccomp-bpf/verifier.h" | 6 #include "sandbox/linux/seccomp-bpf/verifier.h" |
| 7 | 7 |
| 8 // The kernel gives us a sandbox, we turn it into a playground :-) | 8 // The kernel gives us a sandbox, we turn it into a playground :-) |
| 9 // This is version 2 of the playground; version 1 was built on top of | 9 // This is version 2 of the playground; version 1 was built on top of |
| 10 // pre-BPF seccomp mode. | 10 // pre-BPF seccomp mode. |
| (...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 170 sb.st_nlink != 3 || | 170 sb.st_nlink != 3 || |
| 171 HANDLE_EINTR(close(task))) { | 171 HANDLE_EINTR(close(task))) { |
| 172 if (task >= 0) { | 172 if (task >= 0) { |
| 173 (void) HANDLE_EINTR(close(task)); | 173 (void) HANDLE_EINTR(close(task)); |
| 174 } | 174 } |
| 175 return false; | 175 return false; |
| 176 } | 176 } |
| 177 return true; | 177 return true; |
| 178 } | 178 } |
| 179 | 179 |
| 180 static bool isDenied(Sandbox::ErrorCode code) { | |
| 181 return code == Sandbox::SB_TRAP || | |
| 182 (code >= (Sandbox::ErrorCode)1 && | |
| 183 code <= (Sandbox::ErrorCode)4095); // errno value | |
| 184 } | |
| 185 | |
| 180 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, | 186 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, |
|
jln (very slow on Chromium)
2012/06/09 01:06:13
Let's return a bool (and error string) here instead...
| |
| 181 EvaluateArguments argumentEvaluator) { | 187 EvaluateArguments argumentEvaluator) { |
| 188 // Do some sanity checks on the policy. This will warn users if they do | |
| 189 // things that are likely unsafe and unintended. | |
| 190 // We also have similar checks later, when we actually compile the BPF | |
| 191 // program. That catches problems with incorrectly stacked evaluators. | |
| 192 if (!isDenied(syscallEvaluator(-1))) { | |
| 193 die("Negative system calls should always be disallowed by policy"); | |
| 194 } | |
| 195 #if defined(__i386__) || defined(__x86_64__) | |
| 196 #if defined(__x86_64__) && defined(__ILP32__) | |
| 197 for (unsigned int sysnum = MIN_SYSCALL & ~0x40000000u; | |
| 198 sysnum <= (MAX_SYSCALL & ~0x40000000u); | |
| 199 ++sysnum) { | |
| 200 if (!isDenied(syscallEvaluator(sysnum))) { | |
| 201 die("In x32 mode, you should not allow any non-x32 system calls"); | |
| 202 } | |
| 203 } | |
| 204 #else | |
| 205 for (unsigned int sysnum = MIN_SYSCALL | 0x40000000u; | |
| 206 sysnum <= (MAX_SYSCALL | 0x40000000u); | |
| 207 ++sysnum) { | |
| 208 if (!isDenied(syscallEvaluator(sysnum))) { | |
| 209 die("x32 system calls should be explicitly disallowed"); | |
| 210 } | |
| 211 } | |
| 212 #endif | |
| 213 #endif | |
| 214 if (!isDenied(syscallEvaluator(std::numeric_limits<int>::min())) || | |
| 215 !isDenied(syscallEvaluator(std::numeric_limits<int>::max())) || | |
| 216 !isDenied(syscallEvaluator(static_cast<int>(MIN_SYSCALL) - 1)) || | |
| 217 !isDenied(syscallEvaluator(static_cast<int>(MAX_SYSCALL) + 1)) || | |
| 218 !isDenied(syscallEvaluator((unsigned)std::numeric_limits<int>::max()+1))|| | |
| 219 !isDenied(syscallEvaluator(std::numeric_limits<unsigned>::max()))) { | |
| 220 die("Even for default-allow policies, you must never allow system calls " | |
| 221 "outside of the standard system call range"); | |
| 222 } | |
| 223 | |
| 182 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); | 224 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator)); |
| 183 } | 225 } |
| 184 | 226 |
| 185 void Sandbox::installFilter() { | 227 void Sandbox::installFilter() { |
| 186 // Verify that the user pushed a policy. | 228 // Verify that the user pushed a policy. |
| 187 if (evaluators_.empty()) { | 229 if (evaluators_.empty()) { |
| 188 filter_failed: | 230 filter_failed: |
| 189 die("Failed to configure system call filters"); | 231 die("Failed to configure system call filters"); |
| 190 } | 232 } |
| 191 | 233 |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 207 } | 249 } |
| 208 | 250 |
| 209 // We can't handle stacked evaluators, yet. We'll get there eventually | 251 // We can't handle stacked evaluators, yet. We'll get there eventually |
| 210 // though. Hang tight. | 252 // though. Hang tight. |
| 211 if (evaluators_.size() != 1) { | 253 if (evaluators_.size() != 1) { |
| 212 die("Not implemented"); | 254 die("Not implemented"); |
| 213 } | 255 } |
| 214 | 256 |
| 215 // If the architecture doesn't match SECCOMP_ARCH, disallow the | 257 // If the architecture doesn't match SECCOMP_ARCH, disallow the |
| 216 // system call. | 258 // system call. |
| 217 std::vector<struct sock_filter> program; | 259 Program program; |
| 218 program.push_back((struct sock_filter) | 260 program.push_back((struct sock_filter) |
| 219 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch))); | 261 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch))); |
| 220 program.push_back((struct sock_filter) | 262 program.push_back((struct sock_filter) |
| 221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); | 263 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0)); |
| 222 | 264 |
| 223 // TODO: Instead of killing outright, we should raise a SIGSYS and | 265 // TODO: Instead of killing outright, we should raise a SIGSYS and |
| 224 // report a useful error message. SIGKILL cannot be trapped by the | 266 // report a useful error message. SIGKILL cannot be trapped by the |
| 225 // debugger and essentially makes the program fail in a way that is | 267 // debugger and essentially makes the program fail in a way that is |
| 226 // almost impossible to debug. | 268 // almost impossible to debug. |
| 227 program.push_back((struct sock_filter) | 269 program.push_back((struct sock_filter) |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 240 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0)); | 282 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0)); |
| 241 #else | 283 #else |
| 242 program.push_back((struct sock_filter) | 284 program.push_back((struct sock_filter) |
| 243 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1)); | 285 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1)); |
| 244 #endif | 286 #endif |
| 245 // TODO: raise a suitable SIGSYS signal | 287 // TODO: raise a suitable SIGSYS signal |
| 246 program.push_back((struct sock_filter) | 288 program.push_back((struct sock_filter) |
| 247 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); | 289 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)); |
| 248 #endif | 290 #endif |
| 249 | 291 |
| 250 // Evaluate all possible system calls and depending on their | 292 // Evaluate all possible system calls and group their ErrorCodes into |
| 251 // exit codes generate a BPF filter. | 293 // ranges of identical codes. |
| 252 // This is very inefficient right now. We need to be much smarter | 294 Ranges ranges; |
| 253 // eventually. | 295 findRanges(&ranges); |
| 254 // We currently incur a O(N) overhead on each system call, with N | 296 |
| 255 // being the number of system calls. It is easy to get this down to | 297 // Compile the system call ranges to an optimized BPF program |
| 256 // O(log_2(M)) with M being the number of system calls that need special | 298 rangesToBPF(&program, ranges); |
| 257 // treatment. | 299 |
| 300 // Everything that isn't allowed is forbidden. Eventually, we would | |
| 301 // like to have a way to log forbidden calls, when in debug mode. | |
| 302 program.push_back((struct sock_filter) | |
| 303 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO)); | |
| 304 | |
| 305 // Make sure compilation resulted in BPF program that executes | |
| 306 // correctly. Otherwise, there is an internal error in our BPF compiler. | |
| 307 // There is really nothing the caller can do until the bug is fixed. | |
| 308 const char *err; | |
| 309 if (!Verifier::verifyBPF(program, evaluators_, &err)) { | |
| 310 die(err); | |
| 311 } | |
| 312 | |
| 313 // Install BPF filter program | |
| 314 const struct sock_fprog prog = { program.size(), &program[0] }; | |
| 315 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
| 316 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
| 317 goto filter_failed; | |
| 318 } | |
| 319 | |
| 320 return; | |
| 321 } | |
| 322 | |
| 323 void Sandbox::findRanges(Ranges *ranges) { | |
| 324 // Please note that "struct seccomp_data" defines system calls as a signed | |
| 325 // int32_t, but BPF instructions always operate on unsigned quantities. We | |
| 326 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, | |
| 327 // and then verifying that the rest of the number range (both positive and | |
| 328 // negative) all return the same ErrorCode. | |
| 329 // We don't actually iterate over all possible 2^32 values, though. We just | |
| 330 // perform spot checks at the boundaries. | |
| 258 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; | 331 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; |
| 259 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL+1; ++sysnum) { | 332 uint32_t oldSysnum = 0; |
| 260 ErrorCode err = evaluateSyscall(sysnum); | 333 ErrorCode oldErr = evaluateSyscall(oldSysnum); |
| 334 for (uint32_t sysnum = std::max(1u, MIN_SYSCALL); | |
| 335 sysnum <= MAX_SYSCALL; | |
| 336 ++sysnum) { | |
| 337 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum)); | |
| 338 if (err != oldErr) { | |
| 339 ranges->push_back(Range(oldSysnum, sysnum-1, oldErr)); | |
| 340 oldSysnum = sysnum; | |
| 341 oldErr = err; | |
| 342 } | |
| 343 } | |
| 344 if (oldErr != evaluateSyscall(std::numeric_limits<int>::min()) || | |
| 345 oldErr != evaluateSyscall(std::numeric_limits<int>::max()) || | |
| 346 oldErr != evaluateSyscall(-1) || | |
| 347 oldErr != evaluateSyscall((unsigned)std::numeric_limits<int>::max()+1) || | |
|
jln (very slow on Chromium)
2012/06/09 01:06:13
nit: max + 1 (spaces)
Please add comments, explain...
| |
| 348 oldErr != evaluateSyscall(std::numeric_limits<unsigned>::max())) { | |
| 349 die("Invalid seccomp policy"); | |
| 350 } | |
| 351 ranges->push_back( | |
| 352 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr)); | |
| 353 } | |
| 354 | |
| 355 void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) { | |
| 356 // TODO: We currently search linearly through all ranges. An improved | |
| 357 // algorithm should be doing a binary search. | |
| 358 | |
| 359 // System call ranges must cover the entire number range. | |
| 360 if (ranges.empty() || | |
| 361 ranges.begin()->from != 0 || | |
| 362 ranges.back().to != std::numeric_limits<unsigned>::max()) { | |
| 363 rangeError: | |
| 364 die("Invalid set of system call ranges"); | |
| 365 } | |
| 366 uint32_t from = 0; | |
| 367 for (Ranges::const_iterator iter = ranges.begin(); | |
| 368 iter != ranges.end(); | |
| 369 ++iter) { | |
| 370 // Ranges must be contiguous and monotonically increasing. | |
| 371 if (iter->from > iter->to || | |
| 372 iter->from != from) { | |
| 373 goto rangeError; | |
| 374 } | |
| 375 from = iter->to+1; | |
|
jln (very slow on Chromium)
2012/06/09 01:06:13
nit: to + 1 (spaces)
| |
| 376 | |
| 377 // Convert ErrorCodes to return values that are acceptable for | |
| 378 // BPF filters. | |
| 261 int ret; | 379 int ret; |
| 262 switch (err) { | 380 switch (iter->err) { |
| 263 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: | 381 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6: |
| 264 die("Not implemented"); | 382 die("Not implemented"); |
| 265 case SB_TRAP: | 383 case SB_TRAP: |
| 266 ret = SECCOMP_RET_TRAP; | 384 ret = SECCOMP_RET_TRAP; |
| 267 break; | 385 break; |
| 268 case SB_ALLOWED: | 386 case SB_ALLOWED: |
| 269 ret = SECCOMP_RET_ALLOW; | 387 ret = SECCOMP_RET_ALLOW; |
| 270 break; | 388 break; |
| 271 default: | 389 default: |
| 272 if (err >= static_cast<ErrorCode>(1) && | 390 if (iter->err >= static_cast<ErrorCode>(1) && |
| 273 err <= static_cast<ErrorCode>(4096)) { | 391 iter->err <= static_cast<ErrorCode>(4096)) { |
| 274 // We limit errno values to a reasonable range. In fact, the Linux ABI | 392 // We limit errno values to a reasonable range. In fact, the Linux ABI |
| 275 // doesn't support errno values outside of this range. | 393 // doesn't support errno values outside of this range. |
| 276 ret = SECCOMP_RET_ERRNO + err; | 394 ret = SECCOMP_RET_ERRNO + iter->err; |
| 277 } else { | 395 } else { |
| 278 die("Invalid ErrorCode reported by sandbox system call evaluator"); | 396 die("Invalid ErrorCode reported by sandbox system call evaluator"); |
| 279 } | 397 } |
| 280 break; | 398 break; |
| 281 } | 399 } |
| 282 if (sysnum <= MAX_SYSCALL) { | 400 |
| 283 // We compute the default behavior (e.g. fail open or fail closed) by | 401 // Emit BPF instructions matching this range. |
| 284 // calling the system call evaluator with a system call bigger than | 402 if (iter->to != std::numeric_limits<unsigned>::max()) { |
| 285 // MAX_SYSCALL. | 403 program->push_back((struct sock_filter) |
| 286 // In other words, the very last iteration in our loop becomes the | 404 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0)); |
| 287 // fallback case and we don't need to do any comparisons. | |
| 288 program.push_back((struct sock_filter) | |
| 289 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1)); | |
| 290 } | 405 } |
| 291 program.push_back((struct sock_filter) | 406 program->push_back((struct sock_filter) |
| 292 BPF_STMT(BPF_RET+BPF_K, ret)); | 407 BPF_STMT(BPF_RET+BPF_K, ret)); |
| 293 } | 408 } |
| 294 | |
| 295 // Make sure compilation resulted in BPF program that executes | |
| 296 // correctly. Otherwise, there is an internal error in our BPF compiler. | |
| 297 // There is really nothing the caller can do until the bug is fixed. | |
| 298 const char *err; | |
| 299 if (!Verifier::verifyBPF(program, evaluators_, &err)) { | |
| 300 die(err); | |
| 301 } | |
| 302 | |
| 303 // Install BPF filter program | |
| 304 const struct sock_fprog prog = { program.size(), &program[0] }; | |
| 305 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || | |
| 306 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { | |
| 307 goto filter_failed; | |
| 308 } | |
| 309 | |
| 310 return; | 409 return; |
| 311 } | 410 } |
| 312 | 411 |
| 313 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { | 412 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { |
| 314 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { | 413 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) { |
| 315 // die() can call LOG(FATAL). This is not normally async-signal safe | 414 // die() can call LOG(FATAL). This is not normally async-signal safe |
| 316 // and can lead to bugs. We should eventually implement a different | 415 // and can lead to bugs. We should eventually implement a different |
| 317 // logging and reporting mechanism that is safe to be called from | 416 // logging and reporting mechanism that is safe to be called from |
| 318 // the sigSys() handler. | 417 // the sigSys() handler. |
| 319 die("Unexpected SIGSYS received"); | 418 die("Unexpected SIGSYS received"); |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 340 return; | 439 return; |
| 341 } | 440 } |
| 342 | 441 |
| 343 | 442 |
| 344 bool Sandbox::suppressLogging_ = false; | 443 bool Sandbox::suppressLogging_ = false; |
| 345 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; | 444 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; |
| 346 int Sandbox::proc_fd_ = -1; | 445 int Sandbox::proc_fd_ = -1; |
| 347 Sandbox::Evaluators Sandbox::evaluators_; | 446 Sandbox::Evaluators Sandbox::evaluators_; |
| 348 | 447 |
| 349 } // namespace | 448 } // namespace |
| OLD | NEW |