sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 10536048: Instead of outputting one BPF check per possible system call. Coalesce

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 10536048: Instead of outputting one BPF check per possible system call. Coalesce (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Rebased Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"	5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

6 #include "sandbox/linux/seccomp-bpf/verifier.h"	6 #include "sandbox/linux/seccomp-bpf/verifier.h"

7	7

8 // The kernel gives us a sandbox, we turn it into a playground :-)	8 // The kernel gives us a sandbox, we turn it into a playground :-)

9 // This is version 2 of the playground; version 1 was built on top of	9 // This is version 2 of the playground; version 1 was built on top of

10 // pre-BPF seccomp mode.	10 // pre-BPF seccomp mode.

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
206 sb.st_nlink != 3 ||	206 sb.st_nlink != 3 ||

207 HANDLE_EINTR(close(task))) {	207 HANDLE_EINTR(close(task))) {

208 if (task >= 0) {	208 if (task >= 0) {

209 if (HANDLE_EINTR(close(task))) { }	209 if (HANDLE_EINTR(close(task))) { }

210 }	210 }

211 return false;	211 return false;

212 }	212 }

213 return true;	213 return true;

214 }	214 }

215	215

	216 static bool isDenied(Sandbox::ErrorCode code) {

	217 return code == Sandbox::SB_TRAP ||

	218 (code >= (Sandbox::ErrorCode)1 &&

	219 code <= (Sandbox::ErrorCode)4095); // errno value

	220 }

	221

	222 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,

	223 EvaluateArguments) {

	224 // Do some sanity checks on the policy. This will warn users if they do

	225 // things that are likely unsafe and unintended.

	226 // We also have similar checks later, when we actually compile the BPF

	227 // program. That catches problems with incorrectly stacked evaluators.

	228 if (!isDenied(syscallEvaluator(-1))) {

	229 die("Negative system calls should always be disallowed by policy");

	230 }

	231 #ifndef NDEBUG

	232 #if defined(__i386__) || defined(__x86_64__)

	233 #if defined(__x86_64__) && defined(__ILP32__)

	234 for (unsigned int sysnum = MIN_SYSCALL & ~0x40000000u;

	235 sysnum <= (MAX_SYSCALL & ~0x40000000u);

	236 ++sysnum) {

	237 if (!isDenied(syscallEvaluator(sysnum))) {

	238 die("In x32 mode, you should not allow any non-x32 system calls");

	239 }

	240 }

	241 #else

	242 for (unsigned int sysnum = MIN_SYSCALL | 0x40000000u;

	243 sysnum <= (MAX_SYSCALL | 0x40000000u);

	244 ++sysnum) {

	245 if (!isDenied(syscallEvaluator(sysnum))) {

	246 die("x32 system calls should be explicitly disallowed");

	247 }

	248 }

	249 #endif

	250 #endif

	251 #endif

	252 // Check interesting boundary values just outside of the valid system call

	253 // range: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF, MIN_SYSCALL-1, MAX_SYSCALL+1.

	254 // They all should be denied.

	255 if (!isDenied(syscallEvaluator(std::numeric_limits<int>::max())) ||

	256 !isDenied(syscallEvaluator(std::numeric_limits<int>::min())) ||

	257 !isDenied(syscallEvaluator(-1)) ||

	258 !isDenied(syscallEvaluator(static_cast<int>(MIN_SYSCALL) - 1)) ||

	259 !isDenied(syscallEvaluator(static_cast<int>(MAX_SYSCALL) + 1))) {

	260 die("Even for default-allow policies, you must never allow system calls "

	261 "outside of the standard system call range");

	262 }

	263 return;

	264 }

	265

216 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,	266 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,

217 EvaluateArguments argumentEvaluator) {	267 EvaluateArguments argumentEvaluator) {

	268 policySanityChecks(syscallEvaluator, argumentEvaluator);

218 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));	269 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));

219 }	270 }

220	271

221 void Sandbox::installFilter() {	272 void Sandbox::installFilter() {

222 // Verify that the user pushed a policy.	273 // Verify that the user pushed a policy.

223 if (evaluators_.empty()) {	274 if (evaluators_.empty()) {

224 filter_failed:	275 filter_failed:

225 die("Failed to configure system call filters");	276 die("Failed to configure system call filters");

226 }	277 }

227	278

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
281 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));	332 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));

282 #else	333 #else

283 program->push_back((struct sock_filter)	334 program->push_back((struct sock_filter)

284 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));	335 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));

285 #endif	336 #endif

286 // TODO: raise a suitable SIGSYS signal	337 // TODO: raise a suitable SIGSYS signal

287 program->push_back((struct sock_filter)	338 program->push_back((struct sock_filter)

288 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));	339 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));

289 #endif	340 #endif

290	341

291 // Evaluate all possible system calls and depending on their	342 // Evaluate all possible system calls and group their ErrorCodes into

292 // exit codes generate a BPF filter.	343 // ranges of identical codes.

293 // This is very inefficient right now. We need to be much smarter	344 Ranges ranges;

294 // eventually.	345 findRanges(&ranges);

295 // We currently incur a O(N) overhead on each system call, with N	346

296 // being the number of system calls. It is easy to get this down to	347 // Compile the system call ranges to an optimized BPF program.

297 // O(log_2(M)) with M being the number of system calls that need special	348 rangesToBPF(program, ranges);

298 // treatment.

299 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;

300 for (uint32_t sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL+1; ++sysnum) {

301 ErrorCode err = evaluateSyscall(sysnum);

302 int ret;

303 switch (err) {

304 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:

305 die("Not implemented");

306 case SB_TRAP:

307 ret = SECCOMP_RET_TRAP;

308 break;

309 case SB_ALLOWED:

310 ret = SECCOMP_RET_ALLOW;

311 break;

312 default:

313 if (err >= static_cast<ErrorCode>(1) &&

314 err <= static_cast<ErrorCode>(4096)) {

315 // We limit errno values to a reasonable range. In fact, the Linux ABI

316 // doesn't support errno values outside of this range.

317 ret = SECCOMP_RET_ERRNO + err;

318 } else {

319 die("Invalid ErrorCode reported by sandbox system call evaluator");

320 }

321 break;

322 }

323 if (sysnum <= MAX_SYSCALL) {

324 // We compute the default behavior (e.g. fail open or fail closed) by

325 // calling the system call evaluator with a system call bigger than

326 // MAX_SYSCALL.

327 // In other words, the very last iteration in our loop becomes the

328 // fallback case and we don't need to do any comparisons.

329 program->push_back((struct sock_filter)

330 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));

331 }

332 program->push_back((struct sock_filter)

333 BPF_STMT(BPF_RET+BPF_K, ret));

334 }

335	349

336 // Everything that isn't allowed is forbidden. Eventually, we would	350 // Everything that isn't allowed is forbidden. Eventually, we would

337 // like to have a way to log forbidden calls, when in debug mode.	351 // like to have a way to log forbidden calls, when in debug mode.

338 // TODO: raise a suitable SIGSYS signal

339 program->push_back((struct sock_filter)	352 program->push_back((struct sock_filter)

340 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));	353 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));

341	354

342 // Make sure compilation resulted in BPF program that executes	355 // Make sure compilation resulted in BPF program that executes

343 // correctly. Otherwise, there is an internal error in our BPF compiler.	356 // correctly. Otherwise, there is an internal error in our BPF compiler.

344 // There is really nothing the caller can do until the bug is fixed.	357 // There is really nothing the caller can do until the bug is fixed.

345 #ifndef NDEBUG	358 #ifndef NDEBUG

346 const char *err = NULL;	359 const char *err = NULL;

347 if (!Verifier::verifyBPF(*program, evaluators_, &err)) {	360 if (!Verifier::verifyBPF(*program, evaluators_, &err)) {

348 die(err);	361 die(err);

349 }	362 }

350 #endif	363 #endif

(...skipping 19 matching lines...) Expand all Loading...
370 die(dryRun_ ? NULL : "Kernel refuses to enable no-new-privs");	383 die(dryRun_ ? NULL : "Kernel refuses to enable no-new-privs");

371 } else {	384 } else {

372 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {	385 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

373 die(dryRun_ ? NULL : "Kernel refuses to turn on BPF filters");	386 die(dryRun_ ? NULL : "Kernel refuses to turn on BPF filters");

374 }	387 }

375 }	388 }

376	389

377 return;	390 return;

378 }	391 }

379	392

	393 void Sandbox::findRanges(Ranges *ranges) {

	394 // Please note that "struct seccomp_data" defines system calls as a signed

	395 // int32_t, but BPF instructions always operate on unsigned quantities. We

	396 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,

	397 // and then verifying that the rest of the number range (both positive and

	398 // negative) all return the same ErrorCode.

	399 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;

	400 uint32_t oldSysnum = 0;

	401 ErrorCode oldErr = evaluateSyscall(oldSysnum);

	402 for (uint32_t sysnum = std::max(1u, MIN_SYSCALL);

	403 sysnum <= MAX_SYSCALL + 1;

	404 ++sysnum) {

	405 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum));

	406 if (err != oldErr) {

	407 ranges->push_back(Range(oldSysnum, sysnum-1, oldErr));

	408 oldSysnum = sysnum;

	409 oldErr = err;

	410 }

	411 }

	412

	413 // As we looped all the way past the valid system calls (i.e. MAX_SYSCALL+1),

	414 // "oldErr" should at this point be the "default" policy for all system call

	415 // numbers that don't have an explicit handler in the system call evaluator.

	416 // But as we are quite paranoid, we perform some more sanity checks to verify

	417 // that there actually is a consistent "default" policy in the first place.

	418 // We don't actually iterate over all possible 2^32 values, though. We just

	419 // perform spot checks at the boundaries.

	420 // The cases that we test are: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF.

	421 if (oldErr != evaluateSyscall(std::numeric_limits<int>::max()) ||

	422 oldErr != evaluateSyscall(std::numeric_limits<int>::min()) ||

	423 oldErr != evaluateSyscall(-1)) {

	424 die("Invalid seccomp policy");

	425 }

	426 ranges->push_back(

	427 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr));

	428 }

	429

	430 void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) {

	431 // TODO: We currently search linearly through all ranges. An improved

	432 // algorithm should be doing a binary search.

	433

	434 // System call ranges must cover the entire number range.

	435 if (ranges.empty() ||

	436 ranges.begin()->from != 0 ||

	437 ranges.back().to != std::numeric_limits<unsigned>::max()) {

	438 rangeError:

	439 die("Invalid set of system call ranges");

	440 }

	441 uint32_t from = 0;

	442 for (Ranges::const_iterator iter = ranges.begin();

	443 iter != ranges.end();

	444 ++iter) {

	445 // Ranges must be contiguous and monotonically increasing.

	446 if (iter->from > iter->to ||

	447 iter->from != from) {

	448 goto rangeError;

	449 }

	450 from = iter->to + 1;

	451

	452 // Convert ErrorCodes to return values that are acceptable for

	453 // BPF filters.

	454 int ret;

	455 switch (iter->err) {

	456 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:

	457 die("Not implemented");

	458 case SB_TRAP:

	459 ret = SECCOMP_RET_TRAP;

	460 break;

	461 case SB_ALLOWED:

	462 ret = SECCOMP_RET_ALLOW;

	463 break;

	464 default:

	465 if (iter->err >= static_cast<ErrorCode>(1) &&

	466 iter->err <= static_cast<ErrorCode>(4096)) {

	467 // We limit errno values to a reasonable range. In fact, the Linux ABI

	468 // doesn't support errno values outside of this range.

	469 ret = SECCOMP_RET_ERRNO + iter->err;

	470 } else {

	471 die("Invalid ErrorCode reported by sandbox system call evaluator");

	472 }

	473 break;

	474 }

	475

	476 // Emit BPF instructions matching this range.

	477 if (iter->to != std::numeric_limits<unsigned>::max()) {

	478 program->push_back((struct sock_filter)

	479 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0));

	480 }

	481 program->push_back((struct sock_filter)

	482 BPF_STMT(BPF_RET+BPF_K, ret));

	483 }

	484 return;

	485 }

	486

380 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {	487 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {

381 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) {	488 if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) {

382 // die() can call LOG(FATAL). This is not normally async-signal safe	489 // die() can call LOG(FATAL). This is not normally async-signal safe

383 // and can lead to bugs. We should eventually implement a different	490 // and can lead to bugs. We should eventually implement a different

384 // logging and reporting mechanism that is safe to be called from	491 // logging and reporting mechanism that is safe to be called from

385 // the sigSys() handler.	492 // the sigSys() handler.

386 die("Unexpected SIGSYS received");	493 die("Unexpected SIGSYS received");

387 }	494 }

388 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);	495 ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);

389 int old_errno = errno;	496 int old_errno = errno;

(...skipping 17 matching lines...) Expand all Loading...
407 return;	514 return;

408 }	515 }

409	516

410	517

411 bool Sandbox::dryRun_ = false;	518 bool Sandbox::dryRun_ = false;

412 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	519 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

413 int Sandbox::proc_fd_ = -1;	520 int Sandbox::proc_fd_ = -1;

414 Sandbox::Evaluators Sandbox::evaluators_;	521 Sandbox::Evaluators Sandbox::evaluators_;

415	522

416 } // namespace	523 } // namespace

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/verifier.cc » ('j') | no next file with comments »