sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 10536048: Instead of outputting one BPF check per possible system call. Coalesce

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 10536048: Instead of outputting one BPF check per possible system call. Coalesce (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Does this result in easier-to-read diffs? Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"	5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

6 #include "sandbox/linux/seccomp-bpf/verifier.h"	6 #include "sandbox/linux/seccomp-bpf/verifier.h"

7	7

8 // The kernel gives us a sandbox, we turn it into a playground :-)	8 // The kernel gives us a sandbox, we turn it into a playground :-)

9 // This is version 2 of the playground; version 1 was built on top of	9 // This is version 2 of the playground; version 1 was built on top of

10 // pre-BPF seccomp mode.	10 // pre-BPF seccomp mode.

(...skipping 196 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
207 }	207 }

208	208

209 // We can't handle stacked evaluators, yet. We'll get there eventually	209 // We can't handle stacked evaluators, yet. We'll get there eventually

210 // though. Hang tight.	210 // though. Hang tight.

211 if (evaluators_.size() != 1) {	211 if (evaluators_.size() != 1) {

212 die("Not implemented");	212 die("Not implemented");

213 }	213 }

214	214

215 // If the architecture doesn't match SECCOMP_ARCH, disallow the	215 // If the architecture doesn't match SECCOMP_ARCH, disallow the

216 // system call.	216 // system call.

217 std::vector<struct sock_filter> program;	217 Program program;

218 program.push_back((struct sock_filter)	218 program.push_back((struct sock_filter)

219 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch)));	219 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch)));

220 program.push_back((struct sock_filter)	220 program.push_back((struct sock_filter)

221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));	221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));

222	222

223 // TODO: Instead of killing outright, we should raise a SIGSYS and	223 // TODO: Instead of killing outright, we should raise a SIGSYS and

224 // report a useful error message. SIGKILL cannot be trapped by the	224 // report a useful error message. SIGKILL cannot be trapped by the

225 // debugger and essentially makes the program fail in a way that is	225 // debugger and essentially makes the program fail in a way that is

226 // almost impossible to debug.	226 // almost impossible to debug.

227 program.push_back((struct sock_filter)	227 program.push_back((struct sock_filter)

(...skipping 12 matching lines...) Expand all Loading...
240 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));	240 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));

241 #else	241 #else

242 program.push_back((struct sock_filter)	242 program.push_back((struct sock_filter)

243 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));	243 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));

244 #endif	244 #endif

245 // TODO: raise a suitable SIGSYS signal	245 // TODO: raise a suitable SIGSYS signal

246 program.push_back((struct sock_filter)	246 program.push_back((struct sock_filter)

247 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));	247 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));

248 #endif	248 #endif

249	249

250 // Evaluate all possible system calls and depending on their	250 // Evaluate all possible system calls and group their ErrorCodes into

251 // exit codes generate a BPF filter.	251 // ranges of identical codes.

252 // This is very inefficient right now. We need to be much smarter	252 Ranges ranges;

253 // eventually.	253 findRanges(&ranges);

254 // We currently incur a O(N) overhead on each system call, with N	254

255 // being the number of system calls. It is easy to get this down to	255 // Compile the system call ranges to an optimized BPF program

256 // O(log_2(M)) with M being the number of system calls that need special	256 rangesToBPF(&program, ranges);

257 // treatment.	257

	258 // Everything that isn't allowed is forbidden. Eventually, we would

	259 // like to have a way to log forbidden calls, when in debug mode.

	260 program.push_back((struct sock_filter)

	261 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));

	262

	263 // Make sure compilation resulted in BPF program that executes

	264 // correctly. Otherwise, there is an internal error in our BPF compiler.

	265 // There is really nothing the caller can do until the bug is fixed.

	266 const char *err;

	267 if (!Verifier::verifyBPF(program, evaluators_, &err)) {

	268 die(err);

	269 }

	270

	271 // Install BPF filter program

	272 const struct sock_fprog prog = { program.size(), &program[0] };

	273 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) \|\|

	274 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

	275 goto filter_failed;

	276 }

	277

	278 return;

	279 }

	280

	281 void Sandbox::findRanges(Ranges *ranges) {

	282 // Please note that "struct seccomp_data" defines system calls as a signed

	283 // int32_t, but BPF instructions always operate on unsigned quantities. We

	284 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,

	285 // and then verifying that the rest of the number range (both positive and

	286 // negative) all return the same ErrorCode.

	287 // We don't actually iterate over all possible 2^32 values, though. We just

	288 // perform spot checks at the boundaries.

258 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;	289 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;

259 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL+1; ++sysnum) {	290 uint32_t oldSysnum = 0;

260 ErrorCode err = evaluateSyscall(sysnum);	291 ErrorCode oldErr = evaluateSyscall(oldSysnum);

	292 for (uint32_t sysnum = std::max(1u, MIN_SYSCALL);

	293 sysnum <= MAX_SYSCALL;

	294 ++sysnum) {

	295 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum));

	296 if (err != oldErr) {

	297 ranges->push_back(Range(oldSysnum, sysnum-1, oldErr));

	298 oldSysnum = sysnum;

	299 oldErr = err;

	300 }

	301 }

	302 if (oldErr != evaluateSyscall(std::numeric_limits<int>::max()) \|\|
	jln (very slow on Chromium) 2012/06/08 22:38:21 This makes it hard to review. I would either: - Have a comment somewhere, that we don't support negative system call numbers, with the proper assert. - Add asserts in case the system call number is negative or after MAX_SYSCALL explaining this is untested. I would prefer the first solution much more. Simplicity! :) Also it's extremely unlikely that the caller will want to do that, ever, and I would rather err on the side of catching mistakes. I can already see bugs pop up where callers expected "unknown" syscalls to be denied by default. Markus (顧孟勤) 2012/06/09 00:30:58 This is just a glorified assert() statement. Unfortunately, it's a little verbose to actually write what we want to write. I added another set of checks to the place where the user first hands us the policy evaluator. And I explicitly deny negative system calls. Hopefully, this addresses your concern. But if you need more, or if you think there is something else that makes this code easier for you to review, please speak up.
	303 oldErr != evaluateSyscall(std::numeric_limits<int>::max() + 1) \|\|

	304 oldErr != evaluateSyscall(std::numeric_limits<unsigned>::max())) {

	305 die("Invalid seccomp policy");

	306 }

	307 ranges->push_back(

	308 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr));

	309 }

	310

	311 void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) {

	312 // TODO: We currently search linearly through all ranges. An improved

	313 // algorithm should be doing a binary search.

	314

	315 // System call ranges must cover the entire number range.

	316 if (ranges.empty() \|\|

	317 ranges.begin()->from != 0 \|\|

	318 ranges.back().to != std::numeric_limits<unsigned>::max()) {

	319 rangeError:

	320 die("Invalid set of system call ranges");

	321 }

	322 uint32_t last = static_cast<uint32_t>(-1);
	jln (very slow on Chromium) 2012/06/08 22:38:21 I know this is correct and allowed by standards, but it's more readable with std::numeric_limits<uint32_t>::max(), no? If you think it makes the code below more readable, add a comment: "guaranteed to be UINT32_MAX" Markus (顧孟勤) 2012/06/09 00:30:58 On 2012/06/08 22:38:21, Julien Tinnes wrote: > I know this is correct and allowed by standards, but it's more readable with std::numeric_limits<uint32_t>::max(), no? If you think it makes the code below more readable, add a comment: "guaranteed to be UINT32_MAX" -- I slightly changed the logic, and can thus avoid this issue.
	323 for (Ranges::const_iterator iter = ranges.begin();

	324 iter != ranges.end();

	325 ++iter) {

	326 // Ranges most be contiguous and monotonically increasing.
	jln (very slow on Chromium) 2012/06/08 22:38:21 s/most/must Markus (顧孟勤) 2012/06/09 00:30:58 On 2012/06/08 22:38:21, Julien Tinnes wrote: > s/most/must -- Done.
	327 if (iter->from > iter->to \|\|

	328 iter->from != last+1) {
	jln (very slow on Chromium) 2012/06/08 22:38:21 nit: last + 1 (spaces). Also add a comment explaining that last + 1 will be 0 on the first iteration. I don't like having to rely on (defined but poorly known) behavior such as unsigned overflow. Agreed, it allows us to not unroll the loop, so that's good.
	329 goto rangeError;

	330 }

	331 last = iter->to;

	332

	333 // Convert ErrorCodes to return values that are acceptable for

	334 // BPF filters.

261 int ret;	335 int ret;

262 switch (err) {	336 switch (iter->err) {

263 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:	337 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:

264 die("Not implemented");	338 die("Not implemented");

265 case SB_TRAP:	339 case SB_TRAP:

266 ret = SECCOMP_RET_TRAP;	340 ret = SECCOMP_RET_TRAP;

267 break;	341 break;

268 case SB_ALLOWED:	342 case SB_ALLOWED:

269 ret = SECCOMP_RET_ALLOW;	343 ret = SECCOMP_RET_ALLOW;

270 break;	344 break;

271 default:	345 default:

272 if (err >= static_cast<ErrorCode>(1) &&	346 if (iter->err >= static_cast<ErrorCode>(1) &&

273 err <= static_cast<ErrorCode>(4096)) {	347 iter->err <= static_cast<ErrorCode>(4096)) {

274 // We limit errno values to a reasonable range. In fact, the Linux ABI	348 // We limit errno values to a reasonable range. In fact, the Linux ABI

275 // doesn't support errno values outside of this range.	349 // doesn't support errno values outside of this range.

276 ret = SECCOMP_RET_ERRNO + err;	350 ret = SECCOMP_RET_ERRNO + iter->err;

277 } else {	351 } else {

278 die("Invalid ErrorCode reported by sandbox system call evaluator");	352 die("Invalid ErrorCode reported by sandbox system call evaluator");

279 }	353 }

280 break;	354 break;

281 }	355 }

282 if (sysnum <= MAX_SYSCALL) {	356

283 // We compute the default behavior (e.g. fail open or fail closed) by	357 // Emit BPF instructions matching this range.

284 // calling the system call evaluator with a system call bigger than	358 if (iter->to != std::numeric_limits<unsigned>::max()) {

285 // MAX_SYSCALL.	359 program->push_back((struct sock_filter)

286 // In other words, the very last iteration in our loop becomes the	360 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0));

287 // fallback case and we don't need to do any comparisons.

288 program.push_back((struct sock_filter)

289 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));

290 }	361 }

291 program.push_back((struct sock_filter)	362 program->push_back((struct sock_filter)

292 BPF_STMT(BPF_RET+BPF_K, ret));	363 BPF_STMT(BPF_RET+BPF_K, ret));

293 }	364 }

294

295 // Make sure compilation resulted in BPF program that executes

296 // correctly. Otherwise, there is an internal error in our BPF compiler.

297 // There is really nothing the caller can do until the bug is fixed.

298 const char *err;

299 if (!Verifier::verifyBPF(program, evaluators_, &err)) {

300 die(err);

301 }

302

303 // Install BPF filter program

304 const struct sock_fprog prog = { program.size(), &program[0] };

305 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) \|\|

306 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

307 goto filter_failed;

308 }

309

310 return;	365 return;

311 }	366 }

312	367

313 void Sandbox::sigSys(int nr, siginfo_t info, void void_context) {	368 void Sandbox::sigSys(int nr, siginfo_t info, void void_context) {

314 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context) {	369 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context) {

315 // die() can call LOG(FATAL). This is not normally async-signal safe	370 // die() can call LOG(FATAL). This is not normally async-signal safe

316 // and can lead to bugs. We should eventually implement a different	371 // and can lead to bugs. We should eventually implement a different

317 // logging and reporting mechanism that is safe to be called from	372 // logging and reporting mechanism that is safe to be called from

318 // the sigSys() handler.	373 // the sigSys() handler.

319 die("Unexpected SIGSYS received");	374 die("Unexpected SIGSYS received");

(...skipping 20 matching lines...) Expand all Loading...
340 return;	395 return;

341 }	396 }

342	397

343	398

344 bool Sandbox::suppressLogging_ = false;	399 bool Sandbox::suppressLogging_ = false;

345 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	400 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

346 int Sandbox::proc_fd_ = -1;	401 int Sandbox::proc_fd_ = -1;

347 Sandbox::Evaluators Sandbox::evaluators_;	402 Sandbox::Evaluators Sandbox::evaluators_;

348	403

349 } // namespace	404 } // namespace

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/verifier.cc » ('j') | no next file with comments »