sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 10536048: Instead of outputting one BPF check per possible system call. Coalesce

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 10536048: Instead of outputting one BPF check per possible system call. Coalesce (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Moved checking of policies into a separate method Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"	5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

6 #include "sandbox/linux/seccomp-bpf/verifier.h"	6 #include "sandbox/linux/seccomp-bpf/verifier.h"

7	7

8 // The kernel gives us a sandbox, we turn it into a playground :-)	8 // The kernel gives us a sandbox, we turn it into a playground :-)

9 // This is version 2 of the playground; version 1 was built on top of	9 // This is version 2 of the playground; version 1 was built on top of

10 // pre-BPF seccomp mode.	10 // pre-BPF seccomp mode.

(...skipping 159 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
170 sb.st_nlink != 3 \|\|	170 sb.st_nlink != 3 \|\|

171 HANDLE_EINTR(close(task))) {	171 HANDLE_EINTR(close(task))) {

172 if (task >= 0) {	172 if (task >= 0) {

173 (void) HANDLE_EINTR(close(task));	173 (void) HANDLE_EINTR(close(task));

174 }	174 }

175 return false;	175 return false;

176 }	176 }

177 return true;	177 return true;

178 }	178 }

179	179

	180 static bool isDenied(Sandbox::ErrorCode code) {

	181 return code == Sandbox::SB_TRAP \|\|

	182 (code >= (Sandbox::ErrorCode)1 &&

	183 code <= (Sandbox::ErrorCode)4095); // errno value

	184 }

	185

	186 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,

	187 EvaluateArguments argumentEvaluator) {
	Chris Evans 2012/06/12 18:11:55 I think this should be debug only. That will be more than sufficient to catch problems, without performing thousands of wasted iterations in production. Markus (顧孟勤) 2012/06/12 19:02:59 On 2012/06/12 18:11:55, Chris Evans wrote: > I think this should be debug only. That will be more than sufficient to catch > problems, without performing thousands of wasted iterations in production. Done. I left in the cheap tests, but disabled the expensive tests for production builds.
	188 // Do some sanity checks on the policy. This will warn users if they do

	189 // things that are likely unsafe and unintended.

	190 // We also have similar checks later, when we actually compile the BPF

	191 // program. That catches problems with incorrectly stacked evaluators.

	192 if (!isDenied(syscallEvaluator(-1))) {

	193 die("Negative system calls should always be disallowed by policy");

	194 }

	195 #if defined(__i386__) \|\| defined(__x86_64__)

	196 #if defined(__x86_64__) && defined(__ILP32__)

	197 for (unsigned int sysnum = MIN_SYSCALL & ~0x40000000u;

	198 sysnum <= (MAX_SYSCALL & ~0x40000000u);

	199 ++sysnum) {

	200 if (!isDenied(syscallEvaluator(sysnum))) {

	201 die("In x32 mode, you should not allow any non-x32 system calls");

	202 }

	203 }

	204 #else

	205 for (unsigned int sysnum = MIN_SYSCALL \| 0x40000000u;

	206 sysnum <= (MAX_SYSCALL \| 0x40000000u);

	207 ++sysnum) {

	208 if (!isDenied(syscallEvaluator(sysnum))) {

	209 die("x32 system calls should be explicitly disallowed");

	210 }

	211 }

	212 #endif

	213 #endif

	214 // Check interesting boundary values just outside of the valid system call

	215 // range: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF, MIN_SYSCALL-1, MAX_SYSCALL+1.

	216 // They all should be denied.

	217 if (!isDenied(syscallEvaluator(std::numeric_limits<int>::max())) \|\|

	218 !isDenied(syscallEvaluator(std::numeric_limits<int>::min())) \|\|

	219 !isDenied(syscallEvaluator(-1)) \|\|

	220 !isDenied(syscallEvaluator(static_cast<int>(MIN_SYSCALL) - 1)) \|\|

	221 !isDenied(syscallEvaluator(static_cast<int>(MAX_SYSCALL) + 1))) {

	222 die("Even for default-allow policies, you must never allow system calls "

	223 "outside of the standard system call range");

	224 }

	225 return;

	226 }

	227

180 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,	228 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,

181 EvaluateArguments argumentEvaluator) {	229 EvaluateArguments argumentEvaluator) {

	230 policySanityChecks(syscallEvaluator, argumentEvaluator);

182 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));	231 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));

183 }	232 }

184	233

185 void Sandbox::installFilter() {	234 void Sandbox::installFilter() {

186 // Verify that the user pushed a policy.	235 // Verify that the user pushed a policy.

187 if (evaluators_.empty()) {	236 if (evaluators_.empty()) {

188 filter_failed:	237 filter_failed:

189 die("Failed to configure system call filters");	238 die("Failed to configure system call filters");

190 }	239 }

191	240

(...skipping 15 matching lines...) Expand all Loading...
207 }	256 }

208	257

209 // We can't handle stacked evaluators, yet. We'll get there eventually	258 // We can't handle stacked evaluators, yet. We'll get there eventually

210 // though. Hang tight.	259 // though. Hang tight.

211 if (evaluators_.size() != 1) {	260 if (evaluators_.size() != 1) {

212 die("Not implemented");	261 die("Not implemented");

213 }	262 }

214	263

215 // If the architecture doesn't match SECCOMP_ARCH, disallow the	264 // If the architecture doesn't match SECCOMP_ARCH, disallow the

216 // system call.	265 // system call.

217 std::vector<struct sock_filter> program;	266 Program program;

218 program.push_back((struct sock_filter)	267 program.push_back((struct sock_filter)

219 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch)));	268 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch)));

220 program.push_back((struct sock_filter)	269 program.push_back((struct sock_filter)

221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));	270 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));

222	271

223 // TODO: Instead of killing outright, we should raise a SIGSYS and	272 // TODO: Instead of killing outright, we should raise a SIGSYS and

224 // report a useful error message. SIGKILL cannot be trapped by the	273 // report a useful error message. SIGKILL cannot be trapped by the

225 // debugger and essentially makes the program fail in a way that is	274 // debugger and essentially makes the program fail in a way that is

226 // almost impossible to debug.	275 // almost impossible to debug.

227 program.push_back((struct sock_filter)	276 program.push_back((struct sock_filter)

(...skipping 12 matching lines...) Expand all Loading...
240 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));	289 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));

241 #else	290 #else

242 program.push_back((struct sock_filter)	291 program.push_back((struct sock_filter)

243 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));	292 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));

244 #endif	293 #endif

245 // TODO: raise a suitable SIGSYS signal	294 // TODO: raise a suitable SIGSYS signal

246 program.push_back((struct sock_filter)	295 program.push_back((struct sock_filter)

247 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));	296 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));

248 #endif	297 #endif

249	298

250 // Evaluate all possible system calls and depending on their	299 // Evaluate all possible system calls and group their ErrorCodes into

251 // exit codes generate a BPF filter.	300 // ranges of identical codes.

252 // This is very inefficient right now. We need to be much smarter	301 Ranges ranges;

253 // eventually.	302 findRanges(&ranges);

254 // We currently incur a O(N) overhead on each system call, with N	303

255 // being the number of system calls. It is easy to get this down to	304 // Compile the system call ranges to an optimized BPF program

256 // O(log_2(M)) with M being the number of system calls that need special	305 rangesToBPF(&program, ranges);

257 // treatment.	306

	307 // Everything that isn't allowed is forbidden. Eventually, we would

	308 // like to have a way to log forbidden calls, when in debug mode.

	309 program.push_back((struct sock_filter)

	310 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));

	311

	312 // Make sure compilation resulted in BPF program that executes

	313 // correctly. Otherwise, there is an internal error in our BPF compiler.

	314 // There is really nothing the caller can do until the bug is fixed.
	Chris Evans 2012/06/12 18:13:47 Actually one more thing. I'm not entirely grokking the order of CLs here, but it looks possible that the #ifndef NDEBUG got lost in the move here? Markus (顧孟勤) 2012/06/12 19:02:59 On 2012/06/12 18:13:47, Chris Evans wrote: > Actually one more thing. I'm not entirely grokking the order of CLs here, but it > looks possible that the #ifndef NDEBUG got lost in the move here? I'll upload a newly rebased version shortly, and it'll show this NDEBUG test.
	315 const char *err;

	316 if (!Verifier::verifyBPF(program, evaluators_, &err)) {

	317 die(err);

	318 }

	319

	320 // Install BPF filter program

	321 const struct sock_fprog prog = { program.size(), &program[0] };

	322 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) \|\|

	323 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
	Chris Evans 2012/06/12 18:11:55 Nit (for the future): it'd be nice to differentiate which of those calls failed. Markus (顧孟勤) 2012/06/12 19:02:59 On 2012/06/12 18:11:55, Chris Evans wrote: > Nit (for the future): it'd be nice to differentiate which of those calls > failed. Already fixed by rebasing.
	324 goto filter_failed;

	325 }

	326

	327 return;

	328 }

	329

	330 void Sandbox::findRanges(Ranges *ranges) {

	331 // Please note that "struct seccomp_data" defines system calls as a signed

	332 // int32_t, but BPF instructions always operate on unsigned quantities. We

	333 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,

	334 // and then verifying that the rest of the number range (both positive and

	335 // negative) all return the same ErrorCode.

258 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;	336 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;

259 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL+1; ++sysnum) {	337 uint32_t oldSysnum = 0;

260 ErrorCode err = evaluateSyscall(sysnum);	338 ErrorCode oldErr = evaluateSyscall(oldSysnum);

	339 for (uint32_t sysnum = std::max(1u, MIN_SYSCALL);

	340 sysnum <= MAX_SYSCALL + 1;

	341 ++sysnum) {

	342 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum));

	343 if (err != oldErr) {

	344 ranges->push_back(Range(oldSysnum, sysnum-1, oldErr));

	345 oldSysnum = sysnum;

	346 oldErr = err;

	347 }

	348 }

	349

	350 // As we looped all the way past the valid system calls (i.e. MAX_SYSCALL+1),

	351 // "oldErr" should at this point be the "default" policy for all system call

	352 // numbers that don't have an explicit handler in the system call evaluator.

	353 // But as we are quite paranoid, we perform some more sanity checks to verify

	354 // that there actually is a consistent "default" policy in the first place.

	355 // We don't actually iterate over all possible 2^32 values, though. We just

	356 // perform spot checks at the boundaries.

	357 // The cases that we test are: 0x7FFFFFFF, 0x80000000, 0xFFFFFFFF.

	358 if (oldErr != evaluateSyscall(std::numeric_limits<int>::max()) \|\|

	359 oldErr != evaluateSyscall(std::numeric_limits<int>::min()) \|\|

	360 oldErr != evaluateSyscall(-1)) {

	361 die("Invalid seccomp policy");

	362 }

	363 ranges->push_back(

	364 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr));

	365 }

	366

	367 void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) {

	368 // TODO: We currently search linearly through all ranges. An improved

	369 // algorithm should be doing a binary search.

	370

	371 // System call ranges must cover the entire number range.

	372 if (ranges.empty() \|\|

	373 ranges.begin()->from != 0 \|\|

	374 ranges.back().to != std::numeric_limits<unsigned>::max()) {

	375 rangeError:

	376 die("Invalid set of system call ranges");

	377 }

	378 uint32_t from = 0;

	379 for (Ranges::const_iterator iter = ranges.begin();

	380 iter != ranges.end();

	381 ++iter) {

	382 // Ranges must be contiguous and monotonically increasing.

	383 if (iter->from > iter->to \|\|

	384 iter->from != from) {

	385 goto rangeError;

	386 }

	387 from = iter->to + 1;

	388

	389 // Convert ErrorCodes to return values that are acceptable for

	390 // BPF filters.

261 int ret;	391 int ret;

262 switch (err) {	392 switch (iter->err) {

263 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:	393 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:

264 die("Not implemented");	394 die("Not implemented");

265 case SB_TRAP:	395 case SB_TRAP:

266 ret = SECCOMP_RET_TRAP;	396 ret = SECCOMP_RET_TRAP;

267 break;	397 break;

268 case SB_ALLOWED:	398 case SB_ALLOWED:

269 ret = SECCOMP_RET_ALLOW;	399 ret = SECCOMP_RET_ALLOW;

270 break;	400 break;

271 default:	401 default:

272 if (err >= static_cast<ErrorCode>(1) &&	402 if (iter->err >= static_cast<ErrorCode>(1) &&

273 err <= static_cast<ErrorCode>(4096)) {	403 iter->err <= static_cast<ErrorCode>(4096)) {

274 // We limit errno values to a reasonable range. In fact, the Linux ABI	404 // We limit errno values to a reasonable range. In fact, the Linux ABI

275 // doesn't support errno values outside of this range.	405 // doesn't support errno values outside of this range.

276 ret = SECCOMP_RET_ERRNO + err;	406 ret = SECCOMP_RET_ERRNO + iter->err;

277 } else {	407 } else {

278 die("Invalid ErrorCode reported by sandbox system call evaluator");	408 die("Invalid ErrorCode reported by sandbox system call evaluator");

279 }	409 }

280 break;	410 break;

281 }	411 }

282 if (sysnum <= MAX_SYSCALL) {	412

283 // We compute the default behavior (e.g. fail open or fail closed) by	413 // Emit BPF instructions matching this range.

284 // calling the system call evaluator with a system call bigger than	414 if (iter->to != std::numeric_limits<unsigned>::max()) {

285 // MAX_SYSCALL.	415 program->push_back((struct sock_filter)

286 // In other words, the very last iteration in our loop becomes the	416 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0));

287 // fallback case and we don't need to do any comparisons.

288 program.push_back((struct sock_filter)

289 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));

290 }	417 }

291 program.push_back((struct sock_filter)	418 program->push_back((struct sock_filter)

292 BPF_STMT(BPF_RET+BPF_K, ret));	419 BPF_STMT(BPF_RET+BPF_K, ret));

293 }	420 }

294

295 // Make sure compilation resulted in BPF program that executes

296 // correctly. Otherwise, there is an internal error in our BPF compiler.

297 // There is really nothing the caller can do until the bug is fixed.

298 const char *err;

299 if (!Verifier::verifyBPF(program, evaluators_, &err)) {

300 die(err);

301 }

302

303 // Install BPF filter program

304 const struct sock_fprog prog = { program.size(), &program[0] };

305 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) \|\|

306 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

307 goto filter_failed;

308 }

309

310 return;	421 return;

311 }	422 }

312	423

313 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {	424 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {

314 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context) {	425 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context) {

315 // die() can call LOG(FATAL). This is not normally async-signal safe	426 // die() can call LOG(FATAL). This is not normally async-signal safe

316 // and can lead to bugs. We should eventually implement a different	427 // and can lead to bugs. We should eventually implement a different

317 // logging and reporting mechanism that is safe to be called from	428 // logging and reporting mechanism that is safe to be called from

318 // the sigSys() handler.	429 // the sigSys() handler.

319 die("Unexpected SIGSYS received");	430 die("Unexpected SIGSYS received");

(...skipping 20 matching lines...) Expand all Loading...
340 return;	451 return;

341 }	452 }

342	453

343	454

344 bool Sandbox::suppressLogging_ = false;	455 bool Sandbox::suppressLogging_ = false;

345 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	456 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

346 int Sandbox::proc_fd_ = -1;	457 int Sandbox::proc_fd_ = -1;

347 Sandbox::Evaluators Sandbox::evaluators_;	458 Sandbox::Evaluators Sandbox::evaluators_;

348	459

349 } // namespace	460 } // namespace

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/verifier.cc » ('j') | no next file with comments »