sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 10536048: Instead of outputting one BPF check per possible system call. Coalesce

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 10536048: Instead of outputting one BPF check per possible system call. Coalesce (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Added more asserts and tweak the existing ones a little bit Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"	5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

6 #include "sandbox/linux/seccomp-bpf/verifier.h"	6 #include "sandbox/linux/seccomp-bpf/verifier.h"

7	7

8 // The kernel gives us a sandbox, we turn it into a playground :-)	8 // The kernel gives us a sandbox, we turn it into a playground :-)

9 // This is version 2 of the playground; version 1 was built on top of	9 // This is version 2 of the playground; version 1 was built on top of

10 // pre-BPF seccomp mode.	10 // pre-BPF seccomp mode.

(...skipping 159 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
170 sb.st_nlink != 3 \|\|	170 sb.st_nlink != 3 \|\|

171 HANDLE_EINTR(close(task))) {	171 HANDLE_EINTR(close(task))) {

172 if (task >= 0) {	172 if (task >= 0) {

173 (void) HANDLE_EINTR(close(task));	173 (void) HANDLE_EINTR(close(task));

174 }	174 }

175 return false;	175 return false;

176 }	176 }

177 return true;	177 return true;

178 }	178 }

179	179

	180 static bool isDenied(Sandbox::ErrorCode code) {

	181 return code == Sandbox::SB_TRAP \|\|

	182 (code >= (Sandbox::ErrorCode)1 &&

	183 code <= (Sandbox::ErrorCode)4095); // errno value

	184 }

	185

180 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,	186 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,
	jln (very slow on Chromium) 2012/06/09 01:06:13 Let's return a bool (and error string) here instead of dying?
181 EvaluateArguments argumentEvaluator) {	187 EvaluateArguments argumentEvaluator) {

	188 // Do some sanity checks on the policy. This will warn users if they do

	189 // things that are likely unsafe and unintended.

	190 // We also have similar checks later, when we actually compile the BPF

	191 // program. That catches problems with incorrectly stacked evaluators.

	192 if (!isDenied(syscallEvaluator(-1))) {

	193 die("Negative system calls should always be disallowed by policy");

	194 }

	195 #if defined(__i386__) \|\| defined(__x86_64__)

	196 #if defined(__x86_64__) && defined(__ILP32__)

	197 for (unsigned int sysnum = MIN_SYSCALL & ~0x40000000u;

	198 sysnum <= (MAX_SYSCALL & ~0x40000000u);

	199 ++sysnum) {

	200 if (!isDenied(syscallEvaluator(sysnum))) {

	201 die("In x32 mode, you should not allow any non-x32 system calls");

	202 }

	203 }

	204 #else

	205 for (unsigned int sysnum = MIN_SYSCALL \| 0x40000000u;

	206 sysnum <= (MAX_SYSCALL \| 0x40000000u);

	207 ++sysnum) {

	208 if (!isDenied(syscallEvaluator(sysnum))) {

	209 die("x32 system calls should be explicitly disallowed");

	210 }

	211 }

	212 #endif

	213 #endif

	214 if (!isDenied(syscallEvaluator(std::numeric_limits<int>::min())) \|\|

	215 !isDenied(syscallEvaluator(std::numeric_limits<int>::max())) \|\|

	216 !isDenied(syscallEvaluator(static_cast<int>(MIN_SYSCALL) - 1)) \|\|

	217 !isDenied(syscallEvaluator(static_cast<int>(MAX_SYSCALL) + 1)) \|\|

	218 !isDenied(syscallEvaluator((unsigned)std::numeric_limits<int>::max()+1))\|\|

	219 !isDenied(syscallEvaluator(std::numeric_limits<unsigned>::max()))) {

	220 die("Even for default-allow policies, you must never allow system calls "

	221 "outside of the standard system call range");

	222 }

	223

182 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));	224 evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));

183 }	225 }

184	226

185 void Sandbox::installFilter() {	227 void Sandbox::installFilter() {

186 // Verify that the user pushed a policy.	228 // Verify that the user pushed a policy.

187 if (evaluators_.empty()) {	229 if (evaluators_.empty()) {

188 filter_failed:	230 filter_failed:

189 die("Failed to configure system call filters");	231 die("Failed to configure system call filters");

190 }	232 }

191	233

(...skipping 15 matching lines...) Expand all Loading...
207 }	249 }

208	250

209 // We can't handle stacked evaluators, yet. We'll get there eventually	251 // We can't handle stacked evaluators, yet. We'll get there eventually

210 // though. Hang tight.	252 // though. Hang tight.

211 if (evaluators_.size() != 1) {	253 if (evaluators_.size() != 1) {

212 die("Not implemented");	254 die("Not implemented");

213 }	255 }

214	256

215 // If the architecture doesn't match SECCOMP_ARCH, disallow the	257 // If the architecture doesn't match SECCOMP_ARCH, disallow the

216 // system call.	258 // system call.

217 std::vector<struct sock_filter> program;	259 Program program;

218 program.push_back((struct sock_filter)	260 program.push_back((struct sock_filter)

219 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch)));	261 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch)));

220 program.push_back((struct sock_filter)	262 program.push_back((struct sock_filter)

221 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));	263 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));

222	264

223 // TODO: Instead of killing outright, we should raise a SIGSYS and	265 // TODO: Instead of killing outright, we should raise a SIGSYS and

224 // report a useful error message. SIGKILL cannot be trapped by the	266 // report a useful error message. SIGKILL cannot be trapped by the

225 // debugger and essentially makes the program fail in a way that is	267 // debugger and essentially makes the program fail in a way that is

226 // almost impossible to debug.	268 // almost impossible to debug.

227 program.push_back((struct sock_filter)	269 program.push_back((struct sock_filter)

(...skipping 12 matching lines...) Expand all Loading...
240 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));	282 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));

241 #else	283 #else

242 program.push_back((struct sock_filter)	284 program.push_back((struct sock_filter)

243 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));	285 BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));

244 #endif	286 #endif

245 // TODO: raise a suitable SIGSYS signal	287 // TODO: raise a suitable SIGSYS signal

246 program.push_back((struct sock_filter)	288 program.push_back((struct sock_filter)

247 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));	289 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));

248 #endif	290 #endif

249	291

250 // Evaluate all possible system calls and depending on their	292 // Evaluate all possible system calls and group their ErrorCodes into

251 // exit codes generate a BPF filter.	293 // ranges of identical codes.

252 // This is very inefficient right now. We need to be much smarter	294 Ranges ranges;

253 // eventually.	295 findRanges(&ranges);

254 // We currently incur a O(N) overhead on each system call, with N	296

255 // being the number of system calls. It is easy to get this down to	297 // Compile the system call ranges to an optimized BPF program

256 // O(log_2(M)) with M being the number of system calls that need special	298 rangesToBPF(&program, ranges);

257 // treatment.	299

	300 // Everything that isn't allowed is forbidden. Eventually, we would

	301 // like to have a way to log forbidden calls, when in debug mode.

	302 program.push_back((struct sock_filter)

	303 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));

	304

	305 // Make sure compilation resulted in BPF program that executes

	306 // correctly. Otherwise, there is an internal error in our BPF compiler.

	307 // There is really nothing the caller can do until the bug is fixed.

	308 const char *err;

	309 if (!Verifier::verifyBPF(program, evaluators_, &err)) {

	310 die(err);

	311 }

	312

	313 // Install BPF filter program

	314 const struct sock_fprog prog = { program.size(), &program[0] };

	315 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) \|\|

	316 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

	317 goto filter_failed;

	318 }

	319

	320 return;

	321 }

	322

	323 void Sandbox::findRanges(Ranges *ranges) {

	324 // Please note that "struct seccomp_data" defines system calls as a signed

	325 // int32_t, but BPF instructions always operate on unsigned quantities. We

	326 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,

	327 // and then verifying that the rest of the number range (both positive and

	328 // negative) all return the same ErrorCode.

	329 // We don't actually iterate over all possible 2^32 values, though. We just

	330 // perform spot checks at the boundaries.

258 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;	331 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;

259 for (int sysnum = MIN_SYSCALL; sysnum <= MAX_SYSCALL+1; ++sysnum) {	332 uint32_t oldSysnum = 0;

260 ErrorCode err = evaluateSyscall(sysnum);	333 ErrorCode oldErr = evaluateSyscall(oldSysnum);

	334 for (uint32_t sysnum = std::max(1u, MIN_SYSCALL);

	335 sysnum <= MAX_SYSCALL;

	336 ++sysnum) {

	337 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum));

	338 if (err != oldErr) {

	339 ranges->push_back(Range(oldSysnum, sysnum-1, oldErr));

	340 oldSysnum = sysnum;

	341 oldErr = err;

	342 }

	343 }

	344 if (oldErr != evaluateSyscall(std::numeric_limits<int>::min()) \|\|

	345 oldErr != evaluateSyscall(std::numeric_limits<int>::max()) \|\|

	346 oldErr != evaluateSyscall(-1) \|\|

	347 oldErr != evaluateSyscall((unsigned)std::numeric_limits<int>::max()+1) \|\|
	jln (very slow on Chromium) 2012/06/09 01:06:13 nit: max + 1 (spaces) Please add comments, explaining that you're trying to get the "default" error code etc. Also add a comment explicitly for the last line: "casting to unsigned to make sure to avoid overflow".
	348 oldErr != evaluateSyscall(std::numeric_limits<unsigned>::max())) {

	349 die("Invalid seccomp policy");

	350 }

	351 ranges->push_back(

	352 Range(oldSysnum, std::numeric_limits<unsigned>::max(), oldErr));

	353 }

	354

	355 void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) {

	356 // TODO: We currently search linearly through all ranges. An improved

	357 // algorithm should be doing a binary search.

	358

	359 // System call ranges must cover the entire number range.

	360 if (ranges.empty() \|\|

	361 ranges.begin()->from != 0 \|\|

	362 ranges.back().to != std::numeric_limits<unsigned>::max()) {

	363 rangeError:

	364 die("Invalid set of system call ranges");

	365 }

	366 uint32_t from = 0;

	367 for (Ranges::const_iterator iter = ranges.begin();

	368 iter != ranges.end();

	369 ++iter) {

	370 // Ranges must be contiguous and monotonically increasing.

	371 if (iter->from > iter->to \|\|

	372 iter->from != from) {

	373 goto rangeError;

	374 }

	375 from = iter->to+1;
	jln (very slow on Chromium) 2012/06/09 01:06:13 nit: to + 1 (spaces)
	376

	377 // Convert ErrorCodes to return values that are acceptable for

	378 // BPF filters.

261 int ret;	379 int ret;

262 switch (err) {	380 switch (iter->err) {

263 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:	381 case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:

264 die("Not implemented");	382 die("Not implemented");

265 case SB_TRAP:	383 case SB_TRAP:

266 ret = SECCOMP_RET_TRAP;	384 ret = SECCOMP_RET_TRAP;

267 break;	385 break;

268 case SB_ALLOWED:	386 case SB_ALLOWED:

269 ret = SECCOMP_RET_ALLOW;	387 ret = SECCOMP_RET_ALLOW;

270 break;	388 break;

271 default:	389 default:

272 if (err >= static_cast<ErrorCode>(1) &&	390 if (iter->err >= static_cast<ErrorCode>(1) &&

273 err <= static_cast<ErrorCode>(4096)) {	391 iter->err <= static_cast<ErrorCode>(4096)) {

274 // We limit errno values to a reasonable range. In fact, the Linux ABI	392 // We limit errno values to a reasonable range. In fact, the Linux ABI

275 // doesn't support errno values outside of this range.	393 // doesn't support errno values outside of this range.

276 ret = SECCOMP_RET_ERRNO + err;	394 ret = SECCOMP_RET_ERRNO + iter->err;

277 } else {	395 } else {

278 die("Invalid ErrorCode reported by sandbox system call evaluator");	396 die("Invalid ErrorCode reported by sandbox system call evaluator");

279 }	397 }

280 break;	398 break;

281 }	399 }

282 if (sysnum <= MAX_SYSCALL) {	400

283 // We compute the default behavior (e.g. fail open or fail closed) by	401 // Emit BPF instructions matching this range.

284 // calling the system call evaluator with a system call bigger than	402 if (iter->to != std::numeric_limits<unsigned>::max()) {

285 // MAX_SYSCALL.	403 program->push_back((struct sock_filter)

286 // In other words, the very last iteration in our loop becomes the	404 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0));

287 // fallback case and we don't need to do any comparisons.

288 program.push_back((struct sock_filter)

289 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, sysnum, 0, 1));

290 }	405 }

291 program.push_back((struct sock_filter)	406 program->push_back((struct sock_filter)

292 BPF_STMT(BPF_RET+BPF_K, ret));	407 BPF_STMT(BPF_RET+BPF_K, ret));

293 }	408 }

294

295 // Make sure compilation resulted in BPF program that executes

296 // correctly. Otherwise, there is an internal error in our BPF compiler.

297 // There is really nothing the caller can do until the bug is fixed.

298 const char *err;

299 if (!Verifier::verifyBPF(program, evaluators_, &err)) {

300 die(err);

301 }

302

303 // Install BPF filter program

304 const struct sock_fprog prog = { program.size(), &program[0] };

305 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) \|\|

306 prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

307 goto filter_failed;

308 }

309

310 return;	409 return;

311 }	410 }

312	411

313 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {	412 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {

314 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context) {	413 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context) {

315 // die() can call LOG(FATAL). This is not normally async-signal safe	414 // die() can call LOG(FATAL). This is not normally async-signal safe

316 // and can lead to bugs. We should eventually implement a different	415 // and can lead to bugs. We should eventually implement a different

317 // logging and reporting mechanism that is safe to be called from	416 // logging and reporting mechanism that is safe to be called from

318 // the sigSys() handler.	417 // the sigSys() handler.

319 die("Unexpected SIGSYS received");	418 die("Unexpected SIGSYS received");

(...skipping 20 matching lines...) Expand all Loading...
340 return;	439 return;

341 }	440 }

342	441

343	442

344 bool Sandbox::suppressLogging_ = false;	443 bool Sandbox::suppressLogging_ = false;

345 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	444 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

346 int Sandbox::proc_fd_ = -1;	445 int Sandbox::proc_fd_ = -1;

347 Sandbox::Evaluators Sandbox::evaluators_;	446 Sandbox::Evaluators Sandbox::evaluators_;

348	447

349 } // namespace	448 } // namespace

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/verifier.cc » ('j') | no next file with comments »