sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 11411254: SECCOMP-BPF: Added supported for inspection system call arguments from BPF filters.

Side by Side Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 11411254: SECCOMP-BPF: Added supported for inspection system call arguments from BPF filters. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Another attempt at fixing the rebase Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <endian.h>

6 #if __BYTE_ORDER == __BIG_ENDIAN

7 // The BPF "struct seccomp_data" layout has to deal with storing 64bit

8 // values that need to be inspected by a virtual machine that only ever

9 // operates on 32bit values. The kernel developers decided how values

10 // should be split into two 32bit words to achieve this goal. But at this

11 // time, there is no existing BPF implementation in the kernel that uses

12 // 64bit big endian values. So, all we have to go by is the consensus

13 // from a discussion on LKML. Actual implementations, if and when they

14 // happen, might very well differ.

15 // If this code is ever going to be used with such a kernel, you should

16 // disable the "#error" and carefully test the code (e.g. run the unit

17 // tests). If things don't work, search for all occurrences of __BYTE_ORDER

18 // and verify that the proposed implementation agrees with what the kernel

19 // actually does.

20 #error Big endian operation is untested and expected to be broken

21 #endif

22

23 #ifndef SECCOMP_BPF_STANDALONE	5 #ifndef SECCOMP_BPF_STANDALONE

24 #include "base/logging.h"	6 #include "base/logging.h"

25 #include "base/posix/eintr_wrapper.h"	7 #include "base/posix/eintr_wrapper.h"

26 #endif	8 #endif

27	9

28 #include "sandbox/linux/seccomp-bpf/codegen.h"	10 #include "sandbox/linux/seccomp-bpf/codegen.h"

29 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"	11 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

30 #include "sandbox/linux/seccomp-bpf/syscall.h"	12 #include "sandbox/linux/seccomp-bpf/syscall.h"

31 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"	13 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"

32 #include "sandbox/linux/seccomp-bpf/verifier.h"	14 #include "sandbox/linux/seccomp-bpf/verifier.h"

(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
72	54

73 // The kernel gives us a sandbox, we turn it into a playground :-)	55 // The kernel gives us a sandbox, we turn it into a playground :-)

74 // This is version 2 of the playground; version 1 was built on top of	56 // This is version 2 of the playground; version 1 was built on top of

75 // pre-BPF seccomp mode.	57 // pre-BPF seccomp mode.

76 namespace playground2 {	58 namespace playground2 {

77	59

78 const int kExpectedExitCode = 100;	60 const int kExpectedExitCode = 100;

79	61

80 // We define a really simple sandbox policy. It is just good enough for us	62 // We define a really simple sandbox policy. It is just good enough for us

81 // to tell that the sandbox has actually been activated.	63 // to tell that the sandbox has actually been activated.

82 ErrorCode Sandbox::probeEvaluator(int sysnum, void *) {	64 ErrorCode Sandbox::ProbeEvaluator(int sysnum, void *) {

83 switch (sysnum) {	65 switch (sysnum) {

84 case __NR_getpid:	66 case __NR_getpid:

85 // Return EPERM so that we can check that the filter actually ran.	67 // Return EPERM so that we can check that the filter actually ran.

86 return ErrorCode(EPERM);	68 return ErrorCode(EPERM);

87 case __NR_exit_group:	69 case __NR_exit_group:

88 // Allow exit() with a non-default return code.	70 // Allow exit() with a non-default return code.

89 return ErrorCode(ErrorCode::ERR_ALLOWED);	71 return ErrorCode(ErrorCode::ERR_ALLOWED);

90 default:	72 default:

91 // Make everything else fail in an easily recognizable way.	73 // Make everything else fail in an easily recognizable way.

92 return ErrorCode(EINVAL);	74 return ErrorCode(EINVAL);

93 }	75 }

94 }	76 }

95	77

96 void Sandbox::probeProcess(void) {	78 void Sandbox::ProbeProcess(void) {

97 if (syscall(__NR_getpid) < 0 && errno == EPERM) {	79 if (syscall(__NR_getpid) < 0 && errno == EPERM) {

98 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));	80 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));

99 }	81 }

100 }	82 }

101	83

102 bool Sandbox::isValidSyscallNumber(int sysnum) {	84 bool Sandbox::IsValidSyscallNumber(int sysnum) {

103 return SyscallIterator::IsValid(sysnum);	85 return SyscallIterator::IsValid(sysnum);

104 }	86 }

105	87

106 ErrorCode Sandbox::allowAllEvaluator(int sysnum, void *) {	88 ErrorCode Sandbox::AllowAllEvaluator(int sysnum, void *) {

107 if (!isValidSyscallNumber(sysnum)) {	89 if (!IsValidSyscallNumber(sysnum)) {

108 return ErrorCode(ENOSYS);	90 return ErrorCode(ENOSYS);

109 }	91 }

110 return ErrorCode(ErrorCode::ERR_ALLOWED);	92 return ErrorCode(ErrorCode::ERR_ALLOWED);

111 }	93 }

112	94

113 void Sandbox::tryVsyscallProcess(void) {	95 void Sandbox::TryVsyscallProcess(void) {

114 time_t current_time;	96 time_t current_time;

115 // time() is implemented as a vsyscall. With an older glibc, with	97 // time() is implemented as a vsyscall. With an older glibc, with

116 // vsyscall=emulate and some versions of the seccomp BPF patch	98 // vsyscall=emulate and some versions of the seccomp BPF patch

117 // we may get SIGKILL-ed. Detect this!	99 // we may get SIGKILL-ed. Detect this!

118 if (time(&current_time) != static_cast<time_t>(-1)) {	100 if (time(&current_time) != static_cast<time_t>(-1)) {

119 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));	101 syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));

120 }	102 }

121 }	103 }

122	104

123 bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(),	105 bool Sandbox::RunFunctionInPolicy(void (*code_in_sandbox)(),

124 EvaluateSyscall syscallEvaluator,	106 EvaluateSyscall syscall_evaluator,

125 void *aux,	107 void *aux,

126 int proc_fd) {	108 int proc_fd) {

127 // Block all signals before forking a child process. This prevents an	109 // Block all signals before forking a child process. This prevents an

128 // attacker from manipulating our test by sending us an unexpected signal.	110 // attacker from manipulating our test by sending us an unexpected signal.

129 sigset_t oldMask, newMask;	111 sigset_t old_mask, new_mask;

130 if (sigfillset(&newMask) ||	112 if (sigfillset(&new_mask) ||

131 sigprocmask(SIG_BLOCK, &newMask, &oldMask)) {	113 sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {

132 SANDBOX_DIE("sigprocmask() failed");	114 SANDBOX_DIE("sigprocmask() failed");

133 }	115 }

134 int fds[2];	116 int fds[2];

135 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {	117 if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {

136 SANDBOX_DIE("pipe() failed");	118 SANDBOX_DIE("pipe() failed");

137 }	119 }

138	120

139 if (fds[0] <= 2 || fds[1] <= 2) {	121 if (fds[0] <= 2 || fds[1] <= 2) {

140 SANDBOX_DIE("Process started without standard file descriptors");	122 SANDBOX_DIE("Process started without standard file descriptors");

141 }	123 }

142	124

143 pid_t pid = fork();	125 pid_t pid = fork();

144 if (pid < 0) {	126 if (pid < 0) {

145 // Die if we cannot fork(). We would probably fail a little later	127 // Die if we cannot fork(). We would probably fail a little later

146 // anyway, as the machine is likely very close to running out of	128 // anyway, as the machine is likely very close to running out of

147 // memory.	129 // memory.

148 // But what we don't want to do is return "false", as a crafty	130 // But what we don't want to do is return "false", as a crafty

149 // attacker might cause fork() to fail at will and could trick us	131 // attacker might cause fork() to fail at will and could trick us

150 // into running without a sandbox.	132 // into running without a sandbox.

151 sigprocmask(SIG_SETMASK, &oldMask, NULL); // OK, if it fails	133 sigprocmask(SIG_SETMASK, &old_mask, NULL); // OK, if it fails

152 SANDBOX_DIE("fork() failed unexpectedly");	134 SANDBOX_DIE("fork() failed unexpectedly");

153 }	135 }

154	136

155 // In the child process	137 // In the child process

156 if (!pid) {	138 if (!pid) {

157 // Test a very simple sandbox policy to verify that we can	139 // Test a very simple sandbox policy to verify that we can

158 // successfully turn on sandboxing.	140 // successfully turn on sandboxing.

159 Die::EnableSimpleExit();	141 Die::EnableSimpleExit();

160	142

161 errno = 0;	143 errno = 0;

(...skipping 22 matching lines...) Expand all Loading...
184 if (HANDLE_EINTR(close(fds[1]))) {	166 if (HANDLE_EINTR(close(fds[1]))) {

185 // This call to close() has been failing in strange ways. See	167 // This call to close() has been failing in strange ways. See

186 // crbug.com/152530. So we only fail in debug mode now.	168 // crbug.com/152530. So we only fail in debug mode now.

187 #if !defined(NDEBUG)	169 #if !defined(NDEBUG)

188 WriteFailedStderrSetupMessage(fds[1]);	170 WriteFailedStderrSetupMessage(fds[1]);

189 SANDBOX_DIE(NULL);	171 SANDBOX_DIE(NULL);

190 #endif	172 #endif

191 }	173 }

192	174

193 evaluators_.clear();	175 evaluators_.clear();

194 setSandboxPolicy(syscallEvaluator, aux);	176 SetSandboxPolicy(syscall_evaluator, aux);

195 setProcFd(proc_fd);	177 set_proc_fd(proc_fd);

196	178

197 // By passing "quiet=true" to "startSandboxInternal()" we suppress	179 // By passing "quiet=true" to "startSandboxInternal()" we suppress

198 // messages for expected and benign failures (e.g. if the current	180 // messages for expected and benign failures (e.g. if the current

199 // kernel lacks support for BPF filters).	181 // kernel lacks support for BPF filters).

200 startSandboxInternal(true);	182 StartSandboxInternal(true);

201	183

202 // Run our code in the sandbox.	184 // Run our code in the sandbox.

203 CodeInSandbox();	185 code_in_sandbox();

204	186

205 // CodeInSandbox() is not supposed to return here.	187 // code_in_sandbox() is not supposed to return here.

206 SANDBOX_DIE(NULL);	188 SANDBOX_DIE(NULL);

207 }	189 }

208	190

209 // In the parent process.	191 // In the parent process.

210 if (HANDLE_EINTR(close(fds[1]))) {	192 if (HANDLE_EINTR(close(fds[1]))) {

211 SANDBOX_DIE("close() failed");	193 SANDBOX_DIE("close() failed");

212 }	194 }

213 if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) {	195 if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {

214 SANDBOX_DIE("sigprocmask() failed");	196 SANDBOX_DIE("sigprocmask() failed");

215 }	197 }

216 int status;	198 int status;

217 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {	199 if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {

218 SANDBOX_DIE("waitpid() failed unexpectedly");	200 SANDBOX_DIE("waitpid() failed unexpectedly");

219 }	201 }

220 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;	202 bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;

221	203

222 // If we fail to support sandboxing, there might be an additional	204 // If we fail to support sandboxing, there might be an additional

223 // error message. If so, this was an entirely unexpected and fatal	205 // error message. If so, this was an entirely unexpected and fatal

(...skipping 11 matching lines...) Expand all Loading...
235 SANDBOX_DIE(buf);	217 SANDBOX_DIE(buf);

236 }	218 }

237 }	219 }

238 if (HANDLE_EINTR(close(fds[0]))) {	220 if (HANDLE_EINTR(close(fds[0]))) {

239 SANDBOX_DIE("close() failed");	221 SANDBOX_DIE("close() failed");

240 }	222 }

241	223

242 return rc;	224 return rc;

243 }	225 }

244	226

245 bool Sandbox::kernelSupportSeccompBPF(int proc_fd) {	227 bool Sandbox::KernelSupportSeccompBPF(int proc_fd) {

246 #if defined(SECCOMP_BPF_VALGRIND_HACKS)	228 #if defined(SECCOMP_BPF_VALGRIND_HACKS)

247 if (RUNNING_ON_VALGRIND) {	229 if (RUNNING_ON_VALGRIND) {

248 // Valgrind doesn't like our run-time test. Disable testing and assume we	230 // Valgrind doesn't like our run-time test. Disable testing and assume we

249 // always support sandboxing. This feature should only ever be enabled when	231 // always support sandboxing. This feature should only ever be enabled when

250 // debugging.	232 // debugging.

251 return true;	233 return true;

252 }	234 }

253 #endif	235 #endif

254	236

255 return	237 return

256 RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, 0, proc_fd) &&	238 RunFunctionInPolicy(ProbeProcess, Sandbox::ProbeEvaluator, 0, proc_fd) &&

257 RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, 0,	239 RunFunctionInPolicy(TryVsyscallProcess, Sandbox::AllowAllEvaluator, 0,

258 proc_fd);	240 proc_fd);

259 }	241 }

260	242

261 Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) {	243 Sandbox::SandboxStatus Sandbox::SupportsSeccompSandbox(int proc_fd) {

262 // If the sandbox is currently active, we clearly must have support for	244 // If the sandbox is currently active, we clearly must have support for

263 // sandboxing.	245 // sandboxing.

264 if (status_ == STATUS_ENABLED) {	246 if (status_ == STATUS_ENABLED) {

265 return status_;	247 return status_;

266 }	248 }

267	249

268 // Even if the sandbox was previously available, something might have	250 // Even if the sandbox was previously available, something might have

269 // changed in our run-time environment. Check one more time.	251 // changed in our run-time environment. Check one more time.

270 if (status_ == STATUS_AVAILABLE) {	252 if (status_ == STATUS_AVAILABLE) {

271 if (!isSingleThreaded(proc_fd)) {	253 if (!IsSingleThreaded(proc_fd)) {

272 status_ = STATUS_UNAVAILABLE;	254 status_ = STATUS_UNAVAILABLE;

273 }	255 }

274 return status_;	256 return status_;

275 }	257 }

276	258

277 if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) {	259 if (status_ == STATUS_UNAVAILABLE && IsSingleThreaded(proc_fd)) {

278 // All state transitions resulting in STATUS_UNAVAILABLE are immediately	260 // All state transitions resulting in STATUS_UNAVAILABLE are immediately

279 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all	261 // preceded by STATUS_AVAILABLE. Furthermore, these transitions all

280 // happen, if and only if they are triggered by the process being multi-	262 // happen, if and only if they are triggered by the process being multi-

281 // threaded.	263 // threaded.

282 // In other words, if a single-threaded process is currently in the	264 // In other words, if a single-threaded process is currently in the

283 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is	265 // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is

284 // actually available.	266 // actually available.

285 status_ = STATUS_AVAILABLE;	267 status_ = STATUS_AVAILABLE;

286 return status_;	268 return status_;

287 }	269 }

288	270

289 // If we have not previously checked for availability of the sandbox or if	271 // If we have not previously checked for availability of the sandbox or if

290 // we otherwise don't believe to have a good cached value, we have to	272 // we otherwise don't believe to have a good cached value, we have to

291 // perform a thorough check now.	273 // perform a thorough check now.

292 if (status_ == STATUS_UNKNOWN) {	274 if (status_ == STATUS_UNKNOWN) {

293 status_ = kernelSupportSeccompBPF(proc_fd)	275 status_ = KernelSupportSeccompBPF(proc_fd)

294 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;	276 ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;

295	277

296 // As we are performing our tests from a child process, the run-time	278 // As we are performing our tests from a child process, the run-time

297 // environment that is visible to the sandbox is always guaranteed to be	279 // environment that is visible to the sandbox is always guaranteed to be

298 // single-threaded. Let's check here whether the caller is single-	280 // single-threaded. Let's check here whether the caller is single-

299 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.	281 // threaded. Otherwise, we mark the sandbox as temporarily unavailable.

300 if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) {	282 if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {

301 status_ = STATUS_UNAVAILABLE;	283 status_ = STATUS_UNAVAILABLE;

302 }	284 }

303 }	285 }

304 return status_;	286 return status_;

305 }	287 }

306	288

307 void Sandbox::setProcFd(int proc_fd) {	289 void Sandbox::set_proc_fd(int proc_fd) {

308 proc_fd_ = proc_fd;	290 proc_fd_ = proc_fd;

309 }	291 }

310	292

311 void Sandbox::startSandboxInternal(bool quiet) {	293 void Sandbox::StartSandboxInternal(bool quiet) {

312 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {	294 if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {

313 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "	295 SANDBOX_DIE("Trying to start sandbox, even though it is known to be "

314 "unavailable");	296 "unavailable");

315 } else if (status_ == STATUS_ENABLED) {	297 } else if (status_ == STATUS_ENABLED) {

316 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "	298 SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "

317 "setSandboxPolicy() to stack policies instead");	299 "setSandboxPolicy() to stack policies instead");

318 }	300 }

319 if (proc_fd_ < 0) {	301 if (proc_fd_ < 0) {

320 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);	302 proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);

321 }	303 }

322 if (proc_fd_ < 0) {	304 if (proc_fd_ < 0) {

323 // For now, continue in degraded mode, if we can't access /proc.	305 // For now, continue in degraded mode, if we can't access /proc.

324 // In the future, we might want to tighten this requirement.	306 // In the future, we might want to tighten this requirement.

325 }	307 }

326 if (!isSingleThreaded(proc_fd_)) {	308 if (!IsSingleThreaded(proc_fd_)) {

327 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");	309 SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");

328 }	310 }

329	311

330 // We no longer need access to any files in /proc. We want to do this	312 // We no longer need access to any files in /proc. We want to do this

331 // before installing the filters, just in case that our policy denies	313 // before installing the filters, just in case that our policy denies

332 // close().	314 // close().

333 if (proc_fd_ >= 0) {	315 if (proc_fd_ >= 0) {

334 if (HANDLE_EINTR(close(proc_fd_))) {	316 if (HANDLE_EINTR(close(proc_fd_))) {

335 SANDBOX_DIE("Failed to close file descriptor for /proc");	317 SANDBOX_DIE("Failed to close file descriptor for /proc");

336 }	318 }

337 proc_fd_ = -1;	319 proc_fd_ = -1;

338 }	320 }

339	321

340 // Install the filters.	322 // Install the filters.

341 installFilter(quiet);	323 InstallFilter(quiet);

342	324

343 // We are now inside the sandbox.	325 // We are now inside the sandbox.

344 status_ = STATUS_ENABLED;	326 status_ = STATUS_ENABLED;

345 }	327 }

346	328

347 bool Sandbox::isSingleThreaded(int proc_fd) {	329 bool Sandbox::IsSingleThreaded(int proc_fd) {

348 if (proc_fd < 0) {	330 if (proc_fd < 0) {

349 // Cannot determine whether program is single-threaded. Hope for	331 // Cannot determine whether program is single-threaded. Hope for

350 // the best...	332 // the best...

351 return true;	333 return true;

352 }	334 }

353	335

354 struct stat sb;	336 struct stat sb;

355 int task = -1;	337 int task = -1;

356 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||	338 if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||

357 fstat(task, &sb) != 0 ||	339 fstat(task, &sb) != 0 ||

358 sb.st_nlink != 3 ||	340 sb.st_nlink != 3 ||

359 HANDLE_EINTR(close(task))) {	341 HANDLE_EINTR(close(task))) {

360 if (task >= 0) {	342 if (task >= 0) {

361 if (HANDLE_EINTR(close(task))) { }	343 if (HANDLE_EINTR(close(task))) { }

362 }	344 }

363 return false;	345 return false;

364 }	346 }

365 return true;	347 return true;

366 }	348 }

367	349

368 bool Sandbox::isDenied(const ErrorCode& code) {	350 bool Sandbox::IsDenied(const ErrorCode& code) {

369 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||	351 return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||

370 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&	352 (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&

371 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));	353 code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));

372 }	354 }

373	355

374 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,	356 void Sandbox::PolicySanityChecks(EvaluateSyscall syscall_evaluator,

375 void *aux) {	357 void *aux) {

376 for (SyscallIterator iter(true); !iter.Done(); ) {	358 for (SyscallIterator iter(true); !iter.Done(); ) {

377 uint32_t sysnum = iter.Next();	359 uint32_t sysnum = iter.Next();

378 if (!isDenied(syscallEvaluator(sysnum, aux))) {	360 if (!IsDenied(syscall_evaluator(sysnum, aux))) {

379 SANDBOX_DIE("Policies should deny system calls that are outside the "	361 SANDBOX_DIE("Policies should deny system calls that are outside the "

380 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");	362 "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");

381 }	363 }

382 }	364 }

383 return;	365 return;

384 }	366 }

385	367

386 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {	368 void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {

387 if (BPF_CLASS(insn->code) == BPF_RET &&	369 if (BPF_CLASS(insn->code) == BPF_RET &&

388 insn->k > SECCOMP_RET_TRAP &&	370 insn->k > SECCOMP_RET_TRAP &&

389 insn->k - SECCOMP_RET_TRAP <= trapArraySize_) {	371 insn->k - SECCOMP_RET_TRAP <= trap_array_size_) {

390 const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1];	372 const ErrorCode& err = trap_array_[insn->k - SECCOMP_RET_TRAP - 1];

391 if (!err.safe_) {	373 if (!err.safe_) {

392 bool *is_unsafe = static_cast<bool *>(aux);	374 bool *is_unsafe = static_cast<bool *>(aux);

393 *is_unsafe = true;	375 *is_unsafe = true;

394 }	376 }

395 }	377 }

396 }	378 }

397	379

398 void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) {	380 void Sandbox::RedirectToUserspace(Instruction *insn, void *) {

399 // When inside an UnsafeTrap() callback, we want to allow all system calls.	381 // When inside an UnsafeTrap() callback, we want to allow all system calls.

400 // This means, we must conditionally disable the sandbox -- and that's not	382 // This means, we must conditionally disable the sandbox -- and that's not

401 // something that kernel-side BPF filters can do, as they cannot inspect	383 // something that kernel-side BPF filters can do, as they cannot inspect

402 // any state other than the syscall arguments.	384 // any state other than the syscall arguments.

403 // But if we redirect all error handlers to user-space, then we can easily	385 // But if we redirect all error handlers to user-space, then we can easily

404 // make this decision.	386 // make this decision.

405 // The performance penalty for this extra round-trip to user-space is not	387 // The performance penalty for this extra round-trip to user-space is not

406 // actually that bad, as we only ever pay it for denied system calls; and a	388 // actually that bad, as we only ever pay it for denied system calls; and a

407 // typical program has very few of these.	389 // typical program has very few of these.

408 if (BPF_CLASS(insn->code) == BPF_RET &&	390 if (BPF_CLASS(insn->code) == BPF_RET &&

409 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {	391 (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

410 insn->k = Trap(ReturnErrno,	392 insn->k = Trap(ReturnErrno,

411 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();	393 reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();

412 }	394 }

413 }	395 }

414	396

415 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {	397 ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {

416 // We need to replicate the behavior of RedirectToUserspace(), so that our	398 // We need to replicate the behavior of RedirectToUserspace(), so that our

417 // Verifier can still work correctly.	399 // Verifier can still work correctly.

418 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);	400 Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);

419 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();	401 const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();

420 ErrorCode err = evaluator.first(sysnum, evaluator.second);	402 ErrorCode err = evaluator.first(sysnum, evaluator.second);

421 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {	403 if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {

422 return Trap(ReturnErrno,	404 return Trap(ReturnErrno,

423 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));	405 reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));

424 }	406 }

425 return err;	407 return err;

426 }	408 }

427	409

428 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) {	410 void Sandbox::SetSandboxPolicy(EvaluateSyscall syscall_evaluator, void *aux) {

429 if (status_ == STATUS_ENABLED) {	411 if (status_ == STATUS_ENABLED) {

430 SANDBOX_DIE("Cannot change policy after sandbox has started");	412 SANDBOX_DIE("Cannot change policy after sandbox has started");

431 }	413 }

432 policySanityChecks(syscallEvaluator, aux);	414 PolicySanityChecks(syscall_evaluator, aux);

433 evaluators_.push_back(std::make_pair(syscallEvaluator, aux));	415 evaluators_.push_back(std::make_pair(syscall_evaluator, aux));

434 }	416 }

435	417

436 void Sandbox::installFilter(bool quiet) {	418 void Sandbox::InstallFilter(bool quiet) {

437 // Verify that the user pushed a policy.	419 // Verify that the user pushed a policy.

438 if (evaluators_.empty()) {	420 if (evaluators_.empty()) {

439 filter_failed:	421 filter_failed:

440 SANDBOX_DIE("Failed to configure system call filters");	422 SANDBOX_DIE("Failed to configure system call filters");

441 }	423 }

442	424

443 // Set new SIGSYS handler	425 // Set new SIGSYS handler

444 struct sigaction sa;	426 struct sigaction sa;

445 memset(&sa, 0, sizeof(sa));	427 memset(&sa, 0, sizeof(sa));

446 sa.sa_sigaction = sigSys;	428 sa.sa_sigaction = SigSys;

447 sa.sa_flags = SA_SIGINFO | SA_NODEFER;	429 sa.sa_flags = SA_SIGINFO | SA_NODEFER;

448 if (sigaction(SIGSYS, &sa, NULL) < 0) {	430 if (sigaction(SIGSYS, &sa, NULL) < 0) {

449 goto filter_failed;	431 goto filter_failed;

450 }	432 }

451	433

452 // Unmask SIGSYS	434 // Unmask SIGSYS

453 sigset_t mask;	435 sigset_t mask;

454 if (sigemptyset(&mask) ||	436 if (sigemptyset(&mask) ||

455 sigaddset(&mask, SIGSYS) ||	437 sigaddset(&mask, SIGSYS) ||

456 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {	438 sigprocmask(SIG_UNBLOCK, &mask, NULL)) {

457 goto filter_failed;	439 goto filter_failed;

458 }	440 }

459	441

460 // We can't handle stacked evaluators, yet. We'll get there eventually	442 // We can't handle stacked evaluators, yet. We'll get there eventually

461 // though. Hang tight.	443 // though. Hang tight.

462 if (evaluators_.size() != 1) {	444 if (evaluators_.size() != 1) {

463 SANDBOX_DIE("Not implemented");	445 SANDBOX_DIE("Not implemented");

464 }	446 }

465	447

466 // Assemble the BPF filter program.	448 // Assemble the BPF filter program.

467 CodeGen *gen = new CodeGen();	449 CodeGen *gen = new CodeGen();

468 if (!gen) {	450 if (!gen) {

469 SANDBOX_DIE("Out of memory");	451 SANDBOX_DIE("Out of memory");

470 }	452 }

471	453

472 // If the architecture doesn't match SECCOMP_ARCH, disallow the	454 // If the architecture doesn't match SECCOMP_ARCH, disallow the

473 // system call.	455 // system call.

474 Instruction *tail;	456 Instruction *tail;

475 Instruction *head =	457 Instruction *head =

476 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,	458 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_ARCH_IDX,

477 offsetof(struct arch_seccomp_data, arch),

478 tail =	459 tail =

479 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,	460 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,

480 NULL,	461 NULL,

481 gen->MakeInstruction(BPF_RET+BPF_K,	462 gen->MakeInstruction(BPF_RET+BPF_K,

482 Kill(	463 Kill("Invalid audit architecture in BPF filter"))));

483 "Invalid audit architecture in BPF filter").err_)));

484	464

485 {	465 {

486 // Evaluate all possible system calls and group their ErrorCodes into	466 // Evaluate all possible system calls and group their ErrorCodes into

487 // ranges of identical codes.	467 // ranges of identical codes.

488 Ranges ranges;	468 Ranges ranges;

489 findRanges(&ranges);	469 FindRanges(&ranges);

490	470

491 // Compile the system call ranges to an optimized BPF jumptable	471 // Compile the system call ranges to an optimized BPF jumptable

492 Instruction *jumptable =	472 Instruction *jumptable =

493 assembleJumpTable(gen, ranges.begin(), ranges.end());	473 AssembleJumpTable(gen, ranges.begin(), ranges.end());

494	474

495 // If there is at least one UnsafeTrap() in our program, the entire sandbox	475 // If there is at least one UnsafeTrap() in our program, the entire sandbox

496 // is unsafe. We need to modify the program so that all non-	476 // is unsafe. We need to modify the program so that all non-

497 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then	477 // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then

498 // allow us to temporarily disable sandboxing rules inside of callbacks to	478 // allow us to temporarily disable sandboxing rules inside of callbacks to

499 // UnsafeTrap().	479 // UnsafeTrap().

500 has_unsafe_traps_ = false;	480 has_unsafe_traps_ = false;

501 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);	481 gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);

502	482

503 // Grab the system call number, so that we can implement jump tables.	483 // Grab the system call number, so that we can implement jump tables.

504 Instruction *load_nr =	484 Instruction *load_nr =

505 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,	485 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_NR_IDX);

506 offsetof(struct arch_seccomp_data, nr));

507	486

508 // If our BPF program has unsafe jumps, enable support for them. This	487 // If our BPF program has unsafe jumps, enable support for them. This

509 // test happens very early in the BPF filter program. Even before we	488 // test happens very early in the BPF filter program. Even before we

510 // consider looking at system call numbers.	489 // consider looking at system call numbers.

511 // As support for unsafe jumps essentially defeats all the security	490 // As support for unsafe jumps essentially defeats all the security

512 // measures that the sandbox provides, we print a big warning message --	491 // measures that the sandbox provides, we print a big warning message --

513 // and of course, we make sure to only ever enable this feature if it	492 // and of course, we make sure to only ever enable this feature if it

514 // is actually requested by the sandbox policy.	493 // is actually requested by the sandbox policy.

515 if (has_unsafe_traps_) {	494 if (has_unsafe_traps_) {

516 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {	495 if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {

(...skipping 26 matching lines...) Expand all Loading...
543 // Allow system calls, if they originate from our magic return address	522 // Allow system calls, if they originate from our magic return address

544 // (which we can query by calling SandboxSyscall(-1)).	523 // (which we can query by calling SandboxSyscall(-1)).

545 uintptr_t syscall_entry_point =	524 uintptr_t syscall_entry_point =

546 static_cast<uintptr_t>(SandboxSyscall(-1));	525 static_cast<uintptr_t>(SandboxSyscall(-1));

547 uint32_t low = static_cast<uint32_t>(syscall_entry_point);	526 uint32_t low = static_cast<uint32_t>(syscall_entry_point);

548 #if __SIZEOF_POINTER__ > 4	527 #if __SIZEOF_POINTER__ > 4

549 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);	528 uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);

550 #endif	529 #endif

551	530

552 // BPF cannot do native 64bit comparisons. On 64bit architectures, we	531 // BPF cannot do native 64bit comparisons. On 64bit architectures, we

553 // have to compare both 32bit halfs of the instruction pointer. If they	532 // have to compare both 32bit halves of the instruction pointer. If they

554 // match what we expect, we return ERR_ALLOWED. If either or both don't	533 // match what we expect, we return ERR_ALLOWED. If either or both don't

555 // match, we continue evaluating the rest of the sandbox policy.	534 // match, we continue evaluating the rest of the sandbox policy.

556 Instruction *escape_hatch =	535 Instruction *escape_hatch =

557 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,	536 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_IP_LSB_IDX,

558 offsetof(struct arch_seccomp_data,

559 instruction_pointer) +

560 (__SIZEOF_POINTER__ > 4 &&

561 __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0),

562 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low,	537 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low,

563 #if __SIZEOF_POINTER__ > 4	538 #if __SIZEOF_POINTER__ > 4

564 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,	539 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, SECCOMP_IP_MSB_IDX,

565 offsetof(struct arch_seccomp_data,

566 instruction_pointer) +

567 (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4),

568 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi,	540 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi,

569 #endif	541 #endif

570 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)),	542 gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)),

571 #if __SIZEOF_POINTER__ > 4	543 #if __SIZEOF_POINTER__ > 4

572 load_nr)),	544 load_nr)),

573 #endif	545 #endif

574 load_nr));	546 load_nr));

575 gen->JoinInstructions(tail, escape_hatch);	547 gen->JoinInstructions(tail, escape_hatch);

576 } else {	548 } else {

577 gen->JoinInstructions(tail, load_nr);	549 gen->JoinInstructions(tail, load_nr);

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
639 // system memory allocator that is in effect, these operators can result	611 // system memory allocator that is in effect, these operators can result

640 // in system calls to things like munmap() or brk().	612 // in system calls to things like munmap() or brk().

641 struct sock_filter bpf[program->size()];	613 struct sock_filter bpf[program->size()];

642 const struct sock_fprog prog = {	614 const struct sock_fprog prog = {

643 static_cast<unsigned short>(program->size()), bpf };	615 static_cast<unsigned short>(program->size()), bpf };

644 memcpy(bpf, &(*program)[0], sizeof(bpf));	616 memcpy(bpf, &(*program)[0], sizeof(bpf));

645 delete program;	617 delete program;

646	618

647 // Release memory that is no longer needed	619 // Release memory that is no longer needed

648 evaluators_.clear();	620 evaluators_.clear();

	621 conds_.clear();

649	622

650 #if defined(SECCOMP_BPF_VALGRIND_HACKS)	623 #if defined(SECCOMP_BPF_VALGRIND_HACKS)

651 // Valgrind is really not happy about our sandbox. Disable it when running	624 // Valgrind is really not happy about our sandbox. Disable it when running

652 // in Valgrind. This feature is dangerous and should never be enabled by	625 // in Valgrind. This feature is dangerous and should never be enabled by

653 // default. We protect it behind a pre-processor option.	626 // default. We protect it behind a pre-processor option.

654 if (!RUNNING_ON_VALGRIND)	627 if (!RUNNING_ON_VALGRIND)

655 #endif	628 #endif

656 {	629 {

657 // Install BPF filter program	630 // Install BPF filter program

658 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {	631 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {

659 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");	632 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");

660 } else {	633 } else {

661 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {	634 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {

662 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");	635 SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");

663 }	636 }

664 }	637 }

665 }	638 }

666	639

667 return;	640 return;

668 }	641 }

669	642

670 void Sandbox::findRanges(Ranges *ranges) {	643 void Sandbox::FindRanges(Ranges *ranges) {

671 // Please note that "struct seccomp_data" defines system calls as a signed	644 // Please note that "struct seccomp_data" defines system calls as a signed

672 // int32_t, but BPF instructions always operate on unsigned quantities. We	645 // int32_t, but BPF instructions always operate on unsigned quantities. We

673 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,	646 // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,

674 // and then verifying that the rest of the number range (both positive and	647 // and then verifying that the rest of the number range (both positive and

675 // negative) all return the same ErrorCode.	648 // negative) all return the same ErrorCode.

676 EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;	649 EvaluateSyscall evaluate_syscall = evaluators_.begin()->first;

677 void *aux = evaluators_.begin()->second;	650 void *aux = evaluators_.begin()->second;

678 uint32_t oldSysnum = 0;	651 uint32_t old_sysnum = 0;

679 ErrorCode oldErr = evaluateSyscall(oldSysnum, aux);	652 ErrorCode old_err = evaluate_syscall(old_sysnum, aux);

680 ErrorCode invalidErr = evaluateSyscall(MIN_SYSCALL - 1, aux);	653 ErrorCode invalid_err = evaluate_syscall(MIN_SYSCALL - 1, aux);

681 for (SyscallIterator iter(false); !iter.Done(); ) {	654 for (SyscallIterator iter(false); !iter.Done(); ) {

682 uint32_t sysnum = iter.Next();	655 uint32_t sysnum = iter.Next();

683 ErrorCode err = evaluateSyscall(static_cast<int>(sysnum), aux);	656 ErrorCode err = evaluate_syscall(static_cast<int>(sysnum), aux);

684 if (!iter.IsValid(sysnum) && !invalidErr.Equals(err)) {	657 if (!iter.IsValid(sysnum) && !invalid_err.Equals(err)) {

685 // A proper sandbox policy should always treat system calls outside of	658 // A proper sandbox policy should always treat system calls outside of

686 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns	659 // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns

687 // "false" for SyscallIterator::IsValid()) identically. Typically, all	660 // "false" for SyscallIterator::IsValid()) identically. Typically, all

688 // of these system calls would be denied with the same ErrorCode.	661 // of these system calls would be denied with the same ErrorCode.

689 SANDBOX_DIE("Invalid seccomp policy");	662 SANDBOX_DIE("Invalid seccomp policy");

690 }	663 }

691 if (!err.Equals(oldErr) \|\| iter.Done()) {	664 if (!err.Equals(old_err) \|\| iter.Done()) {

692 ranges->push_back(Range(oldSysnum, sysnum - 1, oldErr));	665 ranges->push_back(Range(old_sysnum, sysnum - 1, old_err));

693 oldSysnum = sysnum;	666 old_sysnum = sysnum;

694 oldErr = err;	667 old_err = err;

695 }	668 }

696 }	669 }

697 }	670 }

698	671

699 Instruction Sandbox::assembleJumpTable(CodeGen gen,	672 Instruction Sandbox::AssembleJumpTable(CodeGen gen,

700 Ranges::const_iterator start,	673 Ranges::const_iterator start,

701 Ranges::const_iterator stop) {	674 Ranges::const_iterator stop) {

702 // We convert the list of system call ranges into a jump table that performs	675 // We convert the list of system call ranges into a jump table that performs

703 // a binary search over the ranges.	676 // a binary search over the ranges.

704 // As a sanity check, we need to have at least one distinct range for us	677 // As a sanity check, we need to have at least one distinct range for us

705 // to be able to build a jump table.	678 // to be able to build a jump table.

706 if (stop - start <= 0) {	679 if (stop - start <= 0) {

707 SANDBOX_DIE("Invalid set of system call ranges");	680 SANDBOX_DIE("Invalid set of system call ranges");

708 } else if (stop - start == 1) {	681 } else if (stop - start == 1) {

709 // If we have narrowed things down to a single range object, we can	682 // If we have narrowed things down to a single range object, we can

710 // return from the BPF filter program.	683 // return from the BPF filter program.

711 return gen->MakeInstruction(BPF_RET+BPF_K, start->err);	684 return RetExpression(gen, start->err);

712 }	685 }

713	686

714 // Pick the range object that is located at the mid point of our list.	687 // Pick the range object that is located at the mid point of our list.

715 // We compare our system call number against the lowest valid system call	688 // We compare our system call number against the lowest valid system call

716 // number in this range object. If our number is lower, it is outside of	689 // number in this range object. If our number is lower, it is outside of

717 // this range object. If it is greater or equal, it might be inside.	690 // this range object. If it is greater or equal, it might be inside.

718 Ranges::const_iterator mid = start + (stop - start)/2;	691 Ranges::const_iterator mid = start + (stop - start)/2;

719	692

720 // Sub-divide the list of ranges and continue recursively.	693 // Sub-divide the list of ranges and continue recursively.

721 Instruction *jf = assembleJumpTable(gen, start, mid);	694 Instruction *jf = AssembleJumpTable(gen, start, mid);

722 Instruction *jt = assembleJumpTable(gen, mid, stop);	695 Instruction *jt = AssembleJumpTable(gen, mid, stop);

723 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf);	696 return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf);

724 }	697 }

725	698

726 void Sandbox::sigSys(int nr, siginfo_t info, void void_context) {	699 Instruction Sandbox::RetExpression(CodeGen gen, const ErrorCode& cond) {

	700 if (cond.error_type_ == ErrorCode::ET_COND) {

	701 return CondExpression(gen, cond);

	702 } else {

	703 return gen->MakeInstruction(BPF_RET+BPF_K, cond);

	704 }

	705 }

	706

	707 Instruction Sandbox::CondExpression(CodeGen gen, const ErrorCode& cond) {

	708 // We can only inspect the six system call arguments that are passed in

	709 // CPU registers.

	710 if (cond.argno_ < 0 \|\| cond.argno_ >= 6) {

	711 SANDBOX_DIE("Internal compiler error; invalid argument number "

	712 "encountered");

	713 }

	714

	715 // BPF programs operate on 32bit entities. Load both halves of the 64bit

	716 // system call argument and then generate suitable conditional statements.

	717 Instruction *msb_head =

	718 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

	719 SECCOMP_ARG_MSB_IDX(cond.argno_));

	720 Instruction *msb_tail = msb_head;

	721 Instruction *lsb_head =

	722 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

	723 SECCOMP_ARG_LSB_IDX(cond.argno_));

	724 Instruction *lsb_tail = lsb_head;

	725

	726 // Emit a suitable comparison statement.

	727 switch (cond.op_) {

	728 case ErrorCode::OP_EQUAL:

	729 // Compare the least significant bits for equality

	730 lsb_tail = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K,

	731 static_cast<uint32_t>(cond.value_),

	732 RetExpression(gen, *cond.passed_),

	733 RetExpression(gen, *cond.failed_));

	734 gen->JoinInstructions(lsb_head, lsb_tail);

	735

	736 // If we are looking at a 64bit argument, we need to also compare the

	737 // most significant bits.

	738 if (cond.width_ == ErrorCode::TP_64BIT) {

	739 msb_tail = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K,

	740 static_cast<uint32_t>(cond.value_ >> 32),

	741 NULL,

	742 RetExpression(gen, *cond.failed_));

	743 gen->JoinInstructions(msb_head, msb_tail);

	744 }

	745 break;

	746 default:

	747 // TODO(markus): We can only check for equality so far.

	748 SANDBOX_DIE("Not implemented");

	749 break;

	750 }

	751

	752 // Ensure that we never pass a 64bit value, when we only expect a 32bit

	753 // value. This is somewhat complicated by the fact that on 64bit systems,

	754 // callers could legitimately pass in a non-zero value in the MSB, iff the

	755 // LSB has been sign-extended into the MSB.

	756 if (cond.width_ == ErrorCode::TP_32BIT) {

	757 if (cond.value_ >> 32) {

	758 SANDBOX_DIE("Invalid comparison of a 32bit system call argument "

	759 "against a 64bit constant; this test is always false.");

	760 }

	761

	762 Instruction *invalid_64bit = RetExpression(gen, Unexpected64bitArgument());

	763 #if __SIZEOF_POINTER__ > 4

	764 invalid_64bit =

	765 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0xFFFFFFFF,

	766 gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,

	767 SECCOMP_ARG_LSB_IDX(cond.argno_),

	768 gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, 0x80000000,

	769 lsb_head,

	770 invalid_64bit)),

	771 invalid_64bit);

	772 #endif

	773 gen->JoinInstructions(

	774 msb_tail,

	775 gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 0,

	776 lsb_head,

	777 invalid_64bit));

	778 } else {

	779 gen->JoinInstructions(msb_tail, lsb_head);

	780 }

	781

	782 return msb_head;

	783 }

	784

	785 ErrorCode Sandbox::Unexpected64bitArgument() {

	786 return Kill("Unexpected 64bit argument detected");

	787 }

	788

	789 void Sandbox::SigSys(int nr, siginfo_t info, void void_context) {

727 // Various sanity checks to make sure we actually received a signal	790 // Various sanity checks to make sure we actually received a signal

728 // triggered by a BPF filter. If something else triggered SIGSYS	791 // triggered by a BPF filter. If something else triggered SIGSYS

729 // (e.g. kill()), there is really nothing we can do with this signal.	792 // (e.g. kill()), there is really nothing we can do with this signal.

730 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context \|\|	793 if (nr != SIGSYS \|\| info->si_code != SYS_SECCOMP \|\| !void_context \|\|

731 info->si_errno <= 0 \|\|	794 info->si_errno <= 0 \|\|

732 static_cast<size_t>(info->si_errno) > trapArraySize_) {	795 static_cast<size_t>(info->si_errno) > trap_array_size_) {

733 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal	796 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal

734 // safe and can lead to bugs. We should eventually implement a different	797 // safe and can lead to bugs. We should eventually implement a different

735 // logging and reporting mechanism that is safe to be called from	798 // logging and reporting mechanism that is safe to be called from

736 // the sigSys() handler.	799 // the sigSys() handler.

737 // TODO: If we feel confident that our code otherwise works correctly, we	800 // TODO: If we feel confident that our code otherwise works correctly, we

738 // could actually make an argument that spurious SIGSYS should	801 // could actually make an argument that spurious SIGSYS should

739 // just get silently ignored. TBD	802 // just get silently ignored. TBD

740 sigsys_err:	803 sigsys_err:

741 SANDBOX_DIE("Unexpected SIGSYS received");	804 SANDBOX_DIE("Unexpected SIGSYS received");

742 }	805 }

(...skipping 23 matching lines...) Expand all Loading...
766 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {	829 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {

767 errno = old_errno;	830 errno = old_errno;

768 if (sigsys.nr == __NR_clone) {	831 if (sigsys.nr == __NR_clone) {

769 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");	832 SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");

770 }	833 }

771 rc = SandboxSyscall(sigsys.nr,	834 rc = SandboxSyscall(sigsys.nr,

772 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),	835 SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),

773 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),	836 SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),

774 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));	837 SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));

775 } else {	838 } else {

776 const ErrorCode& err = trapArray_[info->si_errno - 1];	839 const ErrorCode& err = trap_array_[info->si_errno - 1];

777 if (!err.safe_) {	840 if (!err.safe_) {

778 SetIsInSigHandler();	841 SetIsInSigHandler();

779 }	842 }

780	843

781 // Copy the seccomp-specific data into an arch_seccomp_data structure. This	844 // Copy the seccomp-specific data into an arch_seccomp_data structure. This

782 // is what we are showing to TrapFnc callbacks that the system call	845 // is what we are showing to TrapFnc callbacks that the system call

783 // evaluator registered with the sandbox.	846 // evaluator registered with the sandbox.

784 struct arch_seccomp_data data = {	847 struct arch_seccomp_data data = {

785 sigsys.nr,	848 sigsys.nr,

786 SECCOMP_ARCH,	849 SECCOMP_ARCH,

(...skipping 30 matching lines...) Expand all Loading...
817 } else {	880 } else {

818 return safe < o.safe;	881 return safe < o.safe;

819 }	882 }

820 }	883 }

821	884

822 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,	885 ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,

823 bool safe) {	886 bool safe) {

824 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance	887 // Each unique pair of TrapFnc and auxiliary data make up a distinct instance

825 // of a SECCOMP_RET_TRAP.	888 // of a SECCOMP_RET_TRAP.

826 TrapKey key(fnc, aux, safe);	889 TrapKey key(fnc, aux, safe);

827 TrapIds::const_iterator iter = trapIds_.find(key);	890 TrapIds::const_iterator iter = trap_ids_.find(key);

828 uint16_t id;	891 uint16_t id;

829 if (iter != trapIds_.end()) {	892 if (iter != trap_ids_.end()) {

830 // We have seen this pair before. Return the same id that we assigned	893 // We have seen this pair before. Return the same id that we assigned

831 // earlier.	894 // earlier.

832 id = iter->second;	895 id = iter->second;

833 } else {	896 } else {

834 // This is a new pair. Remember it and assign a new id.	897 // This is a new pair. Remember it and assign a new id.

835 // Please note that we have to store traps in memory that doesn't get	898 // Please note that we have to store traps in memory that doesn't get

836 // deallocated when the program is shutting down. A memory leak is	899 // deallocated when the program is shutting down. A memory leak is

837 // intentional, because we might otherwise not be able to execute	900 // intentional, because we might otherwise not be able to execute

838 // system calls part way through the program shutting down	901 // system calls part way through the program shutting down

839 if (!traps_) {	902 if (!traps_) {

840 traps_ = new Traps();	903 traps_ = new Traps();

841 }	904 }

842 if (traps_->size() >= SECCOMP_RET_DATA) {	905 if (traps_->size() >= SECCOMP_RET_DATA) {

843 // In practice, this is pretty much impossible to trigger, as there	906 // In practice, this is pretty much impossible to trigger, as there

844 // are other kernel limitations that restrict overall BPF program sizes.	907 // are other kernel limitations that restrict overall BPF program sizes.

845 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");	908 SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");

846 }	909 }

847 id = traps_->size() + 1;	910 id = traps_->size() + 1;

848	911

849 traps_->push_back(ErrorCode(fnc, aux, safe, id));	912 traps_->push_back(ErrorCode(fnc, aux, safe, id));

850 trapIds_[key] = id;	913 trap_ids_[key] = id;

851	914

852 // We want to access the traps_ vector from our signal handler. But	915 // We want to access the traps_ vector from our signal handler. But

853 // we are not assured that doing so is async-signal safe. On the other	916 // we are not assured that doing so is async-signal safe. On the other

854 // hand, C++ guarantees that the contents of a vector is stored in a	917 // hand, C++ guarantees that the contents of a vector is stored in a

855 // contiguous C-style array.	918 // contiguous C-style array.

856 // So, we look up the address and size of this array outside of the	919 // So, we look up the address and size of this array outside of the

857 // signal handler, where we can safely do so.	920 // signal handler, where we can safely do so.

858 trapArray_ = &(*traps_)[0];	921 trap_array_ = &(*traps_)[0];

859 trapArraySize_ = id;	922 trap_array_size_ = id;

860 return traps_->back();	923 return traps_->back();

861 }	924 }

862	925

863 return ErrorCode(fnc, aux, safe, id);	926 return ErrorCode(fnc, aux, safe, id);

864 }	927 }

865	928

866 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {	929 ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {

867 return MakeTrap(fnc, aux, true /* Safe Trap */);	930 return MakeTrap(fnc, aux, true /* Safe Trap */);

868 }	931 }

869	932

(...skipping 13 matching lines...) Expand all Loading...
883	946

884 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {	947 intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {

885 // TrapFnc functions report error by following the native kernel convention	948 // TrapFnc functions report error by following the native kernel convention

886 // of returning an exit code in the range of -1..-4096. They do not try to	949 // of returning an exit code in the range of -1..-4096. They do not try to

887 // set errno themselves. The glibc wrapper that triggered the SIGSYS will	950 // set errno themselves. The glibc wrapper that triggered the SIGSYS will

888 // ultimately do so for us.	951 // ultimately do so for us.

889 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;	952 int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;

890 return -err;	953 return -err;

891 }	954 }

892	955

893 intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {	956 ErrorCode Sandbox::Cond(int argno, ErrorCode::ArgType width,

	957 ErrorCode::Operation op, uint64_t value,

	958 const ErrorCode& passed, const ErrorCode& failed) {

	959 return ErrorCode(argno, width, op, value,

	960 &*conds_.insert(passed).first,

	961 &*conds_.insert(failed).first);

	962 }

	963

	964 intptr_t Sandbox::BpfFailure(const struct arch_seccomp_data&, void *aux) {

894 SANDBOX_DIE(static_cast<char *>(aux));	965 SANDBOX_DIE(static_cast<char *>(aux));

895 }	966 }

896	967

897 ErrorCode Sandbox::Kill(const char *msg) {	968 ErrorCode Sandbox::Kill(const char *msg) {

898 return Trap(bpfFailure, const_cast<char *>(msg));	969 return Trap(BpfFailure, const_cast<char *>(msg));

899 }	970 }

900	971

901 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;	972 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

902 int Sandbox::proc_fd_ = -1;	973 int Sandbox::proc_fd_ = -1;

903 Sandbox::Evaluators Sandbox::evaluators_;	974 Sandbox::Evaluators Sandbox::evaluators_;

904 Sandbox::Traps *Sandbox::traps_ = NULL;	975 Sandbox::Traps *Sandbox::traps_ = NULL;

905 Sandbox::TrapIds Sandbox::trapIds_;	976 Sandbox::TrapIds Sandbox::trap_ids_;

906 ErrorCode *Sandbox::trapArray_ = NULL;	977 ErrorCode *Sandbox::trap_array_ = NULL;

907 size_t Sandbox::trapArraySize_ = 0;	978 size_t Sandbox::trap_array_size_ = 0;

908 bool Sandbox::has_unsafe_traps_ = false;	979 bool Sandbox::has_unsafe_traps_ = false;

	980 Sandbox::Conds Sandbox::conds_;

909	981

910 } // namespace	982 } // namespace

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc » ('j') | no next file with comments »